-rw-r--r--.gitignore2
-rw-r--r--CONTRIBUTING20
-rw-r--r--COPYING2
-rw-r--r--README173
-rw-r--r--README.md446
-rw-r--r--deps/Makefile2
-rw-r--r--deps/README.md88
-rw-r--r--deps/hiredis/.gitignore1
-rw-r--r--deps/hiredis/.travis.yml35
-rw-r--r--deps/hiredis/CHANGELOG.md125
-rw-r--r--deps/hiredis/Makefile107
-rw-r--r--deps/hiredis/README.md203
-rw-r--r--deps/hiredis/adapters/glib.h153
-rw-r--r--deps/hiredis/adapters/ivykis.h81
-rw-r--r--deps/hiredis/adapters/libevent.h24
-rw-r--r--deps/hiredis/adapters/libuv.h7
-rw-r--r--deps/hiredis/adapters/macosx.h114
-rw-r--r--deps/hiredis/adapters/qt.h135
-rw-r--r--deps/hiredis/appveyor.yml36
-rw-r--r--deps/hiredis/async.c52
-rw-r--r--deps/hiredis/async.h3
-rw-r--r--deps/hiredis/dict.c4
-rw-r--r--deps/hiredis/examples/example-glib.c73
-rw-r--r--deps/hiredis/examples/example-ivykis.c58
-rw-r--r--deps/hiredis/examples/example-macosx.c66
-rw-r--r--deps/hiredis/examples/example-qt.cpp46
-rw-r--r--deps/hiredis/examples/example-qt.h32
-rw-r--r--deps/hiredis/examples/example.c2
-rw-r--r--deps/hiredis/fmacros.h15
-rw-r--r--deps/hiredis/hiredis.c719
-rw-r--r--deps/hiredis/hiredis.h153
-rw-r--r--deps/hiredis/net.c132
-rw-r--r--deps/hiredis/net.h8
-rw-r--r--deps/hiredis/read.c525
-rw-r--r--deps/hiredis/read.h111
-rw-r--r--deps/hiredis/sds.c443
-rw-r--r--deps/hiredis/sds.h194
-rw-r--r--deps/hiredis/sdsalloc.h42
-rw-r--r--deps/hiredis/test.c120
-rw-r--r--deps/hiredis/win32.h42
-rw-r--r--deps/hiredis/zmalloc.h13
-rw-r--r--deps/jemalloc/.appveyor.yml42
-rw-r--r--deps/jemalloc/.autom4te.cfg3
-rw-r--r--deps/jemalloc/.gitattributes1
-rw-r--r--deps/jemalloc/.gitignore53
-rw-r--r--deps/jemalloc/.travis.yml156
-rw-r--r--deps/jemalloc/COPYING4
-rw-r--r--deps/jemalloc/ChangeLog844
-rw-r--r--deps/jemalloc/INSTALL306
-rw-r--r--deps/jemalloc/INSTALL.md423
-rw-r--r--deps/jemalloc/Makefile.in383
-rw-r--r--deps/jemalloc/README14
-rw-r--r--deps/jemalloc/TUNING.md129
-rw-r--r--deps/jemalloc/VERSION2
-rw-r--r--deps/jemalloc/bin/jemalloc-config.in83
-rw-r--r--[-rwxr-xr-x]deps/jemalloc/bin/jeprof.in (renamed from deps/jemalloc/bin/pprof)662
-rwxr-xr-xdeps/jemalloc/build-aux/config.guess (renamed from deps/jemalloc/config.guess)354
-rwxr-xr-xdeps/jemalloc/build-aux/config.sub (renamed from deps/jemalloc/config.sub)84
-rwxr-xr-xdeps/jemalloc/build-aux/install-sh (renamed from deps/jemalloc/install-sh)0
-rwxr-xr-xdeps/jemalloc/configure6614
-rw-r--r--deps/jemalloc/configure.ac1737
-rwxr-xr-xdeps/jemalloc/coverage.sh16
-rw-r--r--deps/jemalloc/doc/html.xsl.in1
-rw-r--r--deps/jemalloc/doc/jemalloc.31630
-rw-r--r--deps/jemalloc/doc/jemalloc.html1508
-rw-r--r--deps/jemalloc/doc/jemalloc.xml.in2472
-rw-r--r--deps/jemalloc/doc/stylesheet.xsl7
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena.h1063
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_externs.h94
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h57
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h354
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_stats.h237
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_structs_a.h11
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_structs_b.h229
-rw-r--r--deps/jemalloc/include/jemalloc/internal/arena_types.h43
-rw-r--r--deps/jemalloc/include/jemalloc/internal/assert.h56
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic.h353
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_c11.h97
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h127
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h191
-rw-r--r--deps/jemalloc/include/jemalloc/internal/atomic_msvc.h158
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_externs.h33
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h57
-rw-r--r--deps/jemalloc/include/jemalloc/internal/background_thread_structs.h53
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base_externs.h22
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base_inlines.h13
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base_structs.h59
-rw-r--r--deps/jemalloc/include/jemalloc/internal/base_types.h33
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bin.h106
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bin_stats.h51
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bit_util.h165
-rw-r--r--deps/jemalloc/include/jemalloc/internal/bitmap.h349
-rw-r--r--deps/jemalloc/include/jemalloc/internal/cache_bin.h114
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk.h63
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk_dss.h38
-rw-r--r--deps/jemalloc/include/jemalloc/internal/chunk_mmap.h22
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ckh.h115
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ctl.h182
-rw-r--r--deps/jemalloc/include/jemalloc/internal/div.h41
-rw-r--r--deps/jemalloc/include/jemalloc/internal/emitter.h435
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent.h46
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_dss.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_externs.h73
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_inlines.h433
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_mmap.h10
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_structs.h219
-rw-r--r--deps/jemalloc/include/jemalloc/internal/extent_types.h17
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hash.h167
-rw-r--r--deps/jemalloc/include/jemalloc/internal/hooks.h19
-rw-r--r--deps/jemalloc/include/jemalloc/internal/huge.h46
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in1028
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h91
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in263
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h53
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h94
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h172
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h86
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h246
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h72
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h185
-rw-r--r--deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in194
-rw-r--r--deps/jemalloc/include/jemalloc/internal/large_externs.h26
-rw-r--r--deps/jemalloc/include/jemalloc/internal/log.h115
-rw-r--r--deps/jemalloc/include/jemalloc/internal/malloc_io.h102
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mb.h115
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex.h273
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex_pool.h94
-rw-r--r--deps/jemalloc/include/jemalloc/internal/mutex_prof.h99
-rw-r--r--deps/jemalloc/include/jemalloc/internal/nstime.h34
-rw-r--r--deps/jemalloc/include/jemalloc/internal/pages.h88
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ph.h391
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/private_namespace.sh4
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/private_symbols.sh51
-rw-r--r--deps/jemalloc/include/jemalloc/internal/private_symbols.txt413
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/private_unnamespace.sh5
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prng.h199
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof.h613
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_externs.h92
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h83
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h206
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_structs.h201
-rw-r--r--deps/jemalloc/include/jemalloc/internal/prof_types.h56
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/public_namespace.sh2
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ql.h47
-rw-r--r--deps/jemalloc/include/jemalloc/internal/qr.h41
-rw-r--r--deps/jemalloc/include/jemalloc/internal/quarantine.h67
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rb.h345
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rtree.h602
-rw-r--r--deps/jemalloc/include/jemalloc/internal/rtree_tsd.h50
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/size_classes.sh407
-rw-r--r--deps/jemalloc/include/jemalloc/internal/smoothstep.h232
-rwxr-xr-xdeps/jemalloc/include/jemalloc/internal/smoothstep.sh101
-rw-r--r--deps/jemalloc/include/jemalloc/internal/spin.h40
-rw-r--r--deps/jemalloc/include/jemalloc/internal/stats.h193
-rw-r--r--deps/jemalloc/include/jemalloc/internal/sz.h317
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache.h443
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_externs.h55
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_inlines.h223
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_structs.h61
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tcache_types.h56
-rw-r--r--deps/jemalloc/include/jemalloc/internal/ticker.h78
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd.h710
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_generic.h157
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h60
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_tls.h59
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_types.h10
-rw-r--r--deps/jemalloc/include/jemalloc/internal/tsd_win.h139
-rw-r--r--deps/jemalloc/include/jemalloc/internal/util.h167
-rw-r--r--deps/jemalloc/include/jemalloc/internal/witness.h346
-rwxr-xr-xdeps/jemalloc/include/jemalloc/jemalloc.sh7
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_defs.h.in25
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_macros.h.in153
-rwxr-xr-xdeps/jemalloc/include/jemalloc/jemalloc_mangle.sh2
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_protos.h.in86
-rw-r--r--deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in77
-rw-r--r--deps/jemalloc/include/msvc_compat/C99/stdbool.h (renamed from deps/jemalloc/include/msvc_compat/stdbool.h)4
-rw-r--r--deps/jemalloc/include/msvc_compat/C99/stdint.h (renamed from deps/jemalloc/include/msvc_compat/stdint.h)0
-rw-r--r--deps/jemalloc/include/msvc_compat/inttypes.h313
-rw-r--r--deps/jemalloc/include/msvc_compat/strings.h55
-rw-r--r--deps/jemalloc/include/msvc_compat/windows_extra.h6
-rw-r--r--deps/jemalloc/jemalloc.pc.in12
-rw-r--r--deps/jemalloc/m4/ax_cxx_compile_stdcxx.m4562
-rw-r--r--deps/jemalloc/msvc/ReadMe.txt23
-rw-r--r--deps/jemalloc/msvc/jemalloc_vc2015.sln63
-rw-r--r--deps/jemalloc/msvc/jemalloc_vc2017.sln63
-rw-r--r--deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj348
-rw-r--r--deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters101
-rw-r--r--deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj327
-rw-r--r--deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters26
-rw-r--r--deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj347
-rw-r--r--deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters101
-rw-r--r--deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj326
-rw-r--r--deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters26
-rw-r--r--deps/jemalloc/msvc/test_threads/test_threads.cpp88
-rw-r--r--deps/jemalloc/msvc/test_threads/test_threads.h3
-rw-r--r--deps/jemalloc/msvc/test_threads/test_threads_main.cpp11
-rwxr-xr-xdeps/jemalloc/run_tests.sh1
-rwxr-xr-xdeps/jemalloc/scripts/gen_run_tests.py112
-rwxr-xr-xdeps/jemalloc/scripts/gen_travis.py107
-rw-r--r--deps/jemalloc/src/arena.c3742
-rw-r--r--deps/jemalloc/src/atomic.c2
-rw-r--r--deps/jemalloc/src/background_thread.c909
-rw-r--r--deps/jemalloc/src/base.c556
-rw-r--r--deps/jemalloc/src/bin.c50
-rw-r--r--deps/jemalloc/src/bitmap.c113
-rw-r--r--deps/jemalloc/src/chunk.c395
-rw-r--r--deps/jemalloc/src/chunk_dss.c198
-rw-r--r--deps/jemalloc/src/chunk_mmap.c210
-rw-r--r--deps/jemalloc/src/ckh.c254
-rw-r--r--deps/jemalloc/src/ctl.c2893
-rw-r--r--deps/jemalloc/src/div.c55
-rw-r--r--deps/jemalloc/src/extent.c2190
-rw-r--r--deps/jemalloc/src/extent_dss.c270
-rw-r--r--deps/jemalloc/src/extent_mmap.c42
-rw-r--r--deps/jemalloc/src/hash.c5
-rw-r--r--deps/jemalloc/src/hooks.c12
-rw-r--r--deps/jemalloc/src/huge.c347
-rw-r--r--deps/jemalloc/src/jemalloc.c3694
-rw-r--r--deps/jemalloc/src/jemalloc_cpp.cpp141
-rw-r--r--deps/jemalloc/src/large.c371
-rw-r--r--deps/jemalloc/src/log.c78
-rw-r--r--deps/jemalloc/src/malloc_io.c (renamed from deps/jemalloc/src/util.c)250
-rw-r--r--deps/jemalloc/src/mb.c2
-rw-r--r--deps/jemalloc/src/mutex.c207
-rw-r--r--deps/jemalloc/src/mutex_pool.c18
-rw-r--r--deps/jemalloc/src/nstime.c170
-rw-r--r--deps/jemalloc/src/pages.c606
-rw-r--r--deps/jemalloc/src/prng.c3
-rw-r--r--deps/jemalloc/src/prof.c2419
-rw-r--r--deps/jemalloc/src/quarantine.c199
-rw-r--r--deps/jemalloc/src/rtree.c383
-rw-r--r--deps/jemalloc/src/stats.c1645
-rw-r--r--deps/jemalloc/src/sz.c107
-rw-r--r--deps/jemalloc/src/tcache.c790
-rw-r--r--deps/jemalloc/src/ticker.c3
-rw-r--r--deps/jemalloc/src/tsd.c292
-rw-r--r--deps/jemalloc/src/witness.c100
-rw-r--r--deps/jemalloc/src/zone.c471
-rw-r--r--deps/jemalloc/test/include/test/SFMT-alti.h12
-rw-r--r--deps/jemalloc/test/include/test/SFMT-sse2.h12
-rw-r--r--deps/jemalloc/test/include/test/SFMT.h49
-rw-r--r--deps/jemalloc/test/include/test/btalloc.h30
-rw-r--r--deps/jemalloc/test/include/test/extent_hooks.h289
-rw-r--r--deps/jemalloc/test/include/test/jemalloc_test.h.in124
-rw-r--r--deps/jemalloc/test/include/test/jemalloc_test_defs.h.in6
-rw-r--r--deps/jemalloc/test/include/test/math.h97
-rw-r--r--deps/jemalloc/test/include/test/mq.h59
-rw-r--r--deps/jemalloc/test/include/test/mtx.h2
-rw-r--r--deps/jemalloc/test/include/test/test.h449
-rw-r--r--deps/jemalloc/test/include/test/thd.h2
-rw-r--r--deps/jemalloc/test/include/test/timer.h11
-rw-r--r--deps/jemalloc/test/integration/MALLOCX_ARENA.c42
-rw-r--r--deps/jemalloc/test/integration/aligned_alloc.c42
-rw-r--r--deps/jemalloc/test/integration/allocated.c51
-rw-r--r--deps/jemalloc/test/integration/allocm.c107
-rw-r--r--deps/jemalloc/test/integration/cpp/basic.cpp25
-rw-r--r--deps/jemalloc/test/integration/extent.c248
-rw-r--r--deps/jemalloc/test/integration/extent.sh5
-rw-r--r--deps/jemalloc/test/integration/mallocx.c211
-rw-r--r--deps/jemalloc/test/integration/mallocx.sh5
-rw-r--r--deps/jemalloc/test/integration/mremap.c45
-rw-r--r--deps/jemalloc/test/integration/overflow.c46
-rw-r--r--deps/jemalloc/test/integration/posix_memalign.c42
-rw-r--r--deps/jemalloc/test/integration/rallocm.c111
-rw-r--r--deps/jemalloc/test/integration/rallocx.c133
-rw-r--r--deps/jemalloc/test/integration/sdallocx.c55
-rw-r--r--deps/jemalloc/test/integration/thread_arena.c49
-rw-r--r--deps/jemalloc/test/integration/thread_tcache_enabled.c82
-rw-r--r--deps/jemalloc/test/integration/xallocx.c347
-rw-r--r--deps/jemalloc/test/integration/xallocx.sh5
-rw-r--r--deps/jemalloc/test/src/SFMT.c98
-rw-r--r--deps/jemalloc/test/src/btalloc.c6
-rw-r--r--deps/jemalloc/test/src/btalloc_0.c3
-rw-r--r--deps/jemalloc/test/src/btalloc_1.c3
-rw-r--r--deps/jemalloc/test/src/math.c2
-rw-r--r--deps/jemalloc/test/src/mq.c27
-rw-r--r--deps/jemalloc/test/src/mtx.c42
-rw-r--r--deps/jemalloc/test/src/test.c181
-rw-r--r--deps/jemalloc/test/src/thd.c27
-rw-r--r--deps/jemalloc/test/src/timer.c56
-rw-r--r--deps/jemalloc/test/stress/microbench.c165
-rw-r--r--deps/jemalloc/test/test.sh.in31
-rw-r--r--deps/jemalloc/test/unit/SFMT.c2036
-rw-r--r--deps/jemalloc/test/unit/a0.c16
-rw-r--r--deps/jemalloc/test/unit/arena_reset.c344
-rw-r--r--deps/jemalloc/test/unit/arena_reset_prof.c4
-rw-r--r--deps/jemalloc/test/unit/arena_reset_prof.sh3
-rw-r--r--deps/jemalloc/test/unit/atomic.c229
-rw-r--r--deps/jemalloc/test/unit/background_thread.c119
-rw-r--r--deps/jemalloc/test/unit/background_thread_enable.c83
-rw-r--r--deps/jemalloc/test/unit/base.c234
-rw-r--r--deps/jemalloc/test/unit/bit_util.c57
-rw-r--r--deps/jemalloc/test/unit/bitmap.c502
-rw-r--r--deps/jemalloc/test/unit/ckh.c97
-rw-r--r--deps/jemalloc/test/unit/decay.c599
-rw-r--r--deps/jemalloc/test/unit/decay.sh3
-rw-r--r--deps/jemalloc/test/unit/div.c29
-rw-r--r--deps/jemalloc/test/unit/emitter.c413
-rw-r--r--deps/jemalloc/test/unit/extent_quantize.c141
-rw-r--r--deps/jemalloc/test/unit/fork.c141
-rw-r--r--deps/jemalloc/test/unit/hash.c78
-rw-r--r--deps/jemalloc/test/unit/hooks.c38
-rw-r--r--deps/jemalloc/test/unit/junk.c221
-rw-r--r--deps/jemalloc/test/unit/junk.sh5
-rw-r--r--deps/jemalloc/test/unit/junk_alloc.c1
-rw-r--r--deps/jemalloc/test/unit/junk_alloc.sh5
-rw-r--r--deps/jemalloc/test/unit/junk_free.c1
-rw-r--r--deps/jemalloc/test/unit/junk_free.sh5
-rw-r--r--deps/jemalloc/test/unit/log.c193
-rw-r--r--deps/jemalloc/test/unit/mallctl.c684
-rw-r--r--deps/jemalloc/test/unit/malloc_io.c (renamed from deps/jemalloc/test/unit/util.c)166
-rw-r--r--deps/jemalloc/test/unit/math.c56
-rw-r--r--deps/jemalloc/test/unit/mq.c35
-rw-r--r--deps/jemalloc/test/unit/mtx.c29
-rw-r--r--deps/jemalloc/test/unit/nstime.c249
-rw-r--r--deps/jemalloc/test/unit/pack.c166
-rw-r--r--deps/jemalloc/test/unit/pack.sh4
-rw-r--r--deps/jemalloc/test/unit/pages.c29
-rw-r--r--deps/jemalloc/test/unit/ph.c318
-rw-r--r--deps/jemalloc/test/unit/prng.c237
-rw-r--r--deps/jemalloc/test/unit/prof_accum.c45
-rw-r--r--deps/jemalloc/test/unit/prof_accum.h35
-rw-r--r--deps/jemalloc/test/unit/prof_accum.sh5
-rw-r--r--deps/jemalloc/test/unit/prof_accum_a.c3
-rw-r--r--deps/jemalloc/test/unit/prof_accum_b.c3
-rw-r--r--deps/jemalloc/test/unit/prof_active.c117
-rw-r--r--deps/jemalloc/test/unit/prof_active.sh5
-rw-r--r--deps/jemalloc/test/unit/prof_gdump.c58
-rw-r--r--deps/jemalloc/test/unit/prof_gdump.sh6
-rw-r--r--deps/jemalloc/test/unit/prof_idump.c27
-rw-r--r--deps/jemalloc/test/unit/prof_idump.sh8
-rw-r--r--deps/jemalloc/test/unit/prof_reset.c286
-rw-r--r--deps/jemalloc/test/unit/prof_reset.sh5
-rw-r--r--deps/jemalloc/test/unit/prof_tctx.c46
-rw-r--r--deps/jemalloc/test/unit/prof_tctx.sh5
-rw-r--r--deps/jemalloc/test/unit/prof_thread_name.c120
-rw-r--r--deps/jemalloc/test/unit/prof_thread_name.sh5
-rw-r--r--deps/jemalloc/test/unit/ql.c51
-rw-r--r--deps/jemalloc/test/unit/qr.c65
-rw-r--r--deps/jemalloc/test/unit/quarantine.c108
-rw-r--r--deps/jemalloc/test/unit/rb.c160
-rw-r--r--deps/jemalloc/test/unit/retained.c181
-rw-r--r--deps/jemalloc/test/unit/rtree.c283
-rw-r--r--deps/jemalloc/test/unit/size_classes.c183
-rw-r--r--deps/jemalloc/test/unit/slab.c32
-rw-r--r--deps/jemalloc/test/unit/smoothstep.c102
-rw-r--r--deps/jemalloc/test/unit/spin.c18
-rw-r--r--deps/jemalloc/test/unit/stats.c390
-rw-r--r--deps/jemalloc/test/unit/stats_print.c999
-rw-r--r--deps/jemalloc/test/unit/ticker.c73
-rw-r--r--deps/jemalloc/test/unit/tsd.c152
-rw-r--r--deps/jemalloc/test/unit/witness.c280
-rw-r--r--deps/jemalloc/test/unit/zero.c51
-rw-r--r--deps/jemalloc/test/unit/zero.sh5
-rw-r--r--deps/linenoise/README.markdown176
-rw-r--r--deps/linenoise/example.c10
-rw-r--r--deps/linenoise/linenoise.c130
-rw-r--r--deps/linenoise/linenoise.h15
-rw-r--r--deps/lua/src/ldo.c2
-rw-r--r--deps/lua/src/lua_cmsgpack.c126
-rw-r--r--deps/lua/src/lua_struct.c44
-rw-r--r--redis.conf469
-rwxr-xr-xruntest2
-rw-r--r--sentinel.conf43
-rw-r--r--src/Makefile96
-rw-r--r--src/Makefile.dep142
-rw-r--r--src/adlist.c53
-rw-r--r--src/adlist.h2
-rw-r--r--src/ae.c142
-rw-r--r--src/ae.h20
-rw-r--r--src/ae_epoll.c6
-rw-r--r--src/ae_select.c1
-rw-r--r--src/anet.c47
-rw-r--r--src/anet.h3
-rw-r--r--src/aof.c758
-rw-r--r--src/atomicvar.h133
-rw-r--r--src/bio.c88
-rw-r--r--src/bio.h9
-rw-r--r--src/bitops.c699
-rw-r--r--src/blocked.c506
-rw-r--r--src/childinfo.c85
-rw-r--r--src/cluster.c2399
-rw-r--r--src/cluster.h237
-rw-r--r--src/config.c1246
-rw-r--r--src/config.h35
-rw-r--r--src/crc16.c2
-rw-r--r--src/db.c930
-rw-r--r--src/debug.c653
-rw-r--r--src/debugmacro.h41
-rw-r--r--src/defrag.c1140
-rw-r--r--src/dict.c644
-rw-r--r--src/dict.h42
-rw-r--r--src/endianconv.h8
-rw-r--r--src/evict.c620
-rw-r--r--src/expire.c526
-rw-r--r--src/geo.c818
-rw-r--r--src/geo.h22
-rw-r--r--src/geohash.c295
-rw-r--r--src/geohash.h118
-rw-r--r--src/geohash_helper.c235
-rw-r--r--src/geohash_helper.h70
-rw-r--r--src/help.h318
-rw-r--r--src/hyperloglog.c426
-rw-r--r--src/intset.c88
-rw-r--r--src/intset.h3
-rw-r--r--src/latency.c35
-rw-r--r--src/latency.h4
-rw-r--r--src/lazyfree.c152
-rw-r--r--src/listpack.c783
-rw-r--r--src/listpack.h61
-rw-r--r--src/listpack_malloc.h45
-rw-r--r--src/localtime.c123
-rw-r--r--src/lzfP.h6
-rw-r--r--src/lzf_d.c23
-rw-r--r--src/memtest.c225
-rw-r--r--src/module.c4703
-rw-r--r--src/modules/.gitignore2
-rw-r--r--src/modules/Makefile52
-rw-r--r--src/modules/gendoc.rb51
-rw-r--r--src/modules/helloblock.c219
-rw-r--r--src/modules/hellocluster.c108
-rw-r--r--src/modules/hellotimer.c79
-rw-r--r--src/modules/hellotype.c286
-rw-r--r--src/modules/helloworld.c621
-rw-r--r--src/modules/testmodule.c445
-rw-r--r--src/multi.c89
-rw-r--r--src/networking.c1435
-rw-r--r--src/notify.c68
-rw-r--r--src/object.c1105
-rw-r--r--src/pubsub.c60
-rw-r--r--src/quicklist.c28
-rw-r--r--src/quicklist.h8
-rw-r--r--src/rax.c1810
-rw-r--r--src/rax.h184
-rw-r--r--src/rax_malloc.h44
-rw-r--r--src/rdb.c1727
-rw-r--r--src/rdb.h137
-rw-r--r--src/redis-benchmark.c59
-rw-r--r--src/redis-check-aof.c41
-rw-r--r--src/redis-check-dump.c771
-rw-r--r--src/redis-check-rdb.c363
-rw-r--r--src/redis-cli.c4996
-rwxr-xr-xsrc/redis-trib.rb1448
-rw-r--r--src/redis.h1583
-rw-r--r--src/redisassert.h8
-rw-r--r--src/redismodule.h457
-rw-r--r--src/replication.c1700
-rw-r--r--src/rio.c23
-rw-r--r--src/rio.h7
-rw-r--r--src/scripting.c1844
-rw-r--r--src/sds.c444
-rw-r--r--src/sds.h197
-rw-r--r--src/sdsalloc.h42
-rw-r--r--src/sentinel.c1514
-rw-r--r--src/server.c (renamed from src/redis.c)2554
-rw-r--r--src/server.h2136
-rw-r--r--src/setproctitle.c6
-rw-r--r--src/sha1.c14
-rw-r--r--src/sha1.h8
-rw-r--r--src/siphash.c360
-rw-r--r--src/slowlog.c53
-rw-r--r--src/slowlog.h8
-rw-r--r--src/sort.c159
-rw-r--r--src/sparkline.c2
-rw-r--r--src/stream.h113
-rw-r--r--src/syncio.c12
-rw-r--r--src/t_hash.c634
-rw-r--r--src/t_list.c575
-rw-r--r--src/t_set.c697
-rw-r--r--src/t_stream.c2369
-rw-r--r--src/t_string.c174
-rw-r--r--src/t_zset.c1667
-rw-r--r--src/util.c233
-rw-r--r--src/util.h10
-rw-r--r--src/version.h2
-rw-r--r--src/ziplist.c428
-rw-r--r--src/ziplist.h6
-rw-r--r--src/zmalloc.c146
-rw-r--r--src/zmalloc.h27
-rw-r--r--tests/assets/default.conf1
-rw-r--r--tests/cluster/cluster.tcl10
-rw-r--r--tests/cluster/run.tcl2
-rw-r--r--tests/cluster/tests/04-resharding.tcl84
-rw-r--r--tests/cluster/tests/05-slave-selection.tcl77
-rw-r--r--tests/cluster/tests/07-replica-migration.tcl56
-rw-r--r--tests/cluster/tests/10-manual-failover.tcl192
-rw-r--r--tests/cluster/tests/11-manual-takeover.tcl59
-rw-r--r--tests/cluster/tests/12-replica-migration-2.tcl64
-rw-r--r--tests/cluster/tests/13-no-failover-option.tcl61
-rw-r--r--tests/cluster/tests/includes/init-tests.tcl9
-rw-r--r--tests/helpers/bg_block_op.tcl52
-rw-r--r--tests/instances.tcl132
-rw-r--r--tests/integration/aof-race.tcl2
-rw-r--r--tests/integration/aof.tcl8
-rw-r--r--tests/integration/block-repl.tcl58
-rw-r--r--tests/integration/logging.tcl24
-rw-r--r--tests/integration/psync2-reg.tcl78
-rw-r--r--tests/integration/psync2.tcl249
-rw-r--r--tests/integration/rdb.tcl51
-rw-r--r--tests/integration/replication-2.tcl9
-rw-r--r--tests/integration/replication-3.tcl35
-rw-r--r--tests/integration/replication-4.tcl19
-rw-r--r--tests/integration/replication-psync.tcl67
-rw-r--r--tests/integration/replication.tcl105
-rw-r--r--tests/sentinel/run.tcl1
-rw-r--r--tests/sentinel/tests/05-manual.tcl3
-rw-r--r--tests/sentinel/tests/06-ckquorum.tcl34
-rw-r--r--tests/sentinel/tests/07-down-conditions.tcl78
-rw-r--r--tests/support/cluster.tcl6
-rw-r--r--tests/support/server.tcl19
-rw-r--r--tests/support/test.tcl26
-rw-r--r--tests/support/util.tcl84
-rw-r--r--tests/test_helper.tcl122
-rw-r--r--tests/unit/aofrw.tcl105
-rw-r--r--tests/unit/bitfield.tcl201
-rw-r--r--tests/unit/bitops.tcl14
-rw-r--r--tests/unit/dump.tcl148
-rw-r--r--tests/unit/expire.tcl23
-rw-r--r--tests/unit/geo.tcl311
-rw-r--r--tests/unit/hyperloglog.tcl19
-rw-r--r--tests/unit/introspection-2.tcl23
-rw-r--r--tests/unit/introspection.tcl7
-rw-r--r--tests/unit/keyspace.tcl275
-rw-r--r--tests/unit/latency-monitor.tcl14
-rw-r--r--tests/unit/lazyfree.tcl39
-rw-r--r--tests/unit/maxmemory.tcl96
-rw-r--r--tests/unit/memefficiency.tcl182
-rw-r--r--tests/unit/other.tcl5
-rw-r--r--tests/unit/pendingquerybuf.tcl35
-rw-r--r--tests/unit/scan.tcl46
-rw-r--r--tests/unit/scripting.tcl290
-rw-r--r--tests/unit/slowlog.tcl23
-rw-r--r--tests/unit/type/hash.tcl76
-rw-r--r--tests/unit/type/incr.tcl154
-rw-r--r--tests/unit/type/list-3.tcl44
-rw-r--r--tests/unit/type/list.tcl4
-rw-r--r--tests/unit/type/set.tcl28
-rw-r--r--tests/unit/type/stream-cgroups.tcl99
-rw-r--r--tests/unit/type/stream.tcl319
-rw-r--r--tests/unit/type/string.tcl (renamed from tests/unit/basic.tcl)394
-rw-r--r--tests/unit/type/zset.tcl270
-rw-r--r--tests/unit/wait.tcl42
-rw-r--r--utils/cluster_fail_time.tcl50
-rw-r--r--utils/corrupt_rdb.c44
-rw-r--r--utils/create-cluster/.gitignore5
-rw-r--r--utils/create-cluster/README27
-rwxr-xr-xutils/create-cluster/create-cluster102
-rwxr-xr-xutils/generate-command-help.rb5
-rw-r--r--utils/graphs/commits-over-time/README.md16
-rwxr-xr-xutils/graphs/commits-over-time/genhtml.tcl96
-rw-r--r--utils/hashtable/README13
-rw-r--r--utils/hashtable/rehashing.c142
-rw-r--r--utils/hyperloglog/hll-err.rb2
-rw-r--r--utils/hyperloglog/hll-gnuplot-graph.rb2
-rwxr-xr-xutils/install_server.sh107
-rw-r--r--utils/lru/README10
-rw-r--r--utils/lru/lfu-simulation.c158
-rw-r--r--utils/lru/test-lru.rb301
-rwxr-xr-xutils/redis_init_script8
-rwxr-xr-xutils/releasetools/03_test_release.sh (renamed from utils/releasetools/00_test_release.sh)2
-rwxr-xr-xutils/releasetools/04_release_hash.sh (renamed from utils/releasetools/03_release_hash.sh)4
-rwxr-xr-xutils/releasetools/changelog.tcl30
563 files changed, 106309 insertions, 38782 deletions
diff --git a/.gitignore b/.gitignore
index d3b1c2f24..a188cfc82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
dump.rdb
redis-benchmark
redis-check-aof
+redis-check-rdb
redis-check-dump
redis-cli
redis-sentinel
@@ -26,3 +27,4 @@ deps/lua/src/liblua.a
.make-*
.prerequisites
*.dSYM
+Makefile.dep
diff --git a/CONTRIBUTING b/CONTRIBUTING
index b416b9561..7dee24c74 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -8,27 +8,31 @@ each source file that you contribute.
# IMPORTANT: HOW TO USE REDIS GITHUB ISSUES
* Github issues SHOULD ONLY BE USED to report bugs, and for DETAILED feature
- requests. Everything else belongs to the Redis Google Group.
+ requests. Everything else belongs to the Redis Google Group:
+
+ https://groups.google.com/forum/m/#!forum/Redis-db
PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected
bugs in the Github issues system. We'll be very happy to help you and provide
- all the support in the Redis Google Group.
+ all the support at the Reddit sub:
- Redis Google Group address:
-
- https://groups.google.com/forum/?fromgroups#!forum/redis-db
+ http://reddit.com/r/redis
+
+ There is also an active community of Redis users at Stack Overflow:
+
+ http://stackoverflow.com/questions/tagged/redis
# How to provide a patch for a new feature
-1. If it is a major feature or a semantical change, write an RCP (Redis Change Proposal). Check the documentation here: https://github.com/redis/redis-rcp
+1. If it is a major feature or a semantical change, please post it as a new submission in r/redis on Reddit at http://reddit.com/r/redis. Try to be passionate about why the feature is needed, make users upvote your proposal to gain traction and so forth. Read feedbacks about the community. But in this first step **please don't write code yet**.
-2. If in step 1 you get an acknowledge from the project leaders, use the
+2. If in step 1 you get an acknowledgment from the project leaders, use the
following procedure to submit a patch:
a. Fork Redis on github ( http://help.github.com/fork-a-repo/ )
b. Create a topic branch (git checkout -b my_branch)
c. Push to your branch (git push origin my_branch)
- d. Initiate a pull request on github ( http://help.github.com/send-pull-requests/ )
+ d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ )
e. Done :)
For minor fixes just open a pull request on Github.
diff --git a/COPYING b/COPYING
index a58de44dd..ac68e012b 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (c) 2006-2014, Salvatore Sanfilippo
+Copyright (c) 2006-2015, Salvatore Sanfilippo
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/README b/README
deleted file mode 100644
index 369118631..000000000
--- a/README
+++ /dev/null
@@ -1,173 +0,0 @@
-Where to find complete Redis documentation?
--------------------------------------------
-
-This README is just a fast "quick start" document. You can find more detailed
-documentation at http://redis.io
-
-Building Redis
---------------
-
-Redis can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD.
-We support big endian and little endian architectures.
-
-It may compile on Solaris derived systems (for instance SmartOS) but our
-support for this platform is "best effort" and Redis is not guaranteed to
-work as well as in Linux, OSX, and *BSD there.
-
-It is as simple as:
-
- % make
-
-You can run a 32 bit Redis binary using:
-
- % make 32bit
-
-After building Redis is a good idea to test it, using:
-
- % make test
-
-Fixing build problems with dependencies or cached build options
----------
-Redis has some dependencies which are included into the "deps" directory.
-"make" does not rebuild dependencies automatically, even if something in the
-source code of dependencies is changes.
-
-When you update the source code with `git pull` or when code inside the
-dependencies tree is modified in any other way, make sure to use the following
-command in order to really clean everything and rebuild from scratch:
-
- make distclean
-
-This will clean: jemalloc, lua, hiredis, linenoise.
-
-Also if you force certain build options like 32bit target, no C compiler
-optimizations (for debugging purposes), and other similar build time options,
-those options are cached indefinitely until you issue a "make distclean"
-command.
-
-Fixing problems building 32 bit binaries
----------
-
-If after building Redis with a 32 bit target you need to rebuild it
-with a 64 bit target, or the other way around, you need to perform a
-"make distclean" in the root directory of the Redis distribution.
-
-In case of build errors when trying to build a 32 bit binary of Redis, try
-the following steps:
-
-* Install the packages libc6-dev-i386 (also try g++-multilib).
-* Try using the following command line instead of "make 32bit":
-
- make CFLAGS="-m32 -march=native" LDFLAGS="-m32"
-
-Allocator
----------
-
-Selecting a non-default memory allocator when building Redis is done by setting
-the `MALLOC` environment variable. Redis is compiled and linked against libc
-malloc by default, with the exception of jemalloc being the default on Linux
-systems. This default was picked because jemalloc has proven to have fewer
-fragmentation problems than libc malloc.
-
-To force compiling against libc malloc, use:
-
- % make MALLOC=libc
-
-To compile against jemalloc on Mac OS X systems, use:
-
- % make MALLOC=jemalloc
-
-Verbose build
--------------
-
-Redis will build with a user friendly colorized output by default.
-If you want to see a more verbose output use the following:
-
- % make V=1
-
-Running Redis
--------------
-
-To run Redis with the default configuration just type:
-
- % cd src
- % ./redis-server
-
-If you want to provide your redis.conf, you have to run it using an additional
-parameter (the path of the configuration file):
-
- % cd src
- % ./redis-server /path/to/redis.conf
-
-It is possible to alter the Redis configuration passing parameters directly
-as options using the command line. Examples:
-
- % ./redis-server --port 9999 --slaveof 127.0.0.1 6379
- % ./redis-server /etc/redis/6379.conf --loglevel debug
-
-All the options in redis.conf are also supported as options using the command
-line, with exactly the same name.
-
-Playing with Redis
-------------------
-
-You can use redis-cli to play with Redis. Start a redis-server instance,
-then in another terminal try the following:
-
- % cd src
- % ./redis-cli
- redis> ping
- PONG
- redis> set foo bar
- OK
- redis> get foo
- "bar"
- redis> incr mycounter
- (integer) 1
- redis> incr mycounter
- (integer) 2
- redis>
-
-You can find the list of all the available commands here:
-
- http://redis.io/commands
-
-Installing Redis
------------------
-
-In order to install Redis binaries into /usr/local/bin just use:
-
- % make install
-
-You can use "make PREFIX=/some/other/directory install" if you wish to use a
-different destination.
-
-Make install will just install binaries in your system, but will not configure
-init scripts and configuration files in the appropriate place. This is not
-needed if you want just to play a bit with Redis, but if you are installing
-it the proper way for a production system, we have a script doing this
-for Ubuntu and Debian systems:
-
- % cd utils
- % ./install_server.sh
-
-The script will ask you a few questions and will setup everything you need
-to run Redis properly as a background daemon that will start again on
-system reboots.
-
-You'll be able to stop and start Redis using the script named
-/etc/init.d/redis_<portnumber>, for instance /etc/init.d/redis_6379.
-
-Code contributions
----
-
-Note: by contributing code to the Redis project in any form, including sending
-a pull request via Github, a code fragment or patch via private email or
-public discussion groups, you agree to release your code under the terms
-of the BSD license that you can find in the COPYING file included in the Redis
-source distribution.
-
-Please see the CONTRIBUTING file in this source distribution for more
-information.
-
-Enjoy!
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..8dbad7dbf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,446 @@
+This README is just a fast *quick start* document. You can find more detailed documentation at [redis.io](https://redis.io).
+
+What is Redis?
+--------------
+
+Redis is often referred to as a *data structures* server. What this means is that Redis provides access to mutable data structures via a set of commands, which are sent using a *server-client* model with TCP sockets and a simple protocol. So different processes can query and modify the same data structures in a shared way.
+
+Data structures implemented into Redis have a few special properties:
+
+* Redis takes care to store them on disk, even though they are always served and modified in the server memory. This means that Redis is fast, but also non-volatile.
+* The implementation of the data structures stresses memory efficiency, so data structures inside Redis will likely use less memory compared to the same data structures modeled using a high-level programming language.
+* Redis offers a number of features that are natural to find in a database, like replication, tunable levels of durability, clustering and high availability.
+
+Another good way to think of Redis is as a more complex version of memcached, where the operations are not just SETs and GETs, but operations that work with complex data types like Lists, Sets, ordered data structures, and so forth.
+
+If you want to know more, this is a list of selected starting points:
+
+* Introduction to Redis data types. http://redis.io/topics/data-types-intro
+* Try Redis directly inside your browser. http://try.redis.io
+* The full list of Redis commands. http://redis.io/commands
+* There is much more inside the Redis official documentation. http://redis.io/documentation
+
+Building Redis
+--------------
+
+Redis can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD.
+We support big endian and little endian architectures, and both 32 bit
+and 64 bit systems.
+
+It may compile on Solaris derived systems (for instance SmartOS) but our
+support for this platform is *best effort* and Redis is not guaranteed to
+work as well as in Linux, OSX, and \*BSD there.
+
+It is as simple as:
+
+ % make
+
+You can run a 32 bit Redis binary using:
+
+ % make 32bit
+
+After building Redis, it is a good idea to test it using:
+
+ % make test
+
+Fixing build problems with dependencies or cached build options
+---------
+
+Redis has some dependencies which are included into the `deps` directory.
+`make` does not automatically rebuild dependencies even if something in
+the source code of dependencies changes.
+
+When you update the source code with `git pull` or when code inside the
+dependencies tree is modified in any other way, make sure to use the following
+command in order to really clean everything and rebuild from scratch:
+
+ make distclean
+
+This will clean: jemalloc, lua, hiredis, linenoise.
+
+Also if you force certain build options like 32bit target, no C compiler
+optimizations (for debugging purposes), and other similar build time options,
+those options are cached indefinitely until you issue a `make distclean`
+command.
+
+Fixing problems building 32 bit binaries
+---------
+
+If after building Redis with a 32 bit target you need to rebuild it
+with a 64 bit target, or the other way around, you need to perform a
+`make distclean` in the root directory of the Redis distribution.
+
+In case of build errors when trying to build a 32 bit binary of Redis, try
+the following steps:
+
+* Install the packages libc6-dev-i386 (also try g++-multilib).
+* Try using the following command line instead of `make 32bit`:
+ `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"`
+
+Allocator
+---------
+
+Selecting a non-default memory allocator when building Redis is done by setting
+the `MALLOC` environment variable. Redis is compiled and linked against libc
+malloc by default, with the exception of jemalloc being the default on Linux
+systems. This default was picked because jemalloc has proven to have fewer
+fragmentation problems than libc malloc.
+
+To force compiling against libc malloc, use:
+
+ % make MALLOC=libc
+
+To compile against jemalloc on Mac OS X systems, use:
+
+ % make MALLOC=jemalloc
+
+Verbose build
+-------------
+
+Redis will build with a user friendly colorized output by default.
+If you want to see a more verbose output use the following:
+
+ % make V=1
+
+Running Redis
+-------------
+
+To run Redis with the default configuration just type:
+
+ % cd src
+ % ./redis-server
+
+If you want to provide your redis.conf, you have to run it using an additional
+parameter (the path of the configuration file):
+
+ % cd src
+ % ./redis-server /path/to/redis.conf
+
+It is possible to alter the Redis configuration by passing parameters directly
+as options using the command line. Examples:
+
+ % ./redis-server --port 9999 --slaveof 127.0.0.1 6379
+ % ./redis-server /etc/redis/6379.conf --loglevel debug
+
+All the options in redis.conf are also supported as options using the command
+line, with exactly the same name.
+
+Playing with Redis
+------------------
+
+You can use redis-cli to play with Redis. Start a redis-server instance,
+then in another terminal try the following:
+
+ % cd src
+ % ./redis-cli
+ redis> ping
+ PONG
+ redis> set foo bar
+ OK
+ redis> get foo
+ "bar"
+ redis> incr mycounter
+ (integer) 1
+ redis> incr mycounter
+ (integer) 2
+ redis>
+
+You can find the list of all the available commands at http://redis.io/commands.
+
+Installing Redis
+-----------------
+
+In order to install Redis binaries into /usr/local/bin just use:
+
+ % make install
+
+You can use `make PREFIX=/some/other/directory install` if you wish to use a
+different destination.
+
+Make install will just install binaries in your system, but will not configure
+init scripts and configuration files in the appropriate place. This is not
+needed if you want just to play a bit with Redis, but if you are installing
+it the proper way for a production system, we have a script doing this
+for Ubuntu and Debian systems:
+
+ % cd utils
+ % ./install_server.sh
+
+The script will ask you a few questions and will set up everything you need
+to run Redis properly as a background daemon that will start again on
+system reboots.
+
+You'll be able to stop and start Redis using the script named
+`/etc/init.d/redis_<portnumber>`, for instance `/etc/init.d/redis_6379`.
+
+Code contributions
+-----------------
+
+Note: by contributing code to the Redis project in any form, including sending
+a pull request via Github, a code fragment or patch via private email or
+public discussion groups, you agree to release your code under the terms
+of the BSD license that you can find in the [COPYING][1] file included in the Redis
+source distribution.
+
+Please see the [CONTRIBUTING][2] file in this source distribution for more
+information.
+
+[1]: https://github.com/antirez/redis/blob/unstable/COPYING
+[2]: https://github.com/antirez/redis/blob/unstable/CONTRIBUTING
+
+Redis internals
+===
+
+If you are reading this README you are likely in front of a Github page
+or you just untarred the Redis distribution tar ball. In both cases
+you are basically one step away from the source code, so here we explain
+the Redis source code layout, what is in each file as a general idea, the
+most important functions and structures inside the Redis server and so forth.
+We keep all the discussion at a high level without digging into the details
+since this document would be huge otherwise and our code base changes
+continuously, but a general idea should be a good starting point to
+understand more. Moreover most of the code is heavily commented and easy
+to follow.
+
+Source code layout
+---
+
+The Redis root directory just contains this README, the Makefile which
+calls the real Makefile inside the `src` directory and an example
+configuration for Redis and Sentinel. You can find a few shell
+scripts that are used in order to execute the Redis, Redis Cluster and
+Redis Sentinel unit tests, which are implemented inside the `tests`
+directory.
+
+Inside the root are the following important directories:
+
+* `src`: contains the Redis implementation, written in C.
+* `tests`: contains the unit tests, implemented in Tcl.
+* `deps`: contains libraries Redis uses. Everything needed to compile Redis is inside this directory; your system just needs to provide `libc`, a POSIX compatible interface and a C compiler. Notably `deps` contains a copy of `jemalloc`, which is the default allocator of Redis under Linux. Note that under `deps` there are also things which started with the Redis project, but for which the main repository is not `antirez/redis`. An exception to this rule is `deps/geohash-int` which is the low level geocoding library used by Redis: it originated from a different project, but at this point it diverged so much that it is developed as a separate entity directly inside the Redis repository.
+
+There are a few more directories but they are not very important for our goals
+here. We'll focus mostly on `src`, where the Redis implementation is contained,
+exploring what there is inside each file. The order in which files are
+exposed is the logical one to follow in order to disclose different layers
+of complexity incrementally.
+
+Note: lately Redis was refactored quite a bit. Function names and file
+names have been changed, so you may find that this documentation reflects the
+`unstable` branch more closely. For instance in Redis 3.0 the `server.c`
+and `server.h` files were named `redis.c` and `redis.h`. However the overall
+structure is the same. Keep in mind that all the new developments and pull
+requests should be performed against the `unstable` branch.
+
+server.h
+---
+
+The simplest way to understand how a program works is to understand the
+data structures it uses. So we'll start from the main header file of
+Redis, which is `server.h`.
+
+All the server configuration and in general all the shared state is
+defined in a global structure called `server`, of type `struct redisServer`.
+A few important fields in this structure are:
+
+* `server.db` is an array of Redis databases, where data is stored.
+* `server.commands` is the command table.
+* `server.clients` is a linked list of clients connected to the server.
+* `server.master` is a special client, the master, if the instance is a slave.
+
+There are tons of other fields. Most fields are commented directly inside
+the structure definition.
+
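+To give an idea, here is a trimmed, illustrative sketch of the fields
+mentioned above (the real structure in `server.h` is much bigger and is
+heavily commented):
+
+    struct redisServer {
+        redisDb *db;        /* Array of server.dbnum Redis databases. */
+        dict *commands;     /* Command table: name -> command struct. */
+        list *clients;      /* Linked list of connected clients. */
+        client *master;     /* The master client, if we are a slave. */
+        /* ... many, many other fields ... */
+    };
+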
+Another important Redis data structure is the one defining a client.
+In the past it was called `redisClient`, now just `client`. The structure
+has many fields; here we'll just show the main ones:
+
+ struct client {
+ int fd;
+ sds querybuf;
+ int argc;
+ robj **argv;
+ redisDb *db;
+ int flags;
+ list *reply;
+ char buf[PROTO_REPLY_CHUNK_BYTES];
+ ... many other fields ...
+ }
+
+The client structure defines a *connected client*:
+
+* The `fd` field is the client socket file descriptor.
+* `argc` and `argv` are populated with the command the client is executing, so that functions implementing a given Redis command can read the arguments.
+* `querybuf` accumulates the requests from the client, which are parsed by the Redis server according to the Redis protocol and executed by calling the implementations of the commands the client is executing.
+* `reply` and `buf` are dynamic and static buffers that accumulate the replies the server sends to the client. These buffers are incrementally written to the socket as soon as the file descriptor is writable.
+
+As you can see in the client structure above, arguments in a command
+are described as `robj` structures. The following is the full `robj`
+structure, which defines a *Redis object*:
+
+ typedef struct redisObject {
+ unsigned type:4;
+ unsigned encoding:4;
+ unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */
+ int refcount;
+ void *ptr;
+ } robj;
+
+Basically this structure can represent all the basic Redis data types like
+strings, lists, sets, sorted sets and so forth. The interesting thing is that
+it has a `type` field, so that it is possible to know what type a given
+object has, and a `refcount`, so that the same object can be referenced
+in multiple places without allocating it multiple times. Finally the `ptr`
+field points to the actual representation of the object, which might vary
+even for the same type, depending on the `encoding` used.
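+
+For instance, a string object can be encoded in multiple ways. A hypothetical
+helper inspecting the encoding could look like this (the `OBJ_*` constants
+are defined in `server.h`):
+
+    void printStringObject(robj *o) {
+        if (o->type != OBJ_STRING) return;
+        if (o->encoding == OBJ_ENCODING_INT) {
+            /* Integer-encoded strings store the value itself in ptr. */
+            printf("%ld\n", (long)o->ptr);
+        } else {
+            /* OBJ_ENCODING_RAW and OBJ_ENCODING_EMBSTR point to an sds
+             * string instead. */
+            printf("%s\n", (char*)o->ptr);
+        }
+    }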
+
+Redis objects are used extensively in the Redis internals; however, in order
+to avoid the overhead of indirect accesses, recently in many places
+we just use plain dynamic strings not wrapped inside a Redis object.
+
+server.c
+---
+
+This is the entry point of the Redis server, where the `main()` function
+is defined. The following are the most important steps in order to start up
+the Redis server.
+
+* `initServerConfig()` sets up the default values of the `server` structure.
+* `initServer()` allocates the data structures needed to operate, sets up the listening socket, and so forth.
+* `aeMain()` starts the event loop which listens for new connections.
+
+There are two special functions called periodically by the event loop:
+
+1. `serverCron()` is called periodically (according to `server.hz` frequency), and performs tasks that must be performed from time to time, like checking for timed-out clients.
+2. `beforeSleep()` is called every time the event loop fires: Redis has served a few requests and is about to go back to waiting for events.
+
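+As a sketch of how the periodic callback is wired into the event loop (the
+actual registration happens inside `initServer()`):
+
+    /* Fire serverCron() after 1 millisecond; the return value of
+     * serverCron() schedules the next call. */
+    aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
+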
+Inside server.c you can find code that handles other vital tasks of the Redis server:
+
+* `call()` is used in order to call a given command in the context of a given client.
+* `activeExpireCycle()` handles eviction of keys with a time to live set via the `EXPIRE` command.
+* `freeMemoryIfNeeded()` is called when a new write command should be performed but Redis is out of memory according to the `maxmemory` directive.
+* The global variable `redisCommandTable` defines all the Redis commands, specifying the name of the command, the function implementing the command, the number of arguments required, and other properties of each command.
+
+networking.c
+---
+
+This file defines all the I/O functions with clients, masters and slaves
+(which in Redis are just special clients):
+
+* `createClient()` allocates and initializes a new client.
+* the `addReply*()` family of functions are used by command implementations in order to append data to the client structure, which will be transmitted to the client as the reply for a given executed command.
+* `writeToClient()` transmits the data pending in the output buffers to the client and is called by the *writable event handler* `sendReplyToClient()`.
+* `readQueryFromClient()` is the *readable event handler* and accumulates the data read from the client into the query buffer.
+* `processInputBuffer()` is the entry point in order to parse the client query buffer according to the Redis protocol. Once commands are ready to be processed, it calls `processCommand()` which is defined inside `server.c` in order to actually execute the command.
+* `freeClient()` deallocates, disconnects and removes a client.
+
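+For instance, when a new client is created its socket is registered with the
+event loop, roughly as `createClient()` does:
+
+    /* Call readQueryFromClient() every time the socket is readable. */
+    aeCreateFileEvent(server.el, fd, AE_READABLE, readQueryFromClient, c);
+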
+aof.c and rdb.c
+---
+
+As you can guess from the names these files implement the RDB and AOF
+persistence for Redis. Redis uses a persistence model based on the `fork()`
+system call in order to create a child process with the same (shared) memory
+content of the main Redis process. This secondary process dumps the content
+of the memory on disk. This is used by `rdb.c` to create the snapshots
+on disk and by `aof.c` in order to perform the AOF rewrite when the
+append only file gets too big.
+
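+A minimal sketch of this fork-based pattern (the real logic, with all the
+needed error handling, lives in functions like `rdbSaveBackground()` inside
+`rdb.c`):
+
+    pid_t childpid = fork();
+    if (childpid == 0) {
+        /* Child: sees a copy-on-write snapshot of the parent memory, so
+         * it can serialize the dataset to disk undisturbed, then exit. */
+    } else if (childpid > 0) {
+        /* Parent: keeps serving clients while the child writes. */
+    }
+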
+The implementation inside `aof.c` has additional functions in order to
+implement an API that allows commands to append new commands into the AOF
+file as clients execute them.
+
+The `call()` function defined inside `server.c` is responsible for calling
+the functions that in turn will write the commands into the AOF.
+
+db.c
+---
+
+Certain Redis commands operate on specific data types; others are general.
+Examples of generic commands are `DEL` and `EXPIRE`. They operate on keys
+and not on their values specifically. All those generic commands are
+defined inside `db.c`.
+
+Moreover `db.c` implements an API in order to perform certain operations
+on the Redis dataset without directly accessing the internal data structures.
+
+The most important functions inside `db.c` which are used in many command
+implementations are the following:
+
+* `lookupKeyRead()` and `lookupKeyWrite()` are used in order to get a pointer to the value associated with a given key, or `NULL` if the key does not exist.
+* `dbAdd()` and its higher level counterpart `setKey()` create a new key in a Redis database.
+* `dbDelete()` removes a key and its associated value.
+* `emptyDb()` removes an entire single database or all the databases defined.
+
+The rest of the file implements the generic commands exposed to the client.
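+
+As a toy example of how a generic command could be built on top of this API
+(hypothetical and simplified: the real `DEL` implementation also handles
+expires, keyspace notifications and the dirty counter):
+
+    void mydelCommand(client *c) {
+        int deleted = 0, j;
+
+        for (j = 1; j < c->argc; j++)
+            if (dbDelete(c->db, c->argv[j])) deleted++;
+        addReplyLongLong(c, deleted);
+    }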
+
+object.c
+---
+
+The `robj` structure defining Redis objects was already described. Inside
+`object.c` there are all the functions that operate with Redis objects at
+a basic level, like functions to allocate new objects, handle the reference
+counting and so forth. Notable functions inside this file:
+
+* `incrRefCount()` and `decrRefCount()` are used in order to increment or decrement an object's reference count. When it drops to 0 the object is finally freed.
+* `createObject()` allocates a new object. There are also specialized functions to allocate string objects having a specific content, like `createStringObjectFromLongLong()` and similar functions.
+
+This file also implements the `OBJECT` command.
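+
+A short, illustrative fragment showing the reference counting in action:
+
+    robj *o = createStringObject("hello", 5);   /* refcount == 1 */
+    incrRefCount(o);    /* Stored in a second place: refcount == 2. */
+    decrRefCount(o);    /* One reference dropped: refcount == 1. */
+    decrRefCount(o);    /* Last reference dropped: the object is freed. */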
+
+replication.c
+---
+
+This is one of the most complex files inside Redis; it is recommended to
+approach it only after getting a bit familiar with the rest of the code base.
+This file implements both the master and the slave role of Redis.
+
+One of the most important functions inside this file is `replicationFeedSlaves()`, which writes commands to the clients representing slave instances connected
+to our master, so that the slaves can receive the writes performed by the clients:
+this way their data set will remain synchronized with the one in the master.
+
+This file also implements both the `SYNC` and `PSYNC` commands that are
+used in order to perform the first synchronization between masters and
+slaves, or to continue the replication after a disconnection.
+
+Other C files
+---
+
+* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c` and `t_zset.c` contain the implementations of the Redis data types. They implement both an API to access a given data type and the client command implementations for these data types.
+* `ae.c` implements the Redis event loop. It's a self-contained library which is simple to read and understand.
+* `sds.c` is the Redis string library; check http://github.com/antirez/sds for more information.
+* `anet.c` is a library to use POSIX networking in a simpler way compared to the raw interface exposed by the kernel.
+* `dict.c` is an implementation of a non-blocking hash table which rehashes incrementally.
+* `scripting.c` implements Lua scripting. It is completely separate from the rest of the Redis implementation and is simple enough to understand if you are familiar with the Lua API.
+* `cluster.c` implements the Redis Cluster. Probably a good read only after being very familiar with the rest of the Redis code base. If you want to read `cluster.c` make sure to read the [Redis Cluster specification][3].
+
+[3]: http://redis.io/topics/cluster-spec
+
+Anatomy of a Redis command
+---
+
+All the Redis commands are defined in the following way:
+
+ void foobarCommand(client *c) {
+ printf("%s",c->argv[1]->ptr); /* Do something with the argument. */
+ addReply(c,shared.ok); /* Reply something to the client. */
+ }
+
+The command is then referenced inside `server.c` in the command table:
+
+ {"foobar",foobarCommand,2,"rtF",0,NULL,0,0,0,0,0},
+
+In the above example `2` is the number of arguments the command takes,
+while `"rtF"` are the command flags, as documented in the command table
+top comment inside `server.c`.
+
+After the command operates in some way, it returns a reply to the client,
+usually using `addReply()` or a similar function defined inside `networking.c`.
+
+There are tons of command implementations inside the Redis source code
+that can serve as examples of actual command implementations. Writing
+a few toy commands can be a good exercise to get familiar with the code base.
+
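+For example, a hypothetical toy command that echoes back its only argument
+could be as simple as the following (it would then be registered in the
+command table as shown above):
+
+    void echotoyCommand(client *c) {
+        addReplyBulk(c, c->argv[1]);    /* Reply with the first argument. */
+    }
+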
+There are also many other files not described here, but it is useless to
+cover everything. We just want to help you with the first steps.
+Eventually you'll find your way inside the Redis code base :-)
+
+Enjoy!
diff --git a/deps/Makefile b/deps/Makefile
index 71f6d3a2c..eb35c1e1f 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -77,7 +77,7 @@ JEMALLOC_LDFLAGS= $(LDFLAGS)
jemalloc: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
- cd jemalloc && ./configure --with-jemalloc-prefix=je_ --enable-cc-silence CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)"
+ cd jemalloc && ./configure --with-version=5.1.0-0-g0 --with-lg-quantum=3 --with-jemalloc-prefix=je_ --enable-cc-silence CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)"
cd jemalloc && $(MAKE) CFLAGS="$(JEMALLOC_CFLAGS)" LDFLAGS="$(JEMALLOC_LDFLAGS)" lib/libjemalloc.a
.PHONY: jemalloc
diff --git a/deps/README.md b/deps/README.md
new file mode 100644
index 000000000..367ee1627
--- /dev/null
+++ b/deps/README.md
@@ -0,0 +1,88 @@
+This directory contains all Redis dependencies, except for the libc that
+should be provided by the operating system.
+
+* **Jemalloc** is our memory allocator, used as replacement for libc malloc on Linux by default. It has good performance and excellent fragmentation behavior. This component is upgraded from time to time.
+* **geohash-int** is inside the dependencies directory but is actually part of the Redis project, since it is our private fork (heavily modified) of a library initially developed for Ardb, which is in turn a fork of Redis.
+* **hiredis** is the official C client library for Redis. It is used by redis-cli, redis-benchmark and Redis Sentinel. It is part of the Redis official ecosystem but is developed externally from the Redis repository, so we just upgrade it as needed.
+* **linenoise** is a readline replacement. It is developed by the same authors of Redis but is managed as a separate project and updated as needed.
+* **lua** is Lua 5.1 with minor changes for security and additional libraries.
+
+How to upgrade the above dependencies
+===
+
+Jemalloc
+---
+
+Jemalloc is modified with changes that allow us to implement the Redis
+active defragmentation logic. However, this feature of Redis is not mandatory
+and Redis is able to detect whether the Jemalloc version it is compiled
+against supports such Redis-specific modifications. So in theory, if you
+are not interested in active defragmentation, you can replace Jemalloc
+by just following these steps:
+
+1. Remove the jemalloc directory.
+2. Substitute it with the new jemalloc source tree.
+3. Edit the Makefile located in the same directory as the README you are
+   reading, and change the `--with-version` option of the Jemalloc configure
+   invocation to the version you are using (following the same format as the
+   existing value, e.g. `5.1.0-0-g0`). This is required because otherwise
+   the Jemalloc configure script is broken and will not work when nested in
+   another git repository.
+
+However note that we change Jemalloc settings via the `configure` script of Jemalloc using the `--with-lg-quantum` option, setting it to the value of 3 instead of 4. This provides us with more size classes that better suit the Redis data structures, in order to gain memory efficiency.
+
+If you want to upgrade Jemalloc while also providing support for
+active defragmentation, in addition to the above steps you need to
+perform the following ones:
+
+4. In the Jemalloc tree, in the file `include/jemalloc/jemalloc_macros.h.in`,
+   make sure to add `#define JEMALLOC_FRAG_HINT`.
+5. Implement the function `je_get_defrag_hint()` inside `src/jemalloc.c`. You
+   can see how it is implemented in the current Jemalloc source tree shipped
+   with Redis, and rewrite it according to the new Jemalloc internals if they
+   changed; otherwise, you can just copy the old implementation if you are
+   upgrading to a similar version of Jemalloc.
+
+Geohash
+---
+
+This is never upgraded since it's part of the Redis project. If there are changes to merge from Ardb, the differences need to be checked manually, but at this point the two code bases are pretty different.
+
+Hiredis
+---
+
+Hiredis uses the SDS string library, which must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in one way or the other. In order to upgrade, it is advised to take a lot of care:
+
+1. Check with diff if hiredis API changed and what impact it could have in Redis.
+2. Make sure that the SDS library inside Hiredis and inside Redis are compatible.
+3. After the upgrade, run the Redis Sentinel test.
+4. Check manually that redis-cli and redis-benchmark behave as expected, since we have no tests for the CLI utilities currently.
+
+Linenoise
+---
+
+Linenoise is rarely upgraded, and only as needed. The upgrade process is
+trivial, since Redis uses an unmodified version of linenoise, so to upgrade
+just do the following:
+
+1. Remove the linenoise directory.
+2. Substitute it with the new linenoise source tree.
+
+Lua
+---
+
+We use Lua 5.1 and no upgrade is currently planned, since we don't want to
+trade compatibility for new Lua features: in the context of Redis Lua scripts
+the capabilities of 5.1 are usually more than enough, the release is rock
+solid, and we definitely don't want to break old scripts.
+
+So upgrading of Lua is up to the Redis project maintainers and should be a
+manual procedure performed by taking a diff between the different versions.
+
+Currently we have at least the following differences between official Lua 5.1
+and our version:
+
+1. Makefile is modified to allow using a compiler other than GCC.
+2. We ship the implementation source code of, and directly link, the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`.
+3. There is a security fix in `ldo.c`, line 498: the check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution.
+
+
diff --git a/deps/hiredis/.gitignore b/deps/hiredis/.gitignore
index 0c166a02e..c44b5c537 100644
--- a/deps/hiredis/.gitignore
+++ b/deps/hiredis/.gitignore
@@ -4,3 +4,4 @@
/*.so
/*.dylib
/*.a
+/*.pc
diff --git a/deps/hiredis/.travis.yml b/deps/hiredis/.travis.yml
index 030427ff4..ad08076d8 100644
--- a/deps/hiredis/.travis.yml
+++ b/deps/hiredis/.travis.yml
@@ -1,6 +1,39 @@
language: c
+sudo: false
compiler:
- gcc
- clang
-script: make && make check
+os:
+ - linux
+ - osx
+
+before_script:
+ - if [ "$TRAVIS_OS_NAME" == "osx" ] ; then brew update; brew install redis; fi
+
+addons:
+ apt:
+ packages:
+ - libc6-dbg
+ - libc6-dev
+ - libc6:i386
+ - libc6-dev-i386
+ - libc6-dbg:i386
+ - gcc-multilib
+ - valgrind
+
+env:
+ - CFLAGS="-Werror"
+ - PRE="valgrind --track-origins=yes --leak-check=full"
+ - TARGET="32bit" TARGET_VARS="32bit-vars" CFLAGS="-Werror"
+ - TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full"
+
+matrix:
+ exclude:
+ - os: osx
+ env: PRE="valgrind --track-origins=yes --leak-check=full"
+
+ - os: osx
+ env: TARGET="32bit" TARGET_VARS="32bit-vars" PRE="valgrind --track-origins=yes --leak-check=full"
+
+script: make $TARGET CFLAGS="$CFLAGS" && make check PRE="$PRE" && make $TARGET_VARS hiredis-example
diff --git a/deps/hiredis/CHANGELOG.md b/deps/hiredis/CHANGELOG.md
index 268b15cd5..f92bcb3c9 100644
--- a/deps/hiredis/CHANGELOG.md
+++ b/deps/hiredis/CHANGELOG.md
@@ -1,3 +1,128 @@
+### 1.0.0 (unreleased)
+
+**Fixes**:
+
+* Catch a buffer overflow when formatting the error message
+* Import latest upstream sds. This breaks applications that are linked against the old hiredis v0.13
+* Fix warnings, when compiled with -Wshadow
+* Make hiredis compile in Cygwin on Windows, now CI-tested
+
+**BREAKING CHANGES**:
+
+* Change `redisReply.len` to `size_t`, as it denotes the size of a string
+
+User code should compare this to `size_t` values as well.
+If it was compared to values of other types, casts might now be necessary, or previously added casts can be removed.
+
+* Remove backwards compatibility macros
+
+This removes the following old function aliases; use the new names now:
+
+| Old | New |
+| --------------------------- | ---------------------- |
+| redisReplyReaderCreate      | redisReaderCreate      |
+| redisReplyReaderFree | redisReaderFree |
+| redisReplyReaderFeed | redisReaderFeed |
+| redisReplyReaderGetReply | redisReaderGetReply |
+| redisReplyReaderSetPrivdata | redisReaderSetPrivdata |
+| redisReplyReaderGetObject | redisReaderGetObject |
+| redisReplyReaderGetError | redisReaderGetError |
+
+* The `DEBUG` variable in the Makefile was renamed to `DEBUG_FLAGS`
+
+Previously it broke some builds for people that had `DEBUG` set to some arbitrary value,
+due to debugging other software.
+By renaming we avoid unintentional name clashes.
+
+Simply rename `DEBUG` to `DEBUG_FLAGS` in your environment to make it work again.
+
+### 0.13.3 (2015-09-16)
+
+* Revert "Clear `REDIS_CONNECTED` flag when connection is closed".
+* Make tests pass on FreeBSD (Thanks, Giacomo Olgeni)
+
+
+If the `REDIS_CONNECTED` flag is cleared,
+the async onDisconnect callback function will never be called.
+This causes problems as the disconnect is never reported back to the user.
+
+### 0.13.2 (2015-08-25)
+
+* Prevent crash on pending replies in async code (Thanks, @switch-st)
+* Clear `REDIS_CONNECTED` flag when connection is closed (Thanks, Jerry Jacobs)
+* Add MacOS X adapter (Thanks, @dizzus)
+* Add Qt adapter (Thanks, Pietro Cerutti)
+* Add Ivykis adapter (Thanks, Gergely Nagy)
+
+All adapters are provided as is and are only tested where possible.
+
+### 0.13.1 (2015-05-03)
+
+This is a bug fix release.
+The new `reconnect` method introduced new struct members, which clashed with pre-defined names in pre-C99 code.
+Another commit forced C99 compilation just to make it work, but of course this is not desirable for outside projects.
+Other non-C99 code can now use hiredis as usual again.
+Sorry for the inconvenience.
+
+* Fix memory leak in async reply handling (Salvatore Sanfilippo)
+* Rename struct member to avoid name clash with pre-c99 code (Alex Balashov, ncopa)
+
+### 0.13.0 (2015-04-16)
+
+This release adds a minimal Windows compatibility layer.
+The parser, standalone since v0.12.0, can now be compiled on Windows
+(and thus used in other client libraries as well).
+
+* Windows compatibility layer for parser code (tzickel)
+* Properly escape data printed to PKGCONF file (Dan Skorupski)
+* Fix tests when assert() undefined (Keith Bennett, Matt Stancliff)
+* Implement a reconnect method for the client context, this changes the structure of `redisContext` (Aaron Bedra)
+
+### 0.12.1 (2015-01-26)
+
+* Fix `make install`: DESTDIR support, install all required files, install PKGCONF in proper location
+* Fix `make test` as 32 bit build on 64 bit platform
+
+### 0.12.0 (2015-01-22)
+
+* Add optional KeepAlive support
+
+* Try again on EINTR errors
+
+* Add libuv adapter
+
+* Add IPv6 support
+
+* Remove possibility of multiple close on same fd
+
+* Add ability to bind source address on connect
+
+* Add redisConnectFd() and redisFreeKeepFd()
+
+* Fix getaddrinfo() memory leak
+
+* Free string if it is unused (fixes memory leak)
+
+* Improve redisAppendCommandArgv performance 2.5x
+
+* Add support for SO_REUSEADDR
+
+* Fix redisvFormatCommand format parsing
+
+* Add GLib 2.0 adapter
+
+* Refactor reading code into read.c
+
+* Fix errno error buffers to not clobber errors
+
+* Generate pkgconf during build
+
+* Silence _BSD_SOURCE warnings
+
+* Improve digit counting for multibulk creation
+
+
### 0.11.0
* Increase the maximum multi-bulk reply depth to 7.
diff --git a/deps/hiredis/Makefile b/deps/hiredis/Makefile
index ddcc4e4f6..9a4de8360 100644
--- a/deps/hiredis/Makefile
+++ b/deps/hiredis/Makefile
@@ -3,13 +3,25 @@
# Copyright (C) 2010-2011 Pieter Noordhuis <pcnoordhuis at gmail dot com>
# This file is released under the BSD license, see the COPYING file
-OBJ=net.o hiredis.o sds.o async.o
-EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev
+OBJ=net.o hiredis.o sds.o async.o read.o
+EXAMPLES=hiredis-example hiredis-example-libevent hiredis-example-libev hiredis-example-glib
TESTS=hiredis-test
LIBNAME=libhiredis
+PKGCONFNAME=hiredis.pc
-HIREDIS_MAJOR=0
-HIREDIS_MINOR=11
+HIREDIS_MAJOR=$(shell grep HIREDIS_MAJOR hiredis.h | awk '{print $$3}')
+HIREDIS_MINOR=$(shell grep HIREDIS_MINOR hiredis.h | awk '{print $$3}')
+HIREDIS_PATCH=$(shell grep HIREDIS_PATCH hiredis.h | awk '{print $$3}')
+HIREDIS_SONAME=$(shell grep HIREDIS_SONAME hiredis.h | awk '{print $$3}')
+
+# Installation related variables and target
+PREFIX?=/usr/local
+INCLUDE_PATH?=include/hiredis
+LIBRARY_PATH?=lib
+PKGCONF_PATH?=pkgconfig
+INSTALL_INCLUDE_PATH= $(DESTDIR)$(PREFIX)/$(INCLUDE_PATH)
+INSTALL_LIBRARY_PATH= $(DESTDIR)$(PREFIX)/$(LIBRARY_PATH)
+INSTALL_PKGCONF_PATH= $(INSTALL_LIBRARY_PATH)/$(PKGCONF_PATH)
# redis-server configuration used for testing
REDIS_PORT=56379
@@ -25,15 +37,16 @@ export REDIS_TEST_CONFIG
# Fallback to gcc when $CC is not in $PATH.
CC:=$(shell sh -c 'type $(CC) >/dev/null 2>/dev/null && echo $(CC) || echo gcc')
+CXX:=$(shell sh -c 'type $(CXX) >/dev/null 2>/dev/null && echo $(CXX) || echo g++')
OPTIMIZATION?=-O3
WARNINGS=-Wall -W -Wstrict-prototypes -Wwrite-strings
-DEBUG?= -g -ggdb
-REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG) $(ARCH)
+DEBUG_FLAGS?= -g -ggdb
+REAL_CFLAGS=$(OPTIMIZATION) -fPIC $(CFLAGS) $(WARNINGS) $(DEBUG_FLAGS) $(ARCH)
REAL_LDFLAGS=$(LDFLAGS) $(ARCH)
DYLIBSUFFIX=so
STLIBSUFFIX=a
-DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR).$(HIREDIS_MINOR)
+DYLIB_MINOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_SONAME)
DYLIB_MAJOR_NAME=$(LIBNAME).$(DYLIBSUFFIX).$(HIREDIS_MAJOR)
DYLIBNAME=$(LIBNAME).$(DYLIBSUFFIX)
DYLIB_MAKE_CMD=$(CC) -shared -Wl,-soname,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS)
@@ -49,19 +62,20 @@ ifeq ($(uname_S),SunOS)
endif
ifeq ($(uname_S),Darwin)
DYLIBSUFFIX=dylib
- DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(DYLIBSUFFIX)
- DYLIB_MAJOR_NAME=$(LIBNAME).$(HIREDIS_MAJOR).$(DYLIBSUFFIX)
+ DYLIB_MINOR_NAME=$(LIBNAME).$(HIREDIS_SONAME).$(DYLIBSUFFIX)
DYLIB_MAKE_CMD=$(CC) -shared -Wl,-install_name,$(DYLIB_MINOR_NAME) -o $(DYLIBNAME) $(LDFLAGS)
endif
-all: $(DYLIBNAME)
+all: $(DYLIBNAME) $(STLIBNAME) hiredis-test $(PKGCONFNAME)
# Deps (use make dep to generate this)
-net.o: net.c fmacros.h net.h hiredis.h
-async.o: async.c async.h hiredis.h sds.h dict.c dict.h
-hiredis.o: hiredis.c fmacros.h hiredis.h net.h sds.h
+async.o: async.c fmacros.h async.h hiredis.h read.h sds.h net.h dict.c dict.h
+dict.o: dict.c fmacros.h dict.h
+hiredis.o: hiredis.c fmacros.h hiredis.h read.h sds.h net.h
+net.o: net.c fmacros.h net.h hiredis.h read.h sds.h
+read.o: read.c fmacros.h read.h sds.h
sds.o: sds.c sds.h
-test.o: test.c hiredis.h
+test.o: test.c fmacros.h hiredis.h read.h sds.h
$(DYLIBNAME): $(OBJ)
$(DYLIB_MAKE_CMD) $(OBJ)
@@ -79,6 +93,15 @@ hiredis-example-libevent: examples/example-libevent.c adapters/libevent.h $(STLI
hiredis-example-libev: examples/example-libev.c adapters/libev.h $(STLIBNAME)
$(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -lev $(STLIBNAME)
+hiredis-example-glib: examples/example-glib.c adapters/glib.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) $(shell pkg-config --cflags --libs glib-2.0) -I. $< $(STLIBNAME)
+
+hiredis-example-ivykis: examples/example-ivykis.c adapters/ivykis.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -livykis $(STLIBNAME)
+
+hiredis-example-macosx: examples/example-macosx.c adapters/macosx.h $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. $< -framework CoreFoundation $(STLIBNAME)
+
ifndef AE_DIR
hiredis-example-ae:
@echo "Please specify AE_DIR (e.g. <redis repository>/src)"
@@ -94,7 +117,20 @@ hiredis-example-libuv:
@false
else
hiredis-example-libuv: examples/example-libuv.c adapters/libuv.h $(STLIBNAME)
- $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread $(STLIBNAME)
+ $(CC) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(LIBUV_DIR)/include $< $(LIBUV_DIR)/.libs/libuv.a -lpthread -lrt $(STLIBNAME)
+endif
+
+ifeq ($(and $(QT_MOC),$(QT_INCLUDE_DIR),$(QT_LIBRARY_DIR)),)
+hiredis-example-qt:
+ @echo "Please specify QT_MOC, QT_INCLUDE_DIR AND QT_LIBRARY_DIR"
+ @false
+else
+hiredis-example-qt: examples/example-qt.cpp adapters/qt.h $(STLIBNAME)
+ $(QT_MOC) adapters/qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \
+ $(CXX) -x c++ -o qt-adapter-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore
+ $(QT_MOC) examples/example-qt.h -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore | \
+ $(CXX) -x c++ -o qt-example-moc.o -c - $(REAL_CFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore
+ $(CXX) -o examples/$@ $(REAL_CFLAGS) $(REAL_LDFLAGS) -I. -I$(QT_INCLUDE_DIR) -I$(QT_INCLUDE_DIR)/QtCore -L$(QT_LIBRARY_DIR) qt-adapter-moc.o qt-example-moc.o $< -pthread $(STLIBNAME) -lQtCore
endif
hiredis-example: examples/example.c $(STLIBNAME)
@@ -103,14 +139,16 @@ hiredis-example: examples/example.c $(STLIBNAME)
examples: $(EXAMPLES)
hiredis-test: test.o $(STLIBNAME)
- $(CC) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME)
+
+hiredis-%: %.o $(STLIBNAME)
+ $(CC) $(REAL_CFLAGS) -o $@ $(REAL_LDFLAGS) $< $(STLIBNAME)
test: hiredis-test
./hiredis-test
check: hiredis-test
@echo "$$REDIS_TEST_CONFIG" | $(REDIS_SERVER) -
- ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \
+ $(PRE) ./hiredis-test -h 127.0.0.1 -p $(REDIS_PORT) -s /tmp/hiredis-test-redis.sock || \
( kill `cat /tmp/hiredis-test-redis.pid` && false )
kill `cat /tmp/hiredis-test-redis.pid`
@@ -118,29 +156,38 @@ check: hiredis-test
$(CC) -std=c99 -pedantic -c $(REAL_CFLAGS) $<
clean:
- rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) examples/hiredis-example* *.o *.gcda *.gcno *.gcov
+ rm -rf $(DYLIBNAME) $(STLIBNAME) $(TESTS) $(PKGCONFNAME) examples/hiredis-example* *.o *.gcda *.gcno *.gcov
dep:
$(CC) -MM *.c
-# Installation related variables and target
-PREFIX?=/usr/local
-INSTALL_INCLUDE_PATH= $(PREFIX)/include/hiredis
-INSTALL_LIBRARY_PATH= $(PREFIX)/lib
-
ifeq ($(uname_S),SunOS)
INSTALL?= cp -r
endif
INSTALL?= cp -a
-install: $(DYLIBNAME) $(STLIBNAME)
+$(PKGCONFNAME): hiredis.h
+ @echo "Generating $@ for pkgconfig..."
+ @echo prefix=$(PREFIX) > $@
+ @echo exec_prefix=\$${prefix} >> $@
+ @echo libdir=$(PREFIX)/$(LIBRARY_PATH) >> $@
+ @echo includedir=$(PREFIX)/$(INCLUDE_PATH) >> $@
+ @echo >> $@
+ @echo Name: hiredis >> $@
+ @echo Description: Minimalistic C client library for Redis. >> $@
+ @echo Version: $(HIREDIS_MAJOR).$(HIREDIS_MINOR).$(HIREDIS_PATCH) >> $@
+ @echo Libs: -L\$${libdir} -lhiredis >> $@
+ @echo Cflags: -I\$${includedir} -D_FILE_OFFSET_BITS=64 >> $@
+
+install: $(DYLIBNAME) $(STLIBNAME) $(PKGCONFNAME)
mkdir -p $(INSTALL_INCLUDE_PATH) $(INSTALL_LIBRARY_PATH)
- $(INSTALL) hiredis.h async.h adapters $(INSTALL_INCLUDE_PATH)
+ $(INSTALL) hiredis.h async.h read.h sds.h adapters $(INSTALL_INCLUDE_PATH)
$(INSTALL) $(DYLIBNAME) $(INSTALL_LIBRARY_PATH)/$(DYLIB_MINOR_NAME)
- cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIB_MAJOR_NAME)
- cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MAJOR_NAME) $(DYLIBNAME)
+ cd $(INSTALL_LIBRARY_PATH) && ln -sf $(DYLIB_MINOR_NAME) $(DYLIBNAME)
$(INSTALL) $(STLIBNAME) $(INSTALL_LIBRARY_PATH)
+ mkdir -p $(INSTALL_PKGCONF_PATH)
+ $(INSTALL) $(PKGCONFNAME) $(INSTALL_PKGCONF_PATH)
32bit:
@echo ""
@@ -148,6 +195,10 @@ install: $(DYLIBNAME) $(STLIBNAME)
@echo ""
$(MAKE) CFLAGS="-m32" LDFLAGS="-m32"
+32bit-vars:
+ $(eval CFLAGS=-m32)
+ $(eval LDFLAGS=-m32)
+
gprof:
$(MAKE) CFLAGS="-pg" LDFLAGS="-pg"
@@ -163,4 +214,4 @@ coverage: gcov
noopt:
$(MAKE) OPTIMIZATION=""
-.PHONY: all test check clean dep install 32bit gprof gcov noopt
+.PHONY: all test check clean dep install 32bit 32bit-vars gprof gcov noopt
diff --git a/deps/hiredis/README.md b/deps/hiredis/README.md
index dba4a8c8e..01223ea59 100644
--- a/deps/hiredis/README.md
+++ b/deps/hiredis/README.md
@@ -1,11 +1,13 @@
[![Build Status](https://travis-ci.org/redis/hiredis.png)](https://travis-ci.org/redis/hiredis)
+**This Readme reflects the latest changes in the master branch. See [v0.13.3](https://github.com/redis/hiredis/tree/v0.13.3) for the Readme and documentation for the latest release.**
+
# HIREDIS
Hiredis is a minimalistic C client library for the [Redis](http://redis.io/) database.
It is minimalistic because it just adds minimal support for the protocol, but
-at the same time it uses an high level printf-alike API in order to make it
+at the same time it uses a high level printf-alike API in order to make it
much higher level than otherwise suggested by its minimal code base and the
lack of explicit bindings for every Redis command.
@@ -20,7 +22,15 @@ Redis version >= 1.2.0.
The library comes with multiple APIs. There is the
*synchronous API*, the *asynchronous API* and the *reply parsing API*.
-## UPGRADING
+## Upgrading to `1.0.0`
+
+Version 1.0.0 marks a stable release of hiredis.
+It includes some minor breaking changes, mostly to make the exposed API more uniform and self-explanatory.
+It also bundles the updated `sds` library, to sync up with upstream and Redis.
+For most applications a recompile against the new hiredis should be enough.
+For code changes see the [Changelog](CHANGELOG.md).
+
+## Upgrading from `<0.9.0`
Version 0.9.0 is a major overhaul of hiredis in every aspect. However, upgrading existing
code using hiredis should not be a big pain. The key thing to keep in mind when
@@ -31,51 +41,62 @@ the stateless 0.0.1 that only has a file descriptor to work with.
To consume the synchronous API, there are only a few function calls that need to be introduced:
- redisContext *redisConnect(const char *ip, int port);
- void *redisCommand(redisContext *c, const char *format, ...);
- void freeReplyObject(void *reply);
+```c
+redisContext *redisConnect(const char *ip, int port);
+void *redisCommand(redisContext *c, const char *format, ...);
+void freeReplyObject(void *reply);
+```
### Connecting
The function `redisConnect` is used to create a so-called `redisContext`. The
context is where Hiredis holds state for a connection. The `redisContext`
-struct has an integer `err` field that is non-zero when an the connection is in
+struct has an integer `err` field that is non-zero when the connection is in
an error state. The field `errstr` will contain a string with a description of
the error. More information on errors can be found in the **Errors** section.
After trying to connect to Redis using `redisConnect` you should
check the `err` field to see if establishing the connection was successful:
-
- redisContext *c = redisConnect("127.0.0.1", 6379);
- if (c != NULL && c->err) {
+```c
+redisContext *c = redisConnect("127.0.0.1", 6379);
+if (c == NULL || c->err) {
+ if (c) {
printf("Error: %s\n", c->errstr);
// handle error
+ } else {
+ printf("Can't allocate redis context\n");
}
+}
+```
+
+*Note: A `redisContext` is not thread-safe.*
### Sending commands
There are several ways to issue commands to Redis. The first that will be introduced is
`redisCommand`. This function takes a format similar to printf. In the simplest form,
it is used like this:
-
- reply = redisCommand(context, "SET foo bar");
+```c
+reply = redisCommand(context, "SET foo bar");
+```
The specifier `%s` interpolates a string in the command, and uses `strlen` to
determine the length of the string:
-
- reply = redisCommand(context, "SET foo %s", value);
-
+```c
+reply = redisCommand(context, "SET foo %s", value);
+```
When you need to pass binary safe strings in a command, the `%b` specifier can be
used. Together with a pointer to the string, it requires a `size_t` length argument
of the string:
-
- reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen);
-
+```c
+reply = redisCommand(context, "SET foo %b", value, (size_t) valuelen);
+```
Internally, Hiredis splits the command in different arguments and will
convert it to the protocol used to communicate with Redis.
One or more spaces separates arguments, so you can use the specifiers
anywhere in an argument:
-
- reply = redisCommand(context, "SET key:%s %s", myid, value);
+```c
+reply = redisCommand(context, "SET key:%s %s", myid, value);
+```
### Using replies
@@ -114,11 +135,11 @@ was received:
Redis may reply with nested arrays but this is fully supported.
Replies should be freed using the `freeReplyObject()` function.
-Note that this function will take care of freeing sub-replies objects
+Note that this function will take care of freeing sub-reply objects
contained in arrays and nested arrays, so there is no need for the user to
free the sub replies (it is actually harmful and will corrupt the memory).
-**Important:** the current version of hiredis (0.10.0) free's replies when the
+**Important:** the current version of hiredis (0.10.0) frees replies when the
asynchronous API is used. This means you should not call `freeReplyObject` when
you use this API. The reply is cleaned up by hiredis _after_ the callback
returns. This behavior will probably change in future releases, so make sure to
@@ -127,19 +148,19 @@ keep an eye on the changelog when upgrading (see issue #39).
### Cleaning up
To disconnect and free the context the following function can be used:
-
- void redisFree(redisContext *c);
-
-This function immediately closes the socket and then free's the allocations done in
+```c
+void redisFree(redisContext *c);
+```
+This function immediately closes the socket and then frees the allocations done in
creating the context.
### Sending commands (cont'd)
Together with `redisCommand`, the function `redisCommandArgv` can be used to issue commands.
It has the following prototype:
-
- void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
-
+```c
+void *redisCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
+```
It takes the number of arguments `argc`, an array of strings `argv` and the lengths of the
arguments `argvlen`. For convenience, `argvlen` may be set to `NULL` and the function will
use `strlen(3)` on every argument to determine its length. Obviously, when any of the arguments
@@ -169,10 +190,10 @@ The function `redisGetReply` is exported as part of the Hiredis API and can be u
is expected on the socket. To pipeline commands, the only things that needs to be done is
filling up the output buffer. For this cause, two commands can be used that are identical
to the `redisCommand` family, apart from not returning a reply:
-
- void redisAppendCommand(redisContext *c, const char *format, ...);
- void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
-
+```c
+void redisAppendCommand(redisContext *c, const char *format, ...);
+void redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen);
+```
After calling either function one or more times, `redisGetReply` can be used to receive the
subsequent replies. The return value for this function is either `REDIS_OK` or `REDIS_ERR`, where
the latter means an error occurred while reading a reply. Just as with the other commands,
@@ -180,24 +201,24 @@ the `err` field in the context can be used to find out what the cause of this er
The following examples shows a simple pipeline (resulting in only a single call to `write(2)` and
a single call to `read(2)`):
-
- redisReply *reply;
- redisAppendCommand(context,"SET foo bar");
- redisAppendCommand(context,"GET foo");
- redisGetReply(context,&reply); // reply for SET
- freeReplyObject(reply);
- redisGetReply(context,&reply); // reply for GET
- freeReplyObject(reply);
-
+```c
+redisReply *reply;
+redisAppendCommand(context,"SET foo bar");
+redisAppendCommand(context,"GET foo");
+redisGetReply(context,&reply); // reply for SET
+freeReplyObject(reply);
+redisGetReply(context,&reply); // reply for GET
+freeReplyObject(reply);
+```
This API can also be used to implement a blocking subscriber:
-
- reply = redisCommand(context,"SUBSCRIBE foo");
+```c
+reply = redisCommand(context,"SUBSCRIBE foo");
+freeReplyObject(reply);
+while(redisGetReply(context,&reply) == REDIS_OK) {
+ // consume message
freeReplyObject(reply);
- while(redisGetReply(context,&reply) == REDIS_OK) {
- // consume message
- freeReplyObject(reply);
- }
-
+}
+```
### Errors
When a function call is not successful, depending on the function either `NULL` or `REDIS_ERR` is
@@ -237,58 +258,62 @@ should be checked after creation to see if there were errors creating the connec
Because the connection that will be created is non-blocking, the kernel is not able to
instantly return if the specified host and port is able to accept a connection.
- redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
- if (c->err) {
- printf("Error: %s\n", c->errstr);
- // handle error
- }
+*Note: A `redisAsyncContext` is not thread-safe.*
+
+```c
+redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+if (c->err) {
+ printf("Error: %s\n", c->errstr);
+ // handle error
+}
+```
The asynchronous context can hold a disconnect callback function that is called when the
connection is disconnected (either because of an error or per user request). This function should
have the following prototype:
-
- void(const redisAsyncContext *c, int status);
-
+```c
+void(const redisAsyncContext *c, int status);
+```
On a disconnect, the `status` argument is set to `REDIS_OK` when disconnection was initiated by the
user, or `REDIS_ERR` when the disconnection was caused by an error. When it is `REDIS_ERR`, the `err`
field in the context can be accessed to find out the cause of the error.
-The context object is always free'd after the disconnect callback fired. When a reconnect is needed,
+The context object is always freed after the disconnect callback fired. When a reconnect is needed,
the disconnect callback is a good point to do so.
Setting the disconnect callback can only be done once per context. For subsequent calls it will
return `REDIS_ERR`. The function to set the disconnect callback has the following prototype:
-
- int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
-
+```c
+int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
+```
### Sending commands and their callbacks
In an asynchronous context, commands are automatically pipelined due to the nature of an event loop.
Therefore, unlike the synchronous API, there is only a single way to send commands.
Because commands are sent to Redis asynchronously, issuing a command requires a callback function
that is called when the reply is received. Reply callbacks should have the following prototype:
-
- void(redisAsyncContext *c, void *reply, void *privdata);
-
+```c
+void(redisAsyncContext *c, void *reply, void *privdata);
+```
The `privdata` argument can be used to curry arbitrary data to the callback from the point where
the command is initially queued for execution.
The functions that can be used to issue commands in an asynchronous context are:
-
- int redisAsyncCommand(
- redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
- const char *format, ...);
- int redisAsyncCommandArgv(
- redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
- int argc, const char **argv, const size_t *argvlen);
-
+```c
+int redisAsyncCommand(
+ redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
+ const char *format, ...);
+int redisAsyncCommandArgv(
+ redisAsyncContext *ac, redisCallbackFn *fn, void *privdata,
+ int argc, const char **argv, const size_t *argvlen);
+```
Both functions work like their blocking counterparts. The return value is `REDIS_OK` when the command
was successfully added to the output buffer and `REDIS_ERR` otherwise. Example: when the connection
is being disconnected per user-request, no new commands may be added to the output buffer and `REDIS_ERR` is
returned on calls to the `redisAsyncCommand` family.
-If the reply for a command with a `NULL` callback is read, it is immediately free'd. When the callback
-for a command is non-`NULL`, the memory is free'd immediately following the callback: the reply is only
+If the reply for a command with a `NULL` callback is read, it is immediately freed. When the callback
+for a command is non-`NULL`, the memory is freed immediately following the callback: the reply is only
valid for the duration of the callback.
All pending callbacks are called with a `NULL` reply when the context encountered an error.
@@ -296,14 +321,14 @@ All pending callbacks are called with a `NULL` reply when the context encountere
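
For instance, a minimal reply callback might look like the sketch below
(here `privdata` is assumed to carry a string that was passed when the
command was queued):

```c
void getCallback(redisAsyncContext *c, void *r, void *privdata) {
    redisReply *reply = r;
    if (reply == NULL) return; /* context error: callback fired with NULL */
    printf("argv[%s]: %s\n", (char*)privdata, reply->str);
    /* Do not call freeReplyObject(reply) here: hiredis frees the reply
     * right after this callback returns. */
}
```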
### Disconnecting
An asynchronous connection can be terminated using:
-
- void redisAsyncDisconnect(redisAsyncContext *ac);
-
+```c
+void redisAsyncDisconnect(redisAsyncContext *ac);
+```
When this function is called, the connection is **not** immediately terminated. Instead, new
commands are no longer accepted and the connection is only terminated when all pending commands
have been written to the socket, their respective replies have been read and their respective
callbacks have been executed. After this, the disconnection callback is executed with the
-`REDIS_OK` status and the context object is free'd.
+`REDIS_OK` status and the context object is freed.
### Hooking it up to event library *X*
@@ -316,12 +341,12 @@ Hiredis comes with a reply parsing API that makes it easy for writing higher
level language bindings.
The reply parsing API consists of the following functions:
-
- redisReader *redisReaderCreate(void);
- void redisReaderFree(redisReader *reader);
- int redisReaderFeed(redisReader *reader, const char *buf, size_t len);
- int redisReaderGetReply(redisReader *reader, void **reply);
-
+```c
+redisReader *redisReaderCreate(void);
+void redisReaderFree(redisReader *reader);
+int redisReaderFeed(redisReader *reader, const char *buf, size_t len);
+int redisReaderGetReply(redisReader *reader, void **reply);
+```
The same set of functions are used internally by hiredis when creating a
normal Redis context, the above API just exposes it to the user for a direct
usage.
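
As a minimal sketch of direct usage (where `buf` and `len` stand for raw
protocol bytes obtained elsewhere, e.g. read from a socket you manage
yourself):

```c
redisReader *reader = redisReaderCreate();
void *reply;

redisReaderFeed(reader, buf, len); /* accumulate raw protocol data */
if (redisReaderGetReply(reader, &reply) == REDIS_OK && reply != NULL) {
    /* A complete reply was parsed; process and release it. */
    freeReplyObject(reply);
}
redisReaderFree(reader);
```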
@@ -361,7 +386,7 @@ Both when using the Reader API directly or when using it indirectly via a
normal Redis context, the redisReader structure uses a buffer in order to
accumulate data from the server.
Usually this buffer is destroyed when it is empty and is larger than 16
-kb in order to avoid wasting memory in unused buffers
+KiB in order to avoid wasting memory in unused buffers.
However when working with very big payloads destroying the buffer may slow
down performances considerably, so it is possible to modify the max size of
@@ -371,9 +396,9 @@ value for an idle buffer, so the buffer will never get freed.
For instance if you have a normal Redis context you can set the maximum idle
buffer to zero (unlimited) just with:
-
- context->reader->maxbuf = 0;
-
+```c
+context->reader->maxbuf = 0;
+```
This should be done only in order to maximize performances when working with
large payloads. The context should be set back to `REDIS_READER_MAX_BUF` again
as soon as possible in order to prevent allocation of useless memory.
@@ -381,4 +406,6 @@ as soon as possible in order to prevent allocation of useless memory.
## AUTHORS
Hiredis was written by Salvatore Sanfilippo (antirez at gmail) and
-Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license.
+Pieter Noordhuis (pcnoordhuis at gmail) and is released under the BSD license.
+Hiredis is currently maintained by Matt Stancliff (matt at genges dot com) and
+Jan-Erik Rediger (janerik at fnordig dot com)
diff --git a/deps/hiredis/adapters/glib.h b/deps/hiredis/adapters/glib.h
new file mode 100644
index 000000000..e0a6411d3
--- /dev/null
+++ b/deps/hiredis/adapters/glib.h
@@ -0,0 +1,153 @@
+#ifndef __HIREDIS_GLIB_H__
+#define __HIREDIS_GLIB_H__
+
+#include <glib.h>
+
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct
+{
+ GSource source;
+ redisAsyncContext *ac;
+ GPollFD poll_fd;
+} RedisSource;
+
+static void
+redis_source_add_read (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events |= G_IO_IN;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_del_read (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events &= ~G_IO_IN;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_add_write (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events |= G_IO_OUT;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_del_write (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+ g_return_if_fail(source);
+ source->poll_fd.events &= ~G_IO_OUT;
+ g_main_context_wakeup(g_source_get_context((GSource *)data));
+}
+
+static void
+redis_source_cleanup (gpointer data)
+{
+ RedisSource *source = (RedisSource *)data;
+
+ g_return_if_fail(source);
+
+ redis_source_del_read(source);
+ redis_source_del_write(source);
+ /*
+ * It is not our responsibility to remove ourself from the
+ * current main loop. However, we will remove the GPollFD.
+ */
+ if (source->poll_fd.fd >= 0) {
+ g_source_remove_poll((GSource *)data, &source->poll_fd);
+ source->poll_fd.fd = -1;
+ }
+}
+
+static gboolean
+redis_source_prepare (GSource *source,
+ gint *timeout_)
+{
+ RedisSource *redis = (RedisSource *)source;
+ *timeout_ = -1;
+ return !!(redis->poll_fd.events & redis->poll_fd.revents);
+}
+
+static gboolean
+redis_source_check (GSource *source)
+{
+ RedisSource *redis = (RedisSource *)source;
+ return !!(redis->poll_fd.events & redis->poll_fd.revents);
+}
+
+static gboolean
+redis_source_dispatch (GSource *source,
+ GSourceFunc callback,
+ gpointer user_data)
+{
+ RedisSource *redis = (RedisSource *)source;
+
+ if ((redis->poll_fd.revents & G_IO_OUT)) {
+ redisAsyncHandleWrite(redis->ac);
+ redis->poll_fd.revents &= ~G_IO_OUT;
+ }
+
+ if ((redis->poll_fd.revents & G_IO_IN)) {
+ redisAsyncHandleRead(redis->ac);
+ redis->poll_fd.revents &= ~G_IO_IN;
+ }
+
+ if (callback) {
+ return callback(user_data);
+ }
+
+ return TRUE;
+}
+
+static void
+redis_source_finalize (GSource *source)
+{
+ RedisSource *redis = (RedisSource *)source;
+
+ if (redis->poll_fd.fd >= 0) {
+ g_source_remove_poll(source, &redis->poll_fd);
+ redis->poll_fd.fd = -1;
+ }
+}
+
+static GSource *
+redis_source_new (redisAsyncContext *ac)
+{
+ static GSourceFuncs source_funcs = {
+ .prepare = redis_source_prepare,
+ .check = redis_source_check,
+ .dispatch = redis_source_dispatch,
+ .finalize = redis_source_finalize,
+ };
+ redisContext *c = &ac->c;
+ RedisSource *source;
+
+ g_return_val_if_fail(ac != NULL, NULL);
+
+ source = (RedisSource *)g_source_new(&source_funcs, sizeof *source);
+ source->ac = ac;
+ source->poll_fd.fd = c->fd;
+ source->poll_fd.events = 0;
+ source->poll_fd.revents = 0;
+ g_source_add_poll((GSource *)source, &source->poll_fd);
+
+ ac->ev.addRead = redis_source_add_read;
+ ac->ev.delRead = redis_source_del_read;
+ ac->ev.addWrite = redis_source_add_write;
+ ac->ev.delWrite = redis_source_del_write;
+ ac->ev.cleanup = redis_source_cleanup;
+ ac->ev.data = source;
+
+ return (GSource *)source;
+}
+
+#endif /* __HIREDIS_GLIB_H__ */
diff --git a/deps/hiredis/adapters/ivykis.h b/deps/hiredis/adapters/ivykis.h
new file mode 100644
index 000000000..6a12a868a
--- /dev/null
+++ b/deps/hiredis/adapters/ivykis.h
@@ -0,0 +1,81 @@
+#ifndef __HIREDIS_IVYKIS_H__
+#define __HIREDIS_IVYKIS_H__
+#include <iv.h>
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct redisIvykisEvents {
+ redisAsyncContext *context;
+ struct iv_fd fd;
+} redisIvykisEvents;
+
+static void redisIvykisReadEvent(void *arg) {
+ redisAsyncContext *context = (redisAsyncContext *)arg;
+ redisAsyncHandleRead(context);
+}
+
+static void redisIvykisWriteEvent(void *arg) {
+ redisAsyncContext *context = (redisAsyncContext *)arg;
+ redisAsyncHandleWrite(context);
+}
+
+static void redisIvykisAddRead(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_in(&e->fd, redisIvykisReadEvent);
+}
+
+static void redisIvykisDelRead(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_in(&e->fd, NULL);
+}
+
+static void redisIvykisAddWrite(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_out(&e->fd, redisIvykisWriteEvent);
+}
+
+static void redisIvykisDelWrite(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+ iv_fd_set_handler_out(&e->fd, NULL);
+}
+
+static void redisIvykisCleanup(void *privdata) {
+ redisIvykisEvents *e = (redisIvykisEvents*)privdata;
+
+ iv_fd_unregister(&e->fd);
+ free(e);
+}
+
+static int redisIvykisAttach(redisAsyncContext *ac) {
+ redisContext *c = &(ac->c);
+ redisIvykisEvents *e;
+
+ /* Nothing should be attached when something is already attached */
+ if (ac->ev.data != NULL)
+ return REDIS_ERR;
+
+ /* Create container for context and r/w events */
+ e = (redisIvykisEvents*)malloc(sizeof(*e));
+ e->context = ac;
+
+ /* Register functions to start/stop listening for events */
+ ac->ev.addRead = redisIvykisAddRead;
+ ac->ev.delRead = redisIvykisDelRead;
+ ac->ev.addWrite = redisIvykisAddWrite;
+ ac->ev.delWrite = redisIvykisDelWrite;
+ ac->ev.cleanup = redisIvykisCleanup;
+ ac->ev.data = e;
+
+ /* Initialize and install read/write events */
+ IV_FD_INIT(&e->fd);
+ e->fd.fd = c->fd;
+ e->fd.handler_in = redisIvykisReadEvent;
+ e->fd.handler_out = redisIvykisWriteEvent;
+ e->fd.handler_err = NULL;
+ e->fd.cookie = e->context;
+
+ iv_fd_register(&e->fd);
+
+ return REDIS_OK;
+}
+#endif
diff --git a/deps/hiredis/adapters/libevent.h b/deps/hiredis/adapters/libevent.h
index 1c2b271bb..273d8b2dd 100644
--- a/deps/hiredis/adapters/libevent.h
+++ b/deps/hiredis/adapters/libevent.h
@@ -30,13 +30,13 @@
#ifndef __HIREDIS_LIBEVENT_H__
#define __HIREDIS_LIBEVENT_H__
-#include <event.h>
+#include <event2/event.h>
#include "../hiredis.h"
#include "../async.h"
typedef struct redisLibeventEvents {
redisAsyncContext *context;
- struct event rev, wev;
+ struct event *rev, *wev;
} redisLibeventEvents;
static void redisLibeventReadEvent(int fd, short event, void *arg) {
@@ -53,28 +53,28 @@ static void redisLibeventWriteEvent(int fd, short event, void *arg) {
static void redisLibeventAddRead(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_add(&e->rev,NULL);
+ event_add(e->rev,NULL);
}
static void redisLibeventDelRead(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->rev);
+ event_del(e->rev);
}
static void redisLibeventAddWrite(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_add(&e->wev,NULL);
+ event_add(e->wev,NULL);
}
static void redisLibeventDelWrite(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->wev);
+ event_del(e->wev);
}
static void redisLibeventCleanup(void *privdata) {
redisLibeventEvents *e = (redisLibeventEvents*)privdata;
- event_del(&e->rev);
- event_del(&e->wev);
+ event_del(e->rev);
+ event_del(e->wev);
free(e);
}
@@ -99,10 +99,10 @@ static int redisLibeventAttach(redisAsyncContext *ac, struct event_base *base) {
ac->ev.data = e;
/* Initialize and install read/write events */
- event_set(&e->rev,c->fd,EV_READ,redisLibeventReadEvent,e);
- event_set(&e->wev,c->fd,EV_WRITE,redisLibeventWriteEvent,e);
- event_base_set(base,&e->rev);
- event_base_set(base,&e->wev);
+ e->rev = event_new(base, c->fd, EV_READ, redisLibeventReadEvent, e);
+ e->wev = event_new(base, c->fd, EV_WRITE, redisLibeventWriteEvent, e);
+ event_add(e->rev, NULL);
+ event_add(e->wev, NULL);
return REDIS_OK;
}
#endif
diff --git a/deps/hiredis/adapters/libuv.h b/deps/hiredis/adapters/libuv.h
index a1967f4fd..ff08c25e1 100644
--- a/deps/hiredis/adapters/libuv.h
+++ b/deps/hiredis/adapters/libuv.h
@@ -1,5 +1,6 @@
#ifndef __HIREDIS_LIBUV_H__
#define __HIREDIS_LIBUV_H__
+#include <stdlib.h>
#include <uv.h>
#include "../hiredis.h"
#include "../async.h"
@@ -11,7 +12,6 @@ typedef struct redisLibuvEvents {
int events;
} redisLibuvEvents;
-int redisLibuvAttach(redisAsyncContext*, uv_loop_t*);
static void redisLibuvPoll(uv_poll_t* handle, int status, int events) {
redisLibuvEvents* p = (redisLibuvEvents*)handle->data;
@@ -20,10 +20,10 @@ static void redisLibuvPoll(uv_poll_t* handle, int status, int events) {
return;
}
- if (events & UV_READABLE) {
+ if (p->context != NULL && (events & UV_READABLE)) {
redisAsyncHandleRead(p->context);
}
- if (events & UV_WRITABLE) {
+ if (p->context != NULL && (events & UV_WRITABLE)) {
redisAsyncHandleWrite(p->context);
}
}
@@ -83,6 +83,7 @@ static void on_close(uv_handle_t* handle) {
static void redisLibuvCleanup(void *privdata) {
redisLibuvEvents* p = (redisLibuvEvents*)privdata;
+ p->context = NULL; // indicate that context might no longer exist
uv_close((uv_handle_t*)&p->handle, on_close);
}
diff --git a/deps/hiredis/adapters/macosx.h b/deps/hiredis/adapters/macosx.h
new file mode 100644
index 000000000..72121f606
--- /dev/null
+++ b/deps/hiredis/adapters/macosx.h
@@ -0,0 +1,114 @@
+//
+// Created by Дмитрий Бахвалов on 13.07.15.
+// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved.
+//
+
+#ifndef __HIREDIS_MACOSX_H__
+#define __HIREDIS_MACOSX_H__
+
+#include <CoreFoundation/CoreFoundation.h>
+
+#include "../hiredis.h"
+#include "../async.h"
+
+typedef struct {
+ redisAsyncContext *context;
+ CFSocketRef socketRef;
+ CFRunLoopSourceRef sourceRef;
+} RedisRunLoop;
+
+static int freeRedisRunLoop(RedisRunLoop* redisRunLoop) {
+ if( redisRunLoop != NULL ) {
+ if( redisRunLoop->sourceRef != NULL ) {
+ CFRunLoopSourceInvalidate(redisRunLoop->sourceRef);
+ CFRelease(redisRunLoop->sourceRef);
+ }
+ if( redisRunLoop->socketRef != NULL ) {
+ CFSocketInvalidate(redisRunLoop->socketRef);
+ CFRelease(redisRunLoop->socketRef);
+ }
+ free(redisRunLoop);
+ }
+ return REDIS_ERR;
+}
+
+static void redisMacOSAddRead(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack);
+}
+
+static void redisMacOSDelRead(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketReadCallBack);
+}
+
+static void redisMacOSAddWrite(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketEnableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack);
+}
+
+static void redisMacOSDelWrite(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ CFSocketDisableCallBacks(redisRunLoop->socketRef, kCFSocketWriteCallBack);
+}
+
+static void redisMacOSCleanup(void *privdata) {
+ RedisRunLoop *redisRunLoop = (RedisRunLoop*)privdata;
+ freeRedisRunLoop(redisRunLoop);
+}
+
+static void redisMacOSAsyncCallback(CFSocketRef __unused s, CFSocketCallBackType callbackType, CFDataRef __unused address, const void __unused *data, void *info) {
+ redisAsyncContext* context = (redisAsyncContext*) info;
+
+ switch (callbackType) {
+ case kCFSocketReadCallBack:
+ redisAsyncHandleRead(context);
+ break;
+
+ case kCFSocketWriteCallBack:
+ redisAsyncHandleWrite(context);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int redisMacOSAttach(redisAsyncContext *redisAsyncCtx, CFRunLoopRef runLoop) {
+ redisContext *redisCtx = &(redisAsyncCtx->c);
+
+ /* Nothing should be attached when something is already attached */
+ if( redisAsyncCtx->ev.data != NULL ) return REDIS_ERR;
+
+ RedisRunLoop* redisRunLoop = (RedisRunLoop*) calloc(1, sizeof(RedisRunLoop));
+ if( !redisRunLoop ) return REDIS_ERR;
+
+ /* Setup redis stuff */
+ redisRunLoop->context = redisAsyncCtx;
+
+ redisAsyncCtx->ev.addRead = redisMacOSAddRead;
+ redisAsyncCtx->ev.delRead = redisMacOSDelRead;
+ redisAsyncCtx->ev.addWrite = redisMacOSAddWrite;
+ redisAsyncCtx->ev.delWrite = redisMacOSDelWrite;
+ redisAsyncCtx->ev.cleanup = redisMacOSCleanup;
+ redisAsyncCtx->ev.data = redisRunLoop;
+
+ /* Initialize and install read/write events */
+ CFSocketContext socketCtx = { 0, redisAsyncCtx, NULL, NULL, NULL };
+
+ redisRunLoop->socketRef = CFSocketCreateWithNative(NULL, redisCtx->fd,
+ kCFSocketReadCallBack | kCFSocketWriteCallBack,
+ redisMacOSAsyncCallback,
+ &socketCtx);
+ if( !redisRunLoop->socketRef ) return freeRedisRunLoop(redisRunLoop);
+
+ redisRunLoop->sourceRef = CFSocketCreateRunLoopSource(NULL, redisRunLoop->socketRef, 0);
+ if( !redisRunLoop->sourceRef ) return freeRedisRunLoop(redisRunLoop);
+
+ CFRunLoopAddSource(runLoop, redisRunLoop->sourceRef, kCFRunLoopDefaultMode);
+
+ return REDIS_OK;
+}
+
+#endif
+
diff --git a/deps/hiredis/adapters/qt.h b/deps/hiredis/adapters/qt.h
new file mode 100644
index 000000000..5cc02e6ce
--- /dev/null
+++ b/deps/hiredis/adapters/qt.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (C) 2014 Pietro Cerutti <gahr@gahr.ch>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __HIREDIS_QT_H__
+#define __HIREDIS_QT_H__
+#include <QSocketNotifier>
+#include "../async.h"
+
+static void RedisQtAddRead(void *);
+static void RedisQtDelRead(void *);
+static void RedisQtAddWrite(void *);
+static void RedisQtDelWrite(void *);
+static void RedisQtCleanup(void *);
+
+class RedisQtAdapter : public QObject {
+
+ Q_OBJECT
+
+ friend
+ void RedisQtAddRead(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->addRead();
+ }
+
+ friend
+ void RedisQtDelRead(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->delRead();
+ }
+
+ friend
+ void RedisQtAddWrite(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->addWrite();
+ }
+
+ friend
+ void RedisQtDelWrite(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->delWrite();
+ }
+
+ friend
+ void RedisQtCleanup(void * adapter) {
+ RedisQtAdapter * a = static_cast<RedisQtAdapter *>(adapter);
+ a->cleanup();
+ }
+
+ public:
+ RedisQtAdapter(QObject * parent = 0)
+ : QObject(parent), m_ctx(0), m_read(0), m_write(0) { }
+
+ ~RedisQtAdapter() {
+ if (m_ctx != 0) {
+ m_ctx->ev.data = NULL;
+ }
+ }
+
+ int setContext(redisAsyncContext * ac) {
+ if (ac->ev.data != NULL) {
+ return REDIS_ERR;
+ }
+ m_ctx = ac;
+ m_ctx->ev.data = this;
+ m_ctx->ev.addRead = RedisQtAddRead;
+ m_ctx->ev.delRead = RedisQtDelRead;
+ m_ctx->ev.addWrite = RedisQtAddWrite;
+ m_ctx->ev.delWrite = RedisQtDelWrite;
+ m_ctx->ev.cleanup = RedisQtCleanup;
+ return REDIS_OK;
+ }
+
+ private:
+ void addRead() {
+ if (m_read) return;
+ m_read = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Read, 0);
+ connect(m_read, SIGNAL(activated(int)), this, SLOT(read()));
+ }
+
+ void delRead() {
+ if (!m_read) return;
+ delete m_read;
+ m_read = 0;
+ }
+
+ void addWrite() {
+ if (m_write) return;
+ m_write = new QSocketNotifier(m_ctx->c.fd, QSocketNotifier::Write, 0);
+ connect(m_write, SIGNAL(activated(int)), this, SLOT(write()));
+ }
+
+ void delWrite() {
+ if (!m_write) return;
+ delete m_write;
+ m_write = 0;
+ }
+
+ void cleanup() {
+ delRead();
+ delWrite();
+ }
+
+ private slots:
+ void read() { redisAsyncHandleRead(m_ctx); }
+ void write() { redisAsyncHandleWrite(m_ctx); }
+
+ private:
+ redisAsyncContext * m_ctx;
+ QSocketNotifier * m_read;
+ QSocketNotifier * m_write;
+};
+
+#endif /* !__HIREDIS_QT_H__ */
diff --git a/deps/hiredis/appveyor.yml b/deps/hiredis/appveyor.yml
new file mode 100644
index 000000000..06bbef117
--- /dev/null
+++ b/deps/hiredis/appveyor.yml
@@ -0,0 +1,36 @@
+# Appveyor configuration file for CI build of hiredis on Windows (under Cygwin)
+environment:
+ matrix:
+ - CYG_ROOT: C:\cygwin64
+ CYG_SETUP: setup-x86_64.exe
+ CYG_MIRROR: http://cygwin.mirror.constant.com
+ CYG_CACHE: C:\cygwin64\var\cache\setup
+ CYG_BASH: C:\cygwin64\bin\bash
+ CC: gcc
+ - CYG_ROOT: C:\cygwin
+ CYG_SETUP: setup-x86.exe
+ CYG_MIRROR: http://cygwin.mirror.constant.com
+ CYG_CACHE: C:\cygwin\var\cache\setup
+ CYG_BASH: C:\cygwin\bin\bash
+ CC: gcc
+ TARGET: 32bit
+ TARGET_VARS: 32bit-vars
+
+# Cache Cygwin files to speed up build
+cache:
+ - '%CYG_CACHE%'
+clone_depth: 1
+
+# Attempt to ensure we don't try to convert line endings to Win32 CRLF as this will cause the build to fail
+init:
+ - git config --global core.autocrlf input
+
+# Install needed build dependencies
+install:
+ - ps: 'Start-FileDownload "http://cygwin.com/$env:CYG_SETUP" -FileName "$env:CYG_SETUP"'
+ - '%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages automake,bison,gcc-core,libtool,make,gettext-devel,gettext,intltool,pkg-config,clang,llvm > NUL 2>&1'
+ - '%CYG_BASH% -lc "cygcheck -dc cygwin"'
+
+build_script:
+ - 'echo building...'
+ - '%CYG_BASH% -lc "cd $APPVEYOR_BUILD_FOLDER; exec 0</dev/null; make LDFLAGS=$LDFLAGS CC=$CC $TARGET CFLAGS=$CFLAGS && make LDFLAGS=$LDFLAGS CC=$CC $TARGET_VARS hiredis-example"'
diff --git a/deps/hiredis/async.c b/deps/hiredis/async.c
index f7f343bef..d955203f8 100644
--- a/deps/hiredis/async.c
+++ b/deps/hiredis/async.c
@@ -58,7 +58,7 @@
} while(0);
/* Forward declaration of function in hiredis.c */
-void __redisAppendCommand(redisContext *c, char *cmd, size_t len);
+int __redisAppendCommand(redisContext *c, const char *cmd, size_t len);
/* Functions managing dictionary of callbacks for pub/sub. */
static unsigned int callbackHash(const void *key) {
@@ -142,6 +142,9 @@ static redisAsyncContext *redisAsyncInitialize(redisContext *c) {
/* We want the error field to be accessible directly instead of requiring
* an indirection to the redisContext struct. */
static void __redisAsyncCopyError(redisAsyncContext *ac) {
+ if (!ac)
+ return;
+
redisContext *c = &(ac->c);
ac->err = c->err;
ac->errstr = c->errstr;
@@ -173,6 +176,14 @@ redisAsyncContext *redisAsyncConnectBind(const char *ip, int port,
return ac;
}
+redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port,
+ const char *source_addr) {
+ redisContext *c = redisConnectBindNonBlockWithReuse(ip,port,source_addr);
+ redisAsyncContext *ac = redisAsyncInitialize(c);
+ __redisAsyncCopyError(ac);
+ return ac;
+}
+
redisAsyncContext *redisAsyncConnectUnix(const char *path) {
redisContext *c;
redisAsyncContext *ac;
@@ -407,7 +418,8 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
if (reply == NULL) {
/* When the connection is being disconnected and there are
* no more replies, this is the cue to really disconnect. */
- if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0) {
+ if (c->flags & REDIS_DISCONNECTING && sdslen(c->obuf) == 0
+ && ac->replies.head == NULL) {
__redisAsyncDisconnect(ac);
return;
}
@@ -443,6 +455,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
if (((redisReply*)reply)->type == REDIS_REPLY_ERROR) {
c->err = REDIS_ERR_OTHER;
snprintf(c->errstr,sizeof(c->errstr),"%s",((redisReply*)reply)->str);
+ c->reader->fn->freeObject(reply);
__redisAsyncDisconnect(ac);
return;
}
@@ -476,7 +489,7 @@ void redisProcessCallbacks(redisAsyncContext *ac) {
}
/* Internal helper function to detect socket status the first time a read or
- * write event fires. When connecting was not succesful, the connect callback
+ * write event fires. When connecting was not successful, the connect callback
* is called with a REDIS_ERR status and the context is free'd. */
static int __redisAsyncHandleConnect(redisAsyncContext *ac) {
redisContext *c = &(ac->c);
@@ -550,8 +563,8 @@ void redisAsyncHandleWrite(redisAsyncContext *ac) {
/* Sets a pointer to the first argument and its length starting at p. Returns
* the number of bytes to skip to get to the following argument. */
-static char *nextArgument(char *start, char **str, size_t *len) {
- char *p = start;
+static const char *nextArgument(const char *start, const char **str, size_t *len) {
+ const char *p = start;
if (p[0] != '$') {
p = strchr(p,'$');
if (p == NULL) return NULL;
@@ -567,14 +580,15 @@ static char *nextArgument(char *start, char **str, size_t *len) {
/* Helper function for the redisAsyncCommand* family of functions. Writes a
* formatted command to the output buffer and registers the provided callback
* function with the context. */
-static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, char *cmd, size_t len) {
+static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) {
redisContext *c = &(ac->c);
redisCallback cb;
int pvariant, hasnext;
- char *cstr, *astr;
+ const char *cstr, *astr;
size_t clen, alen;
- char *p;
+ const char *p;
sds sname;
+ int ret;
/* Don't accept new commands when the connection is about to be closed. */
if (c->flags & (REDIS_DISCONNECTING | REDIS_FREEING)) return REDIS_ERR;
@@ -598,9 +612,11 @@ static int __redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void
while ((p = nextArgument(p,&astr,&alen)) != NULL) {
sname = sdsnewlen(astr,alen);
if (pvariant)
- dictReplace(ac->sub.patterns,sname,&cb);
+ ret = dictReplace(ac->sub.patterns,sname,&cb);
else
- dictReplace(ac->sub.channels,sname,&cb);
+ ret = dictReplace(ac->sub.channels,sname,&cb);
+
+ if (ret == 0) sdsfree(sname);
}
} else if (strncasecmp(cstr,"unsubscribe\r\n",13) == 0) {
/* It is only useful to call (P)UNSUBSCRIBE when the context is
@@ -636,6 +652,11 @@ int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdat
int len;
int status;
len = redisvFormatCommand(&cmd,format,ap);
+
+ /* We don't want to pass -1 or -2 to future functions as a length. */
+ if (len < 0)
+ return REDIS_ERR;
+
status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
free(cmd);
return status;
@@ -651,11 +672,16 @@ int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata
}
int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen) {
- char *cmd;
+ sds cmd;
int len;
int status;
- len = redisFormatCommandArgv(&cmd,argc,argv,argvlen);
+ len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen);
status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
- free(cmd);
+ sdsfree(cmd);
+ return status;
+}
+
+int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len) {
+ int status = __redisAsyncCommand(ac,fn,privdata,cmd,len);
return status;
}
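The pre-formatted variant above copies the caller's buffer into the context's output buffer, so the buffer can be freed right after the call. A minimal sketch combining the new entry points; the source address "10.0.0.2" and the omitted event-loop attach step are illustrative assumptions:

    /* Async connect bound to a fixed source address with SO_REUSEADDR. */
    redisAsyncContext *ac =
        redisAsyncConnectBindWithReuse("127.0.0.1", 6379, "10.0.0.2");
    if (ac->err) { /* handle connect error, then return */ }

    /* Format once, submit as-is; the command bytes are copied into obuf. */
    char *cmd;
    int len = redisFormatCommand(&cmd, "SET key %s", "value");
    if (len > 0) {
        redisAsyncFormattedCommand(ac, NULL, NULL, cmd, len);
        redisFreeCommand(cmd);
    }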
diff --git a/deps/hiredis/async.h b/deps/hiredis/async.h
index 8a2cf1ecd..59cbf469b 100644
--- a/deps/hiredis/async.h
+++ b/deps/hiredis/async.h
@@ -103,6 +103,8 @@ typedef struct redisAsyncContext {
/* Functions that proxy to hiredis */
redisAsyncContext *redisAsyncConnect(const char *ip, int port);
redisAsyncContext *redisAsyncConnectBind(const char *ip, int port, const char *source_addr);
+redisAsyncContext *redisAsyncConnectBindWithReuse(const char *ip, int port,
+ const char *source_addr);
redisAsyncContext *redisAsyncConnectUnix(const char *path);
int redisAsyncSetConnectCallback(redisAsyncContext *ac, redisConnectCallback *fn);
int redisAsyncSetDisconnectCallback(redisAsyncContext *ac, redisDisconnectCallback *fn);
@@ -118,6 +120,7 @@ void redisAsyncHandleWrite(redisAsyncContext *ac);
int redisvAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, va_list ap);
int redisAsyncCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *format, ...);
int redisAsyncCommandArgv(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, int argc, const char **argv, const size_t *argvlen);
+int redisAsyncFormattedCommand(redisAsyncContext *ac, redisCallbackFn *fn, void *privdata, const char *cmd, size_t len);
#ifdef __cplusplus
}
diff --git a/deps/hiredis/dict.c b/deps/hiredis/dict.c
index 79b1041ca..e17a62546 100644
--- a/deps/hiredis/dict.c
+++ b/deps/hiredis/dict.c
@@ -161,7 +161,7 @@ static int dictReplace(dict *ht, void *key, void *val) {
dictEntry *entry, auxentry;
/* Try to add the element. If the key
- * does not exists dictAdd will suceed. */
+ * does not exist, dictAdd will succeed. */
if (dictAdd(ht, key, val) == DICT_OK)
return 1;
/* It already exists, get the entry */
@@ -293,7 +293,7 @@ static void dictReleaseIterator(dictIterator *iter) {
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *ht) {
- /* If the hash table is empty expand it to the intial size,
+ /* If the hash table is empty expand it to the initial size,
 * if the table is "full" double its size. */
if (ht->size == 0)
return dictExpand(ht, DICT_HT_INITIAL_SIZE);
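The subscribe hunk in async.c above depends on this function's return value: dictReplace() returns 1 when the key was newly added (the dict takes ownership of the sds key) and 0 when it replaced the value under an existing key, in which case the caller's duplicate key must be freed. In sketch form:

    sds sname = sdsnewlen(astr, alen);
    /* 0 means an entry with this key already existed; the dict kept its
     * own copy of the key, so the freshly allocated one is redundant. */
    if (dictReplace(ac->sub.channels, sname, &cb) == 0)
        sdsfree(sname);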
diff --git a/deps/hiredis/examples/example-glib.c b/deps/hiredis/examples/example-glib.c
new file mode 100644
index 000000000..d6e10f8e8
--- /dev/null
+++ b/deps/hiredis/examples/example-glib.c
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/glib.h>
+
+static GMainLoop *mainloop;
+
+static void
+connect_cb (const redisAsyncContext *ac G_GNUC_UNUSED,
+ int status)
+{
+ if (status != REDIS_OK) {
+ g_printerr("Failed to connect: %s\n", ac->errstr);
+ g_main_loop_quit(mainloop);
+ } else {
+ g_printerr("Connected...\n");
+ }
+}
+
+static void
+disconnect_cb (const redisAsyncContext *ac G_GNUC_UNUSED,
+ int status)
+{
+ if (status != REDIS_OK) {
+ g_error("Failed to disconnect: %s", ac->errstr);
+ } else {
+ g_printerr("Disconnected...\n");
+ g_main_loop_quit(mainloop);
+ }
+}
+
+static void
+command_cb(redisAsyncContext *ac,
+ gpointer r,
+ gpointer user_data G_GNUC_UNUSED)
+{
+ redisReply *reply = r;
+
+ if (reply) {
+ g_print("REPLY: %s\n", reply->str);
+ }
+
+ redisAsyncDisconnect(ac);
+}
+
+gint
+main (gint argc G_GNUC_UNUSED,
+ gchar *argv[] G_GNUC_UNUSED)
+{
+ redisAsyncContext *ac;
+ GMainContext *context = NULL;
+ GSource *source;
+
+ ac = redisAsyncConnect("127.0.0.1", 6379);
+ if (ac->err) {
+ g_printerr("%s\n", ac->errstr);
+ exit(EXIT_FAILURE);
+ }
+
+ source = redis_source_new(ac);
+ mainloop = g_main_loop_new(context, FALSE);
+ g_source_attach(source, context);
+
+ redisAsyncSetConnectCallback(ac, connect_cb);
+ redisAsyncSetDisconnectCallback(ac, disconnect_cb);
+ redisAsyncCommand(ac, command_cb, NULL, "SET key 1234");
+ redisAsyncCommand(ac, command_cb, NULL, "GET key");
+
+ g_main_loop_run(mainloop);
+
+ return EXIT_SUCCESS;
+}
diff --git a/deps/hiredis/examples/example-ivykis.c b/deps/hiredis/examples/example-ivykis.c
new file mode 100644
index 000000000..67affcef3
--- /dev/null
+++ b/deps/hiredis/examples/example-ivykis.c
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/ivykis.h>
+
+void getCallback(redisAsyncContext *c, void *r, void *privdata) {
+ redisReply *reply = r;
+ if (reply == NULL) return;
+ printf("argv[%s]: %s\n", (char*)privdata, reply->str);
+
+ /* Disconnect after receiving the reply to GET */
+ redisAsyncDisconnect(c);
+}
+
+void connectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Connected...\n");
+}
+
+void disconnectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Disconnected...\n");
+}
+
+int main (int argc, char **argv) {
+ signal(SIGPIPE, SIG_IGN);
+
+ iv_init();
+
+ redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+ if (c->err) {
+ /* Let *c leak for now... */
+ printf("Error: %s\n", c->errstr);
+ return 1;
+ }
+
+ redisIvykisAttach(c);
+ redisAsyncSetConnectCallback(c,connectCallback);
+ redisAsyncSetDisconnectCallback(c,disconnectCallback);
+ redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1]));
+ redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key");
+
+ iv_main();
+
+ iv_deinit();
+
+ return 0;
+}
diff --git a/deps/hiredis/examples/example-macosx.c b/deps/hiredis/examples/example-macosx.c
new file mode 100644
index 000000000..bc84ed5ba
--- /dev/null
+++ b/deps/hiredis/examples/example-macosx.c
@@ -0,0 +1,66 @@
+//
+// Created by Дмитрий Бахвалов on 13.07.15.
+// Copyright (c) 2015 Dmitry Bakhvalov. All rights reserved.
+//
+
+#include <stdio.h>
+#include <signal.h>
+
+#include <hiredis.h>
+#include <async.h>
+#include <adapters/macosx.h>
+
+void getCallback(redisAsyncContext *c, void *r, void *privdata) {
+ redisReply *reply = r;
+ if (reply == NULL) return;
+ printf("argv[%s]: %s\n", (char*)privdata, reply->str);
+
+ /* Disconnect after receiving the reply to GET */
+ redisAsyncDisconnect(c);
+}
+
+void connectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ printf("Connected...\n");
+}
+
+void disconnectCallback(const redisAsyncContext *c, int status) {
+ if (status != REDIS_OK) {
+ printf("Error: %s\n", c->errstr);
+ return;
+ }
+ CFRunLoopStop(CFRunLoopGetCurrent());
+ printf("Disconnected...\n");
+}
+
+int main (int argc, char **argv) {
+ signal(SIGPIPE, SIG_IGN);
+
+ CFRunLoopRef loop = CFRunLoopGetCurrent();
+ if( !loop ) {
+ printf("Error: Cannot get current run loop\n");
+ return 1;
+ }
+
+ redisAsyncContext *c = redisAsyncConnect("127.0.0.1", 6379);
+ if (c->err) {
+ /* Let *c leak for now... */
+ printf("Error: %s\n", c->errstr);
+ return 1;
+ }
+
+ redisMacOSAttach(c, loop);
+
+ redisAsyncSetConnectCallback(c,connectCallback);
+ redisAsyncSetDisconnectCallback(c,disconnectCallback);
+
+ redisAsyncCommand(c, NULL, NULL, "SET key %b", argv[argc-1], strlen(argv[argc-1]));
+ redisAsyncCommand(c, getCallback, (char*)"end-1", "GET key");
+
+ CFRunLoopRun();
+
+ return 0;
+}
+
diff --git a/deps/hiredis/examples/example-qt.cpp b/deps/hiredis/examples/example-qt.cpp
new file mode 100644
index 000000000..f524c3f3d
--- /dev/null
+++ b/deps/hiredis/examples/example-qt.cpp
@@ -0,0 +1,46 @@
+#include <iostream>
+using namespace std;
+
+#include <QCoreApplication>
+#include <QTimer>
+
+#include "example-qt.h"
+
+void getCallback(redisAsyncContext *, void * r, void * privdata) {
+
+ redisReply * reply = static_cast<redisReply *>(r);
+ ExampleQt * ex = static_cast<ExampleQt *>(privdata);
+ if (reply == nullptr || ex == nullptr) return;
+
+ cout << "key: " << reply->str << endl;
+
+ ex->finish();
+}
+
+void ExampleQt::run() {
+
+ m_ctx = redisAsyncConnect("localhost", 6379);
+
+ if (m_ctx->err) {
+ cerr << "Error: " << m_ctx->errstr << endl;
+ redisAsyncFree(m_ctx);
+ emit finished();
+ return;
+ }
+
+ m_adapter.setContext(m_ctx);
+
+ redisAsyncCommand(m_ctx, NULL, NULL, "SET key %s", m_value);
+ redisAsyncCommand(m_ctx, getCallback, this, "GET key");
+}
+
+int main (int argc, char **argv) {
+
+ QCoreApplication app(argc, argv);
+
+ ExampleQt example(argv[argc-1]);
+
+ QObject::connect(&example, SIGNAL(finished()), &app, SLOT(quit()));
+ QTimer::singleShot(0, &example, SLOT(run()));
+
+ return app.exec();
+}
diff --git a/deps/hiredis/examples/example-qt.h b/deps/hiredis/examples/example-qt.h
new file mode 100644
index 000000000..374f47666
--- /dev/null
+++ b/deps/hiredis/examples/example-qt.h
@@ -0,0 +1,32 @@
+#ifndef __HIREDIS_EXAMPLE_QT_H
+#define __HIREDIS_EXAMPLE_QT_H
+
+#include <adapters/qt.h>
+
+class ExampleQt : public QObject {
+
+ Q_OBJECT
+
+ public:
+ ExampleQt(const char * value, QObject * parent = 0)
+ : QObject(parent), m_value(value) {}
+
+ signals:
+ void finished();
+
+ public slots:
+ void run();
+
+ private:
+ void finish() { emit finished(); }
+
+ private:
+ const char * m_value;
+ redisAsyncContext * m_ctx;
+ RedisQtAdapter m_adapter;
+
+ friend
+ void getCallback(redisAsyncContext *, void *, void *);
+};
+
+#endif /* !__HIREDIS_EXAMPLE_QT_H */
diff --git a/deps/hiredis/examples/example.c b/deps/hiredis/examples/example.c
index 25226a807..4d494c55a 100644
--- a/deps/hiredis/examples/example.c
+++ b/deps/hiredis/examples/example.c
@@ -57,7 +57,7 @@ int main(int argc, char **argv) {
for (j = 0; j < 10; j++) {
char buf[64];
- snprintf(buf,64,"%d",j);
+ snprintf(buf,64,"%u",j);
reply = redisCommand(c,"LPUSH mylist element-%s", buf);
freeReplyObject(reply);
}
diff --git a/deps/hiredis/fmacros.h b/deps/hiredis/fmacros.h
index 6a41aa176..9a56643df 100644
--- a/deps/hiredis/fmacros.h
+++ b/deps/hiredis/fmacros.h
@@ -1,23 +1,24 @@
#ifndef __HIREDIS_FMACRO_H
#define __HIREDIS_FMACRO_H
-#if !defined(_BSD_SOURCE)
+#if defined(__linux__)
#define _BSD_SOURCE
+#define _DEFAULT_SOURCE
#endif
-#if defined(_AIX)
-#define _ALL_SOURCE
+#if defined(__CYGWIN__)
+#include <sys/cdefs.h>
#endif
#if defined(__sun__)
#define _POSIX_C_SOURCE 200112L
-#elif defined(__linux__) || defined(__OpenBSD__) || defined(__NetBSD__)
-#define _XOPEN_SOURCE 600
#else
-#define _XOPEN_SOURCE
+#if !(defined(__APPLE__) && defined(__MACH__)) && !(defined(__FreeBSD__))
+#define _XOPEN_SOURCE 600
+#endif
#endif
-#if __APPLE__ && __MACH__
+#if defined(__APPLE__) && defined(__MACH__)
#define _OSX
#endif
diff --git a/deps/hiredis/hiredis.c b/deps/hiredis/hiredis.c
index 2afee5666..18bdfc99c 100644
--- a/deps/hiredis/hiredis.c
+++ b/deps/hiredis/hiredis.c
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -73,6 +75,9 @@ void freeReplyObject(void *reply) {
redisReply *r = reply;
size_t j;
+ if (r == NULL)
+ return;
+
switch(r->type) {
case REDIS_REPLY_INTEGER:
break; /* Nothing to free */
@@ -183,504 +188,23 @@ static void *createNilObject(const redisReadTask *task) {
return r;
}
-static void __redisReaderSetError(redisReader *r, int type, const char *str) {
- size_t len;
-
- if (r->reply != NULL && r->fn && r->fn->freeObject) {
- r->fn->freeObject(r->reply);
- r->reply = NULL;
- }
-
- /* Clear input buffer on errors. */
- if (r->buf != NULL) {
- sdsfree(r->buf);
- r->buf = NULL;
- r->pos = r->len = 0;
- }
-
- /* Reset task stack. */
- r->ridx = -1;
-
- /* Set error. */
- r->err = type;
- len = strlen(str);
- len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1);
- memcpy(r->errstr,str,len);
- r->errstr[len] = '\0';
-}
-
-static size_t chrtos(char *buf, size_t size, char byte) {
- size_t len = 0;
-
- switch(byte) {
- case '\\':
- case '"':
- len = snprintf(buf,size,"\"\\%c\"",byte);
- break;
- case '\n': len = snprintf(buf,size,"\"\\n\""); break;
- case '\r': len = snprintf(buf,size,"\"\\r\""); break;
- case '\t': len = snprintf(buf,size,"\"\\t\""); break;
- case '\a': len = snprintf(buf,size,"\"\\a\""); break;
- case '\b': len = snprintf(buf,size,"\"\\b\""); break;
- default:
- if (isprint(byte))
- len = snprintf(buf,size,"\"%c\"",byte);
- else
- len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte);
- break;
- }
-
- return len;
-}
-
-static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) {
- char cbuf[8], sbuf[128];
-
- chrtos(cbuf,sizeof(cbuf),byte);
- snprintf(sbuf,sizeof(sbuf),
- "Protocol error, got %s as reply type byte", cbuf);
- __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf);
-}
-
-static void __redisReaderSetErrorOOM(redisReader *r) {
- __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory");
-}
-
-static char *readBytes(redisReader *r, unsigned int bytes) {
- char *p;
- if (r->len-r->pos >= bytes) {
- p = r->buf+r->pos;
- r->pos += bytes;
- return p;
- }
- return NULL;
-}
-
-/* Find pointer to \r\n. */
-static char *seekNewline(char *s, size_t len) {
- int pos = 0;
- int _len = len-1;
-
- /* Position should be < len-1 because the character at "pos" should be
- * followed by a \n. Note that strchr cannot be used because it doesn't
- * allow to search a limited length and the buffer that is being searched
- * might not have a trailing NULL character. */
- while (pos < _len) {
- while(pos < _len && s[pos] != '\r') pos++;
- if (s[pos] != '\r') {
- /* Not found. */
- return NULL;
- } else {
- if (s[pos+1] == '\n') {
- /* Found. */
- return s+pos;
- } else {
- /* Continue searching. */
- pos++;
- }
- }
- }
- return NULL;
-}
-
-/* Read a long long value starting at *s, under the assumption that it will be
- * terminated by \r\n. Ambiguously returns -1 for unexpected input. */
-static long long readLongLong(char *s) {
- long long v = 0;
- int dec, mult = 1;
- char c;
-
- if (*s == '-') {
- mult = -1;
- s++;
- } else if (*s == '+') {
- mult = 1;
- s++;
- }
-
- while ((c = *(s++)) != '\r') {
- dec = c - '0';
- if (dec >= 0 && dec < 10) {
- v *= 10;
- v += dec;
- } else {
- /* Should not happen... */
- return -1;
- }
- }
-
- return mult*v;
-}
-
-static char *readLine(redisReader *r, int *_len) {
- char *p, *s;
- int len;
-
- p = r->buf+r->pos;
- s = seekNewline(p,(r->len-r->pos));
- if (s != NULL) {
- len = s-(r->buf+r->pos);
- r->pos += len+2; /* skip \r\n */
- if (_len) *_len = len;
- return p;
- }
- return NULL;
-}
-
-static void moveToNextTask(redisReader *r) {
- redisReadTask *cur, *prv;
- while (r->ridx >= 0) {
- /* Return a.s.a.p. when the stack is now empty. */
- if (r->ridx == 0) {
- r->ridx--;
- return;
- }
-
- cur = &(r->rstack[r->ridx]);
- prv = &(r->rstack[r->ridx-1]);
- assert(prv->type == REDIS_REPLY_ARRAY);
- if (cur->idx == prv->elements-1) {
- r->ridx--;
- } else {
- /* Reset the type because the next item can be anything */
- assert(cur->idx < prv->elements);
- cur->type = -1;
- cur->elements = -1;
- cur->idx++;
- return;
- }
- }
-}
-
-static int processLineItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj;
- char *p;
- int len;
-
- if ((p = readLine(r,&len)) != NULL) {
- if (cur->type == REDIS_REPLY_INTEGER) {
- if (r->fn && r->fn->createInteger)
- obj = r->fn->createInteger(cur,readLongLong(p));
- else
- obj = (void*)REDIS_REPLY_INTEGER;
- } else {
- /* Type will be error or status. */
- if (r->fn && r->fn->createString)
- obj = r->fn->createString(cur,p,len);
- else
- obj = (void*)(size_t)(cur->type);
- }
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- /* Set reply if this is the root object. */
- if (r->ridx == 0) r->reply = obj;
- moveToNextTask(r);
- return REDIS_OK;
- }
-
- return REDIS_ERR;
-}
-
-static int processBulkItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj = NULL;
- char *p, *s;
- long len;
- unsigned long bytelen;
- int success = 0;
-
- p = r->buf+r->pos;
- s = seekNewline(p,r->len-r->pos);
- if (s != NULL) {
- p = r->buf+r->pos;
- bytelen = s-(r->buf+r->pos)+2; /* include \r\n */
- len = readLongLong(p);
-
- if (len < 0) {
- /* The nil object can always be created. */
- if (r->fn && r->fn->createNil)
- obj = r->fn->createNil(cur);
- else
- obj = (void*)REDIS_REPLY_NIL;
- success = 1;
- } else {
- /* Only continue when the buffer contains the entire bulk item. */
- bytelen += len+2; /* include \r\n */
- if (r->pos+bytelen <= r->len) {
- if (r->fn && r->fn->createString)
- obj = r->fn->createString(cur,s+2,len);
- else
- obj = (void*)REDIS_REPLY_STRING;
- success = 1;
- }
- }
-
- /* Proceed when obj was created. */
- if (success) {
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- r->pos += bytelen;
-
- /* Set reply if this is the root object. */
- if (r->ridx == 0) r->reply = obj;
- moveToNextTask(r);
- return REDIS_OK;
- }
- }
-
- return REDIS_ERR;
-}
-
-static int processMultiBulkItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- void *obj;
- char *p;
- long elements;
- int root = 0;
-
- /* Set error for nested multi bulks with depth > 7 */
- if (r->ridx == 8) {
- __redisReaderSetError(r,REDIS_ERR_PROTOCOL,
- "No support for nested multi bulk replies with depth > 7");
- return REDIS_ERR;
- }
-
- if ((p = readLine(r,NULL)) != NULL) {
- elements = readLongLong(p);
- root = (r->ridx == 0);
-
- if (elements == -1) {
- if (r->fn && r->fn->createNil)
- obj = r->fn->createNil(cur);
- else
- obj = (void*)REDIS_REPLY_NIL;
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- moveToNextTask(r);
- } else {
- if (r->fn && r->fn->createArray)
- obj = r->fn->createArray(cur,elements);
- else
- obj = (void*)REDIS_REPLY_ARRAY;
-
- if (obj == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- /* Modify task stack when there are more than 0 elements. */
- if (elements > 0) {
- cur->elements = elements;
- cur->obj = obj;
- r->ridx++;
- r->rstack[r->ridx].type = -1;
- r->rstack[r->ridx].elements = -1;
- r->rstack[r->ridx].idx = 0;
- r->rstack[r->ridx].obj = NULL;
- r->rstack[r->ridx].parent = cur;
- r->rstack[r->ridx].privdata = r->privdata;
- } else {
- moveToNextTask(r);
- }
- }
-
- /* Set reply if this is the root object. */
- if (root) r->reply = obj;
- return REDIS_OK;
- }
-
- return REDIS_ERR;
-}
-
-static int processItem(redisReader *r) {
- redisReadTask *cur = &(r->rstack[r->ridx]);
- char *p;
-
- /* check if we need to read type */
- if (cur->type < 0) {
- if ((p = readBytes(r,1)) != NULL) {
- switch (p[0]) {
- case '-':
- cur->type = REDIS_REPLY_ERROR;
- break;
- case '+':
- cur->type = REDIS_REPLY_STATUS;
- break;
- case ':':
- cur->type = REDIS_REPLY_INTEGER;
- break;
- case '$':
- cur->type = REDIS_REPLY_STRING;
- break;
- case '*':
- cur->type = REDIS_REPLY_ARRAY;
- break;
- default:
- __redisReaderSetErrorProtocolByte(r,*p);
- return REDIS_ERR;
- }
- } else {
- /* could not consume 1 byte */
- return REDIS_ERR;
- }
- }
-
- /* process typed item */
- switch(cur->type) {
- case REDIS_REPLY_ERROR:
- case REDIS_REPLY_STATUS:
- case REDIS_REPLY_INTEGER:
- return processLineItem(r);
- case REDIS_REPLY_STRING:
- return processBulkItem(r);
- case REDIS_REPLY_ARRAY:
- return processMultiBulkItem(r);
- default:
- assert(NULL);
- return REDIS_ERR; /* Avoid warning. */
- }
-}
-
-redisReader *redisReaderCreate(void) {
- redisReader *r;
-
- r = calloc(sizeof(redisReader),1);
- if (r == NULL)
- return NULL;
-
- r->err = 0;
- r->errstr[0] = '\0';
- r->fn = &defaultFunctions;
- r->buf = sdsempty();
- r->maxbuf = REDIS_READER_MAX_BUF;
- if (r->buf == NULL) {
- free(r);
- return NULL;
- }
-
- r->ridx = -1;
- return r;
-}
-
-void redisReaderFree(redisReader *r) {
- if (r->reply != NULL && r->fn && r->fn->freeObject)
- r->fn->freeObject(r->reply);
- if (r->buf != NULL)
- sdsfree(r->buf);
- free(r);
-}
-
-int redisReaderFeed(redisReader *r, const char *buf, size_t len) {
- sds newbuf;
-
- /* Return early when this reader is in an erroneous state. */
- if (r->err)
- return REDIS_ERR;
-
- /* Copy the provided buffer. */
- if (buf != NULL && len >= 1) {
- /* Destroy internal buffer when it is empty and is quite large. */
- if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) {
- sdsfree(r->buf);
- r->buf = sdsempty();
- r->pos = 0;
-
- /* r->buf should not be NULL since we just free'd a larger one. */
- assert(r->buf != NULL);
- }
-
- newbuf = sdscatlen(r->buf,buf,len);
- if (newbuf == NULL) {
- __redisReaderSetErrorOOM(r);
- return REDIS_ERR;
- }
-
- r->buf = newbuf;
- r->len = sdslen(r->buf);
- }
-
- return REDIS_OK;
-}
-
-int redisReaderGetReply(redisReader *r, void **reply) {
- /* Default target pointer to NULL. */
- if (reply != NULL)
- *reply = NULL;
-
- /* Return early when this reader is in an erroneous state. */
- if (r->err)
- return REDIS_ERR;
-
- /* When the buffer is empty, there will never be a reply. */
- if (r->len == 0)
- return REDIS_OK;
-
- /* Set first item to process when the stack is empty. */
- if (r->ridx == -1) {
- r->rstack[0].type = -1;
- r->rstack[0].elements = -1;
- r->rstack[0].idx = -1;
- r->rstack[0].obj = NULL;
- r->rstack[0].parent = NULL;
- r->rstack[0].privdata = r->privdata;
- r->ridx = 0;
- }
-
- /* Process items in reply. */
- while (r->ridx >= 0)
- if (processItem(r) != REDIS_OK)
- break;
-
- /* Return ASAP when an error occurred. */
- if (r->err)
- return REDIS_ERR;
-
- /* Discard part of the buffer when we've consumed at least 1k, to avoid
- * doing unnecessary calls to memmove() in sds.c. */
- if (r->pos >= 1024) {
- sdsrange(r->buf,r->pos,-1);
- r->pos = 0;
- r->len = sdslen(r->buf);
- }
-
- /* Emit a reply when there is one. */
- if (r->ridx == -1) {
- if (reply != NULL)
- *reply = r->reply;
- r->reply = NULL;
- }
- return REDIS_OK;
-}
-
-/* Calculate the number of bytes needed to represent an integer as string. */
-static int intlen(int i) {
- int len = 0;
- if (i < 0) {
- len++;
- i = -i;
- }
- do {
- len++;
- i /= 10;
- } while(i);
- return len;
+/* Return the number of digits of 'v' when converted to a string in radix 10.
+ * Implementation borrowed from the link in redis/src/util.c:string2ll(). */
+static uint32_t countDigits(uint64_t v) {
+ uint32_t result = 1;
+ for (;;) {
+ if (v < 10) return result;
+ if (v < 100) return result + 1;
+ if (v < 1000) return result + 2;
+ if (v < 10000) return result + 3;
+ v /= 10000U;
+ result += 4;
+ }
}
/* Helper that calculates the bulk length given a certain string length. */
static size_t bulklen(size_t len) {
- return 1+intlen(len)+2+len+2;
+ return 1+countDigits(len)+2+len+2;
}
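As a worked check of the two helpers above: the RESP bulk encoding of "hello" is "$5\r\nhello\r\n", and bulklen(5) = 1 + countDigits(5) + 2 + 5 + 2 = 1 + 1 + 2 + 5 + 2 = 11, exactly the encoded length:

    /* Sketch; both helpers are static, so this would live inside hiredis.c. */
    assert(bulklen(5) == strlen("$5\r\nhello\r\n")); /* 11 */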
int redisvFormatCommand(char **target, const char *format, va_list ap) {
@@ -692,6 +216,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
char **curargv = NULL, **newargv = NULL;
int argc = 0;
int totlen = 0;
+ int error_type = 0; /* 0 = no error; -1 = memory error; -2 = format error */
int j;
/* Abort if there is not target to set */
@@ -708,19 +233,19 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
if (*c == ' ') {
if (touched) {
newargv = realloc(curargv,sizeof(char*)*(argc+1));
- if (newargv == NULL) goto err;
+ if (newargv == NULL) goto memory_err;
curargv = newargv;
curargv[argc++] = curarg;
totlen += bulklen(sdslen(curarg));
/* curarg is put in argv so it can be overwritten. */
curarg = sdsempty();
- if (curarg == NULL) goto err;
+ if (curarg == NULL) goto memory_err;
touched = 0;
}
} else {
newarg = sdscatlen(curarg,c,1);
- if (newarg == NULL) goto err;
+ if (newarg == NULL) goto memory_err;
curarg = newarg;
touched = 1;
}
@@ -751,17 +276,14 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
/* Try to detect printf format */
{
static const char intfmts[] = "diouxX";
+ static const char flags[] = "#0-+ ";
char _format[16];
const char *_p = c+1;
size_t _l = 0;
va_list _cpy;
/* Flags */
- if (*_p != '\0' && *_p == '#') _p++;
- if (*_p != '\0' && *_p == '0') _p++;
- if (*_p != '\0' && *_p == '-') _p++;
- if (*_p != '\0' && *_p == ' ') _p++;
- if (*_p != '\0' && *_p == '+') _p++;
+ while (*_p != '\0' && strchr(flags,*_p) != NULL) _p++;
/* Field width */
while (*_p != '\0' && isdigit(*_p)) _p++;
@@ -829,7 +351,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
fmt_invalid:
va_end(_cpy);
- goto err;
+ goto format_err;
fmt_valid:
_l = (_p+1)-c;
@@ -848,7 +370,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
}
}
- if (newarg == NULL) goto err;
+ if (newarg == NULL) goto memory_err;
curarg = newarg;
touched = 1;
@@ -860,7 +382,7 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
/* Add the last argument if needed */
if (touched) {
newargv = realloc(curargv,sizeof(char*)*(argc+1));
- if (newargv == NULL) goto err;
+ if (newargv == NULL) goto memory_err;
curargv = newargv;
curargv[argc++] = curarg;
totlen += bulklen(sdslen(curarg));
@@ -872,11 +394,11 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
curarg = NULL;
/* Add bytes needed to hold multi bulk count */
- totlen += 1+intlen(argc)+2;
+ totlen += 1+countDigits(argc)+2;
/* Build the command at protocol level */
cmd = malloc(totlen+1);
- if (cmd == NULL) goto err;
+ if (cmd == NULL) goto memory_err;
pos = sprintf(cmd,"*%d\r\n",argc);
for (j = 0; j < argc; j++) {
@@ -894,20 +416,29 @@ int redisvFormatCommand(char **target, const char *format, va_list ap) {
*target = cmd;
return totlen;
-err:
- while(argc--)
- sdsfree(curargv[argc]);
- free(curargv);
+format_err:
+ error_type = -2;
+ goto cleanup;
- if (curarg != NULL)
- sdsfree(curarg);
+memory_err:
+ error_type = -1;
+ goto cleanup;
+
+cleanup:
+ if (curargv) {
+ while(argc--)
+ sdsfree(curargv[argc]);
+ free(curargv);
+ }
+
+ sdsfree(curarg);
/* No need to check cmd since it is the last statement that can fail,
* but do it anyway to be as defensive as possible. */
if (cmd != NULL)
free(cmd);
- return -1;
+ return error_type;
}
/* Format a command according to the Redis protocol. This function
@@ -928,9 +459,69 @@ int redisFormatCommand(char **target, const char *format, ...) {
va_start(ap,format);
len = redisvFormatCommand(target,format,ap);
va_end(ap);
+
+ /* The API says "-1" means bad result, but we now also return "-2" in some
+ * cases. Force the return value to always be -1. */
+ if (len < 0)
+ len = -1;
+
return len;
}
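With this change -1 keeps meaning out of memory while -2 signals an invalid format string, and redisFormatCommand() folds both back to -1 for API compatibility. Callers of the va_list variant can tell the two apart; a sketch, assuming a caller-owned format and an ap initialized via va_start:

    char *cmd;
    int len = redisvFormatCommand(&cmd, format, ap);
    if (len == -1) {
        /* out of memory */
    } else if (len == -2) {
        /* invalid format string */
    } else {
        /* cmd holds len bytes of RESP; free(cmd) when done */
    }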
+/* Format a command according to the Redis protocol using an sds string and
+ * sdscatfmt for the processing of arguments. This function takes the
+ * number of arguments, an array with arguments and an array with their
+ * lengths. If the latter is set to NULL, strlen will be used to compute the
+ * argument lengths.
+ */
+int redisFormatSdsCommandArgv(sds *target, int argc, const char **argv,
+ const size_t *argvlen)
+{
+ sds cmd;
+ unsigned long long totlen;
+ int j;
+ size_t len;
+
+ /* Abort on a NULL target */
+ if (target == NULL)
+ return -1;
+
+ /* Calculate our total size */
+ totlen = 1+countDigits(argc)+2;
+ for (j = 0; j < argc; j++) {
+ len = argvlen ? argvlen[j] : strlen(argv[j]);
+ totlen += bulklen(len);
+ }
+
+ /* Use an SDS string for command construction */
+ cmd = sdsempty();
+ if (cmd == NULL)
+ return -1;
+
+ /* We already know how much storage we need */
+ cmd = sdsMakeRoomFor(cmd, totlen);
+ if (cmd == NULL)
+ return -1;
+
+ /* Construct command */
+ cmd = sdscatfmt(cmd, "*%i\r\n", argc);
+ for (j=0; j < argc; j++) {
+ len = argvlen ? argvlen[j] : strlen(argv[j]);
+ cmd = sdscatfmt(cmd, "$%u\r\n", len);
+ cmd = sdscatlen(cmd, argv[j], len);
+ cmd = sdscatlen(cmd, "\r\n", sizeof("\r\n")-1);
+ }
+
+ assert(sdslen(cmd)==totlen);
+
+ *target = cmd;
+ return totlen;
+}
+
+void redisFreeSdsCommand(sds cmd) {
+ sdsfree(cmd);
+}
+
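A minimal usage sketch for the sds-based formatter and its matching free helper; with argvlen set to NULL, strlen() supplies each argument's length:

    const char *argv[] = {"SET", "key", "value"};
    sds cmd;
    int len = redisFormatSdsCommandArgv(&cmd, 3, argv, NULL);
    if (len >= 0) {
        /* hand (cmd, len) to the transport, then release it */
        redisFreeSdsCommand(cmd);
    }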
/* Format a command according to the Redis protocol. This function takes the
* number of arguments, an array with arguments and an array with their
* lengths. If the latter is set to NULL, strlen will be used to compute the
@@ -942,8 +533,12 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz
size_t len;
int totlen, j;
+ /* Abort on a NULL target */
+ if (target == NULL)
+ return -1;
+
/* Calculate number of bytes needed for the command */
- totlen = 1+intlen(argc)+2;
+ totlen = 1+countDigits(argc)+2;
for (j = 0; j < argc; j++) {
len = argvlen ? argvlen[j] : strlen(argv[j]);
totlen += bulklen(len);
@@ -970,6 +565,10 @@ int redisFormatCommandArgv(char **target, int argc, const char **argv, const siz
return totlen;
}
+void redisFreeCommand(char *cmd) {
+ free(cmd);
+}
+
void __redisSetError(redisContext *c, int type, const char *str) {
size_t len;
@@ -982,10 +581,14 @@ void __redisSetError(redisContext *c, int type, const char *str) {
} else {
/* Only REDIS_ERR_IO may lack a description! */
assert(type == REDIS_ERR_IO);
- strerror_r(errno,c->errstr,sizeof(c->errstr));
+ __redis_strerror_r(errno, c->errstr, sizeof(c->errstr));
}
}
+redisReader *redisReaderCreate(void) {
+ return redisReaderCreateWithFunctions(&defaultFunctions);
+}
+
static redisContext *redisContextInit(void) {
redisContext *c;
@@ -997,24 +600,72 @@ static redisContext *redisContextInit(void) {
c->errstr[0] = '\0';
c->obuf = sdsempty();
c->reader = redisReaderCreate();
+ c->tcp.host = NULL;
+ c->tcp.source_addr = NULL;
+ c->unix_sock.path = NULL;
+ c->timeout = NULL;
+
+ if (c->obuf == NULL || c->reader == NULL) {
+ redisFree(c);
+ return NULL;
+ }
+
return c;
}
void redisFree(redisContext *c) {
+ if (c == NULL)
+ return;
if (c->fd > 0)
close(c->fd);
if (c->obuf != NULL)
sdsfree(c->obuf);
if (c->reader != NULL)
redisReaderFree(c->reader);
+ if (c->tcp.host)
+ free(c->tcp.host);
+ if (c->tcp.source_addr)
+ free(c->tcp.source_addr);
+ if (c->unix_sock.path)
+ free(c->unix_sock.path);
+ if (c->timeout)
+ free(c->timeout);
free(c);
}
int redisFreeKeepFd(redisContext *c) {
- int fd = c->fd;
- c->fd = -1;
- redisFree(c);
- return fd;
+ int fd = c->fd;
+ c->fd = -1;
+ redisFree(c);
+ return fd;
+}
+
+int redisReconnect(redisContext *c) {
+ c->err = 0;
+ memset(c->errstr, '\0', strlen(c->errstr));
+
+ if (c->fd > 0) {
+ close(c->fd);
+ }
+
+ sdsfree(c->obuf);
+ redisReaderFree(c->reader);
+
+ c->obuf = sdsempty();
+ c->reader = redisReaderCreate();
+
+ if (c->connection_type == REDIS_CONN_TCP) {
+ return redisContextConnectBindTcp(c, c->tcp.host, c->tcp.port,
+ c->timeout, c->tcp.source_addr);
+ } else if (c->connection_type == REDIS_CONN_UNIX) {
+ return redisContextConnectUnix(c, c->unix_sock.path, c->timeout);
+ } else {
+ /* Something bad happened here and shouldn't have. There isn't
+ enough information in the context to reconnect. */
+ __redisSetError(c,REDIS_ERR_OTHER,"Not enough information to reconnect");
+ }
+
+ return REDIS_ERR;
}
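Since the context now retains the host, port (or unix path), timeout and bind address, a dropped blocking connection can be re-established in place. A sketch, assuming an existing blocking redisContext *c:

    if (redisCommand(c, "PING") == NULL && c->err == REDIS_ERR_IO) {
        /* Rebuilds the fd, output buffer and reader; reuses the saved
         * connect options unmodified. */
        if (redisReconnect(c) != REDIS_OK)
            fprintf(stderr, "reconnect failed: %s\n", c->errstr);
    }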
/* Connect to a Redis instance. On error the field error in the returned
@@ -1064,6 +715,15 @@ redisContext *redisConnectBindNonBlock(const char *ip, int port,
return c;
}
+redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port,
+ const char *source_addr) {
+ redisContext *c = redisContextInit();
+ c->flags &= ~REDIS_BLOCK;
+ c->flags |= REDIS_REUSEADDR;
+ redisContextConnectBindTcp(c,ip,port,NULL,source_addr);
+ return c;
+}
+
redisContext *redisConnectUnix(const char *path) {
redisContext *c;
@@ -1162,10 +822,10 @@ int redisBufferRead(redisContext *c) {
/* Write the output buffer to the socket.
*
* Returns REDIS_OK when the buffer is empty, or (a part of) the buffer was
- * succesfully written to the socket. When the buffer is empty after the
+ * successfully written to the socket. When the buffer is empty after the
* write operation, "done" is set to 1 (if given).
*
- * Returns REDIS_ERR if an error occured trying to write and sets
+ * Returns REDIS_ERR if an error occurred trying to write and sets
* c->errstr to hold the appropriate error string.
*/
int redisBufferWrite(redisContext *c, int *done) {
@@ -1274,6 +934,9 @@ int redisvAppendCommand(redisContext *c, const char *format, va_list ap) {
if (len == -1) {
__redisSetError(c,REDIS_ERR_OOM,"Out of memory");
return REDIS_ERR;
+ } else if (len == -2) {
+ __redisSetError(c,REDIS_ERR_OTHER,"Invalid format string");
+ return REDIS_ERR;
}
if (__redisAppendCommand(c,cmd,len) != REDIS_OK) {
@@ -1296,21 +959,21 @@ int redisAppendCommand(redisContext *c, const char *format, ...) {
}
int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const size_t *argvlen) {
- char *cmd;
+ sds cmd;
int len;
- len = redisFormatCommandArgv(&cmd,argc,argv,argvlen);
+ len = redisFormatSdsCommandArgv(&cmd,argc,argv,argvlen);
if (len == -1) {
__redisSetError(c,REDIS_ERR_OOM,"Out of memory");
return REDIS_ERR;
}
if (__redisAppendCommand(c,cmd,len) != REDIS_OK) {
- free(cmd);
+ sdsfree(cmd);
return REDIS_ERR;
}
- free(cmd);
+ sdsfree(cmd);
return REDIS_OK;
}
@@ -1321,7 +984,7 @@ int redisAppendCommandArgv(redisContext *c, int argc, const char **argv, const s
* context is non-blocking, the "reply" pointer will not be used and the
* command is simply appended to the write buffer.
*
- * Returns the reply when a reply was succesfully retrieved. Returns NULL
+ * Returns the reply when a reply was successfully retrieved. Returns NULL
* otherwise. When NULL is returned in a blocking context, the error field
* in the context will be set.
*/
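In a blocking context the NULL return is the only error signal, so callers inspect the context rather than the reply; a short sketch:

    redisReply *reply = redisCommand(c, "GET key");
    if (reply == NULL) {
        /* blocking context: details are in c->err / c->errstr */
        fprintf(stderr, "error %d: %s\n", c->err, c->errstr);
    } else {
        freeReplyObject(reply);
    }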
diff --git a/deps/hiredis/hiredis.h b/deps/hiredis/hiredis.h
index 7700f4b89..423d5e504 100644
--- a/deps/hiredis/hiredis.h
+++ b/deps/hiredis/hiredis.h
@@ -1,6 +1,8 @@
/*
* Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -31,26 +33,16 @@
#ifndef __HIREDIS_H
#define __HIREDIS_H
-#include <stdio.h> /* for size_t */
+#include "read.h"
#include <stdarg.h> /* for va_list */
#include <sys/time.h> /* for struct timeval */
+#include <stdint.h> /* uintXX_t, etc */
+#include "sds.h" /* for sds */
#define HIREDIS_MAJOR 0
-#define HIREDIS_MINOR 11
-#define HIREDIS_PATCH 0
-
-#define REDIS_ERR -1
-#define REDIS_OK 0
-
-/* When an error occurs, the err flag in a context is set to hold the type of
- * error that occured. REDIS_ERR_IO means there was an I/O error and you
- * should use the "errno" variable to find out what is wrong.
- * For other values, the "errstr" field will hold a description. */
-#define REDIS_ERR_IO 1 /* Error in read or write */
-#define REDIS_ERR_EOF 3 /* End of file */
-#define REDIS_ERR_PROTOCOL 4 /* Protocol error */
-#define REDIS_ERR_OOM 5 /* Out of memory */
-#define REDIS_ERR_OTHER 2 /* Everything else... */
+#define HIREDIS_MINOR 13
+#define HIREDIS_PATCH 3
+#define HIREDIS_SONAME 0.13
/* Connection type can be blocking or non-blocking and is set in the
* least significant bit of the flags field in redisContext. */
@@ -79,17 +71,39 @@
/* Flag that is set when monitor mode is active */
#define REDIS_MONITORING 0x40
-#define REDIS_REPLY_STRING 1
-#define REDIS_REPLY_ARRAY 2
-#define REDIS_REPLY_INTEGER 3
-#define REDIS_REPLY_NIL 4
-#define REDIS_REPLY_STATUS 5
-#define REDIS_REPLY_ERROR 6
-
-#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */
+/* Flag that is set when we should set SO_REUSEADDR before calling bind() */
+#define REDIS_REUSEADDR 0x80
#define REDIS_KEEPALIVE_INTERVAL 15 /* seconds */
+/* Number of times we retry the connect when EADDRNOTAVAIL is returned and
+ * SO_REUSEADDR is being used. */
+#define REDIS_CONNECT_RETRIES 10
+
+/* strerror_r has two completely different prototypes and behaviors
+ * depending on system issues, so we need to operate on the error buffer
+ * differently depending on which strerror_r we're using. */
+#ifndef _GNU_SOURCE
+/* "regular" POSIX strerror_r that does the right thing. */
+#define __redis_strerror_r(errno, buf, len) \
+ do { \
+ strerror_r((errno), (buf), (len)); \
+ } while (0)
+#else
+/* "bad" GNU strerror_r we need to clean up after. */
+#define __redis_strerror_r(errno, buf, len) \
+ do { \
+ char *err_str = strerror_r((errno), (buf), (len)); \
+ /* If return value _isn't_ the start of the buffer we passed in, \
+ * then GNU strerror_r returned an internal static buffer and we \
+ * need to copy the result into our private buffer. */ \
+ if (err_str != (buf)) { \
+ strncpy((buf), err_str, ((len) - 1)); \
+ buf[(len)-1] = '\0'; \
+ } \
+ } while (0)
+#endif
+
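Both expansions leave the message in the caller's buffer, so call sites stay oblivious to which strerror_r prototype was compiled in; for example:

    char errbuf[128];
    __redis_strerror_r(errno, errbuf, sizeof(errbuf));
    /* errbuf holds the message under either the POSIX or GNU variant */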
#ifdef __cplusplus
extern "C" {
#endif
@@ -98,61 +112,13 @@ extern "C" {
typedef struct redisReply {
int type; /* REDIS_REPLY_* */
long long integer; /* The integer when type is REDIS_REPLY_INTEGER */
- int len; /* Length of string */
+ size_t len; /* Length of string */
char *str; /* Used for both REDIS_REPLY_ERROR and REDIS_REPLY_STRING */
size_t elements; /* number of elements, for REDIS_REPLY_ARRAY */
struct redisReply **element; /* elements vector for REDIS_REPLY_ARRAY */
} redisReply;
-typedef struct redisReadTask {
- int type;
- int elements; /* number of elements in multibulk container */
- int idx; /* index in parent (array) object */
- void *obj; /* holds user-generated value for a read task */
- struct redisReadTask *parent; /* parent task */
- void *privdata; /* user-settable arbitrary field */
-} redisReadTask;
-
-typedef struct redisReplyObjectFunctions {
- void *(*createString)(const redisReadTask*, char*, size_t);
- void *(*createArray)(const redisReadTask*, int);
- void *(*createInteger)(const redisReadTask*, long long);
- void *(*createNil)(const redisReadTask*);
- void (*freeObject)(void*);
-} redisReplyObjectFunctions;
-
-/* State for the protocol parser */
-typedef struct redisReader {
- int err; /* Error flags, 0 when there is no error */
- char errstr[128]; /* String representation of error when applicable */
-
- char *buf; /* Read buffer */
- size_t pos; /* Buffer cursor */
- size_t len; /* Buffer length */
- size_t maxbuf; /* Max length of unused buffer */
-
- redisReadTask rstack[9];
- int ridx; /* Index of current read task */
- void *reply; /* Temporary reply pointer */
-
- redisReplyObjectFunctions *fn;
- void *privdata;
-} redisReader;
-
-/* Public API for the protocol parser. */
redisReader *redisReaderCreate(void);
-void redisReaderFree(redisReader *r);
-int redisReaderFeed(redisReader *r, const char *buf, size_t len);
-int redisReaderGetReply(redisReader *r, void **reply);
-
-/* Backwards compatibility, can be removed on big version bump. */
-#define redisReplyReaderCreate redisReaderCreate
-#define redisReplyReaderFree redisReaderFree
-#define redisReplyReaderFeed redisReaderFeed
-#define redisReplyReaderGetReply redisReaderGetReply
-#define redisReplyReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p))
-#define redisReplyReaderGetObject(_r) (((redisReader*)(_r))->reply)
-#define redisReplyReaderGetError(_r) (((redisReader*)(_r))->errstr)
/* Function to free the reply objects hiredis returns by default. */
void freeReplyObject(void *reply);
@@ -161,6 +127,14 @@ void freeReplyObject(void *reply);
int redisvFormatCommand(char **target, const char *format, va_list ap);
int redisFormatCommand(char **target, const char *format, ...);
int redisFormatCommandArgv(char **target, int argc, const char **argv, const size_t *argvlen);
+int redisFormatSdsCommandArgv(sds *target, int argc, const char ** argv, const size_t *argvlen);
+void redisFreeCommand(char *cmd);
+void redisFreeSdsCommand(sds cmd);
+
+enum redisConnectionType {
+ REDIS_CONN_TCP,
+ REDIS_CONN_UNIX
+};
/* Context for a connection to Redis */
typedef struct redisContext {
@@ -170,16 +144,45 @@ typedef struct redisContext {
int flags;
char *obuf; /* Write buffer */
redisReader *reader; /* Protocol reader */
+
+ enum redisConnectionType connection_type;
+ struct timeval *timeout;
+
+ struct {
+ char *host;
+ char *source_addr;
+ int port;
+ } tcp;
+
+ struct {
+ char *path;
+ } unix_sock;
+
} redisContext;
redisContext *redisConnect(const char *ip, int port);
redisContext *redisConnectWithTimeout(const char *ip, int port, const struct timeval tv);
redisContext *redisConnectNonBlock(const char *ip, int port);
-redisContext *redisConnectBindNonBlock(const char *ip, int port, const char *source_addr);
+redisContext *redisConnectBindNonBlock(const char *ip, int port,
+ const char *source_addr);
+redisContext *redisConnectBindNonBlockWithReuse(const char *ip, int port,
+ const char *source_addr);
redisContext *redisConnectUnix(const char *path);
redisContext *redisConnectUnixWithTimeout(const char *path, const struct timeval tv);
redisContext *redisConnectUnixNonBlock(const char *path);
redisContext *redisConnectFd(int fd);
+
+/**
+ * Reconnect the given context using the saved information.
+ *
+ * This re-uses the exact same connect options as in the initial connection:
+ * host, ip (or path), timeout and bind address are reused;
+ * flags are used unmodified from the existing context.
+ *
+ * Returns REDIS_OK on successful connect or REDIS_ERR otherwise.
+ */
+int redisReconnect(redisContext *c);
+
int redisSetTimeout(redisContext *c, const struct timeval tv);
int redisEnableKeepAlive(redisContext *c);
void redisFree(redisContext *c);
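The WithReuse variants target clients that bind a fixed source address and cycle connections faster than TIME_WAIT clears; on EADDRNOTAVAIL the connect is retried up to REDIS_CONNECT_RETRIES times. A sketch with an illustrative source address:

    redisContext *c =
        redisConnectBindNonBlockWithReuse("127.0.0.1", 6379, "192.168.1.10");
    if (c == NULL || c->err) {
        /* report c ? c->errstr : "allocation failure";
         * call redisFree(c) if c is non-NULL */
    }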
diff --git a/deps/hiredis/net.c b/deps/hiredis/net.c
index bdb84ceed..7d4120985 100644
--- a/deps/hiredis/net.c
+++ b/deps/hiredis/net.c
@@ -1,7 +1,9 @@
/* Extracted from anet.c to work properly with Hiredis error reporting.
*
- * Copyright (c) 2006-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -47,6 +49,7 @@
#include <stdio.h>
#include <poll.h>
#include <limits.h>
+#include <stdlib.h>
#include "net.h"
#include "sds.h"
@@ -67,7 +70,7 @@ static void __redisSetErrorFromErrno(redisContext *c, int type, const char *pref
if (prefix != NULL)
len = snprintf(buf,sizeof(buf),"%s: ",prefix);
- strerror_r(errno,buf+len,sizeof(buf)-len);
+ __redis_strerror_r(errno, (char *)(buf + len), sizeof(buf) - len);
__redisSetError(c,type,buf);
}
@@ -138,7 +141,7 @@ int redisKeepAlive(redisContext *c, int interval) {
return REDIS_ERR;
}
#else
-#ifndef __sun
+#if defined(__GLIBC__) && !defined(__FreeBSD_kernel__)
val = interval;
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
__redisSetError(c,REDIS_ERR_OTHER,strerror(errno));
@@ -175,19 +178,15 @@ static int redisSetTcpNoDelay(redisContext *c) {
#define __MAX_MSEC (((LONG_MAX) - 999) / 1000)
-static int redisContextWaitReady(redisContext *c, const struct timeval *timeout) {
- struct pollfd wfd[1];
- long msec;
-
- msec = -1;
- wfd[0].fd = c->fd;
- wfd[0].events = POLLOUT;
+static int redisContextTimeoutMsec(redisContext *c, long *result)
+{
+ const struct timeval *timeout = c->timeout;
+ long msec = -1;
/* Only use timeout when not NULL. */
if (timeout != NULL) {
if (timeout->tv_usec > 1000000 || timeout->tv_sec > __MAX_MSEC) {
- __redisSetErrorFromErrno(c, REDIS_ERR_IO, NULL);
- redisContextCloseFd(c);
+ *result = msec;
return REDIS_ERR;
}
@@ -198,6 +197,16 @@ static int redisContextWaitReady(redisContext *c, const struct timeval *timeout)
}
}
+ *result = msec;
+ return REDIS_OK;
+}
+
+static int redisContextWaitReady(redisContext *c, long msec) {
+ struct pollfd wfd[1];
+
+ wfd[0].fd = c->fd;
+ wfd[0].events = POLLOUT;
+
if (errno == EINPROGRESS) {
int res;
@@ -256,10 +265,57 @@ int redisContextSetTimeout(redisContext *c, const struct timeval tv) {
static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
const struct timeval *timeout,
const char *source_addr) {
- int s, rv;
+ int s, rv, n;
char _port[6]; /* strlen("65535"); */
struct addrinfo hints, *servinfo, *bservinfo, *p, *b;
int blocking = (c->flags & REDIS_BLOCK);
+ int reuseaddr = (c->flags & REDIS_REUSEADDR);
+ int reuses = 0;
+ long timeout_msec = -1;
+
+ servinfo = NULL;
+ c->connection_type = REDIS_CONN_TCP;
+ c->tcp.port = port;
+
+ /* We need to take possession of the passed parameters
+ * to make them reusable for a reconnect.
+ * We also carefully check we don't free data we already own,
+ * as in the case of the reconnect method.
+ *
+ * This is a bit ugly, but at least it works and doesn't leak memory.
+ **/
+ if (c->tcp.host != addr) {
+ if (c->tcp.host)
+ free(c->tcp.host);
+
+ c->tcp.host = strdup(addr);
+ }
+
+ if (timeout) {
+ if (c->timeout != timeout) {
+ if (c->timeout == NULL)
+ c->timeout = malloc(sizeof(struct timeval));
+
+ memcpy(c->timeout, timeout, sizeof(struct timeval));
+ }
+ } else {
+ if (c->timeout)
+ free(c->timeout);
+ c->timeout = NULL;
+ }
+
+ if (redisContextTimeoutMsec(c, &timeout_msec) != REDIS_OK) {
+ __redisSetError(c, REDIS_ERR_IO, "Invalid timeout specified");
+ goto error;
+ }
+
+ if (source_addr == NULL) {
+ free(c->tcp.source_addr);
+ c->tcp.source_addr = NULL;
+ } else if (c->tcp.source_addr != source_addr) {
+ free(c->tcp.source_addr);
+ c->tcp.source_addr = strdup(source_addr);
+ }
snprintf(_port, 6, "%d", port);
memset(&hints,0,sizeof(hints));
@@ -271,7 +327,7 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
* as this would add latency to every connect. Otherwise a more sensible
* route could be: Use IPv6 if both addresses are available and there is IPv6
* connectivity. */
- if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) {
+ if ((rv = getaddrinfo(c->tcp.host,_port,&hints,&servinfo)) != 0) {
hints.ai_family = AF_INET6;
if ((rv = getaddrinfo(addr,_port,&hints,&servinfo)) != 0) {
__redisSetError(c,REDIS_ERR_OTHER,gai_strerror(rv));
@@ -279,21 +335,31 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
}
}
for (p = servinfo; p != NULL; p = p->ai_next) {
+addrretry:
if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
continue;
c->fd = s;
if (redisSetBlocking(c,0) != REDIS_OK)
goto error;
- if (source_addr) {
+ if (c->tcp.source_addr) {
int bound = 0;
/* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */
- if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0) {
+ if ((rv = getaddrinfo(c->tcp.source_addr, NULL, &hints, &bservinfo)) != 0) {
char buf[128];
snprintf(buf,sizeof(buf),"Can't get addr: %s",gai_strerror(rv));
__redisSetError(c,REDIS_ERR_OTHER,buf);
goto error;
}
+
+ if (reuseaddr) {
+ n = 1;
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char*) &n,
+ sizeof(n)) < 0) {
+ goto error;
+ }
+ }
+
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
bound = 1;
@@ -314,8 +380,15 @@ static int _redisContextConnectTcp(redisContext *c, const char *addr, int port,
continue;
} else if (errno == EINPROGRESS && !blocking) {
/* This is ok. */
+ } else if (errno == EADDRNOTAVAIL && reuseaddr) {
+ if (++reuses >= REDIS_CONNECT_RETRIES) {
+ goto error;
+ } else {
+ redisContextCloseFd(c);
+ goto addrretry;
+ }
} else {
- if (redisContextWaitReady(c,timeout) != REDIS_OK)
+ if (redisContextWaitReady(c,timeout_msec) != REDIS_OK)
goto error;
}
}
@@ -356,19 +429,40 @@ int redisContextConnectBindTcp(redisContext *c, const char *addr, int port,
int redisContextConnectUnix(redisContext *c, const char *path, const struct timeval *timeout) {
int blocking = (c->flags & REDIS_BLOCK);
struct sockaddr_un sa;
+ long timeout_msec = -1;
if (redisCreateSocket(c,AF_LOCAL) < 0)
return REDIS_ERR;
if (redisSetBlocking(c,0) != REDIS_OK)
return REDIS_ERR;
+ c->connection_type = REDIS_CONN_UNIX;
+ if (c->unix_sock.path != path)
+ c->unix_sock.path = strdup(path);
+
+ if (timeout) {
+ if (c->timeout != timeout) {
+ if (c->timeout == NULL)
+ c->timeout = malloc(sizeof(struct timeval));
+
+ memcpy(c->timeout, timeout, sizeof(struct timeval));
+ }
+ } else {
+ if (c->timeout)
+ free(c->timeout);
+ c->timeout = NULL;
+ }
+
+ if (redisContextTimeoutMsec(c,&timeout_msec) != REDIS_OK)
+ return REDIS_ERR;
+
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (connect(c->fd, (struct sockaddr*)&sa, sizeof(sa)) == -1) {
if (errno == EINPROGRESS && !blocking) {
/* This is ok. */
} else {
- if (redisContextWaitReady(c,timeout) != REDIS_OK)
+ if (redisContextWaitReady(c,timeout_msec) != REDIS_OK)
return REDIS_ERR;
}
}
diff --git a/deps/hiredis/net.h b/deps/hiredis/net.h
index 3763ab089..2f1a0bf85 100644
--- a/deps/hiredis/net.h
+++ b/deps/hiredis/net.h
@@ -1,7 +1,9 @@
/* Extracted from anet.c to work properly with Hiredis error reporting.
*
- * Copyright (c) 2006-2011, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2014, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2015, Matt Stancliff <matt at genges dot com>,
+ * Jan-Erik Rediger <janerik at fnordig dot com>
*
* All rights reserved.
*
@@ -35,7 +37,7 @@
#include "hiredis.h"
-#if defined(__sun) || defined(_AIX)
+#if defined(__sun)
#define AF_LOCAL AF_UNIX
#endif
diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c
new file mode 100644
index 000000000..50333b534
--- /dev/null
+++ b/deps/hiredis/read.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "fmacros.h"
+#include <string.h>
+#include <stdlib.h>
+#ifndef _MSC_VER
+#include <unistd.h>
+#endif
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h>
+
+#include "read.h"
+#include "sds.h"
+
+static void __redisReaderSetError(redisReader *r, int type, const char *str) {
+ size_t len;
+
+ if (r->reply != NULL && r->fn && r->fn->freeObject) {
+ r->fn->freeObject(r->reply);
+ r->reply = NULL;
+ }
+
+ /* Clear input buffer on errors. */
+ if (r->buf != NULL) {
+ sdsfree(r->buf);
+ r->buf = NULL;
+ r->pos = r->len = 0;
+ }
+
+ /* Reset task stack. */
+ r->ridx = -1;
+
+ /* Set error. */
+ r->err = type;
+ len = strlen(str);
+ len = len < (sizeof(r->errstr)-1) ? len : (sizeof(r->errstr)-1);
+ memcpy(r->errstr,str,len);
+ r->errstr[len] = '\0';
+}
+
+static size_t chrtos(char *buf, size_t size, char byte) {
+ size_t len = 0;
+
+ switch(byte) {
+ case '\\':
+ case '"':
+ len = snprintf(buf,size,"\"\\%c\"",byte);
+ break;
+ case '\n': len = snprintf(buf,size,"\"\\n\""); break;
+ case '\r': len = snprintf(buf,size,"\"\\r\""); break;
+ case '\t': len = snprintf(buf,size,"\"\\t\""); break;
+ case '\a': len = snprintf(buf,size,"\"\\a\""); break;
+ case '\b': len = snprintf(buf,size,"\"\\b\""); break;
+ default:
+ if (isprint(byte))
+ len = snprintf(buf,size,"\"%c\"",byte);
+ else
+ len = snprintf(buf,size,"\"\\x%02x\"",(unsigned char)byte);
+ break;
+ }
+
+ return len;
+}
+
+static void __redisReaderSetErrorProtocolByte(redisReader *r, char byte) {
+ char cbuf[8], sbuf[128];
+
+ chrtos(cbuf,sizeof(cbuf),byte);
+ snprintf(sbuf,sizeof(sbuf),
+ "Protocol error, got %s as reply type byte", cbuf);
+ __redisReaderSetError(r,REDIS_ERR_PROTOCOL,sbuf);
+}
+
+static void __redisReaderSetErrorOOM(redisReader *r) {
+ __redisReaderSetError(r,REDIS_ERR_OOM,"Out of memory");
+}
+
+static char *readBytes(redisReader *r, unsigned int bytes) {
+ char *p;
+ if (r->len-r->pos >= bytes) {
+ p = r->buf+r->pos;
+ r->pos += bytes;
+ return p;
+ }
+ return NULL;
+}
+
+/* Find pointer to \r\n. */
+static char *seekNewline(char *s, size_t len) {
+ int pos = 0;
+ int _len = len-1;
+
+ /* Position should be < len-1 because the character at "pos" should be
+ * followed by a \n. Note that strchr cannot be used because it doesn't
+ * allow to search a limited length and the buffer that is being searched
+ * might not have a trailing NULL character. */
+ while (pos < _len) {
+ while(pos < _len && s[pos] != '\r') pos++;
+ if (pos==_len) {
+ /* Not found. */
+ return NULL;
+ } else {
+ if (s[pos+1] == '\n') {
+ /* Found. */
+ return s+pos;
+ } else {
+ /* Continue searching. */
+ pos++;
+ }
+ }
+ }
+ return NULL;
+}
+
+/* Read a long long value starting at *s, under the assumption that it will be
+ * terminated by \r\n. Ambiguously returns -1 for unexpected input. */
+static long long readLongLong(char *s) {
+ long long v = 0;
+ int dec, mult = 1;
+ char c;
+
+ if (*s == '-') {
+ mult = -1;
+ s++;
+ } else if (*s == '+') {
+ mult = 1;
+ s++;
+ }
+
+ while ((c = *(s++)) != '\r') {
+ dec = c - '0';
+ if (dec >= 0 && dec < 10) {
+ v *= 10;
+ v += dec;
+ } else {
+ /* Should not happen... */
+ return -1;
+ }
+ }
+
+ return mult*v;
+}
+
+static char *readLine(redisReader *r, int *_len) {
+ char *p, *s;
+ int len;
+
+ p = r->buf+r->pos;
+ s = seekNewline(p,(r->len-r->pos));
+ if (s != NULL) {
+ len = s-(r->buf+r->pos);
+ r->pos += len+2; /* skip \r\n */
+ if (_len) *_len = len;
+ return p;
+ }
+ return NULL;
+}
+
+static void moveToNextTask(redisReader *r) {
+ redisReadTask *cur, *prv;
+ while (r->ridx >= 0) {
+ /* Return a.s.a.p. when the stack is now empty. */
+ if (r->ridx == 0) {
+ r->ridx--;
+ return;
+ }
+
+ cur = &(r->rstack[r->ridx]);
+ prv = &(r->rstack[r->ridx-1]);
+ assert(prv->type == REDIS_REPLY_ARRAY);
+ if (cur->idx == prv->elements-1) {
+ r->ridx--;
+ } else {
+ /* Reset the type because the next item can be anything */
+ assert(cur->idx < prv->elements);
+ cur->type = -1;
+ cur->elements = -1;
+ cur->idx++;
+ return;
+ }
+ }
+}
+
+static int processLineItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj;
+ char *p;
+ int len;
+
+ if ((p = readLine(r,&len)) != NULL) {
+ if (cur->type == REDIS_REPLY_INTEGER) {
+ if (r->fn && r->fn->createInteger)
+ obj = r->fn->createInteger(cur,readLongLong(p));
+ else
+ obj = (void*)REDIS_REPLY_INTEGER;
+ } else {
+ /* Type will be error or status. */
+ if (r->fn && r->fn->createString)
+ obj = r->fn->createString(cur,p,len);
+ else
+ obj = (void*)(size_t)(cur->type);
+ }
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ /* Set reply if this is the root object. */
+ if (r->ridx == 0) r->reply = obj;
+ moveToNextTask(r);
+ return REDIS_OK;
+ }
+
+ return REDIS_ERR;
+}
+
+static int processBulkItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj = NULL;
+ char *p, *s;
+ long len;
+ unsigned long bytelen;
+ int success = 0;
+
+ p = r->buf+r->pos;
+ s = seekNewline(p,r->len-r->pos);
+ if (s != NULL) {
+ p = r->buf+r->pos;
+ bytelen = s-(r->buf+r->pos)+2; /* include \r\n */
+ len = readLongLong(p);
+
+ if (len < 0) {
+ /* The nil object can always be created. */
+ if (r->fn && r->fn->createNil)
+ obj = r->fn->createNil(cur);
+ else
+ obj = (void*)REDIS_REPLY_NIL;
+ success = 1;
+ } else {
+ /* Only continue when the buffer contains the entire bulk item. */
+ bytelen += len+2; /* include \r\n */
+ if (r->pos+bytelen <= r->len) {
+ if (r->fn && r->fn->createString)
+ obj = r->fn->createString(cur,s+2,len);
+ else
+ obj = (void*)REDIS_REPLY_STRING;
+ success = 1;
+ }
+ }
+
+ /* Proceed when obj was created. */
+ if (success) {
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ r->pos += bytelen;
+
+ /* Set reply if this is the root object. */
+ if (r->ridx == 0) r->reply = obj;
+ moveToNextTask(r);
+ return REDIS_OK;
+ }
+ }
+
+ return REDIS_ERR;
+}
+
+static int processMultiBulkItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ void *obj;
+ char *p;
+ long elements;
+ int root = 0;
+
+ /* Set error for nested multi bulks with depth > 7 */
+ if (r->ridx == 8) {
+ __redisReaderSetError(r,REDIS_ERR_PROTOCOL,
+ "No support for nested multi bulk replies with depth > 7");
+ return REDIS_ERR;
+ }
+
+ if ((p = readLine(r,NULL)) != NULL) {
+ elements = readLongLong(p);
+ root = (r->ridx == 0);
+
+ if (elements == -1) {
+ if (r->fn && r->fn->createNil)
+ obj = r->fn->createNil(cur);
+ else
+ obj = (void*)REDIS_REPLY_NIL;
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ moveToNextTask(r);
+ } else {
+ if (r->fn && r->fn->createArray)
+ obj = r->fn->createArray(cur,elements);
+ else
+ obj = (void*)REDIS_REPLY_ARRAY;
+
+ if (obj == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ /* Modify task stack when there are more than 0 elements. */
+ if (elements > 0) {
+ cur->elements = elements;
+ cur->obj = obj;
+ r->ridx++;
+ r->rstack[r->ridx].type = -1;
+ r->rstack[r->ridx].elements = -1;
+ r->rstack[r->ridx].idx = 0;
+ r->rstack[r->ridx].obj = NULL;
+ r->rstack[r->ridx].parent = cur;
+ r->rstack[r->ridx].privdata = r->privdata;
+ } else {
+ moveToNextTask(r);
+ }
+ }
+
+ /* Set reply if this is the root object. */
+ if (root) r->reply = obj;
+ return REDIS_OK;
+ }
+
+ return REDIS_ERR;
+}
+
+static int processItem(redisReader *r) {
+ redisReadTask *cur = &(r->rstack[r->ridx]);
+ char *p;
+
+ /* check if we need to read type */
+ if (cur->type < 0) {
+ if ((p = readBytes(r,1)) != NULL) {
+ switch (p[0]) {
+ case '-':
+ cur->type = REDIS_REPLY_ERROR;
+ break;
+ case '+':
+ cur->type = REDIS_REPLY_STATUS;
+ break;
+ case ':':
+ cur->type = REDIS_REPLY_INTEGER;
+ break;
+ case '$':
+ cur->type = REDIS_REPLY_STRING;
+ break;
+ case '*':
+ cur->type = REDIS_REPLY_ARRAY;
+ break;
+ default:
+ __redisReaderSetErrorProtocolByte(r,*p);
+ return REDIS_ERR;
+ }
+ } else {
+ /* could not consume 1 byte */
+ return REDIS_ERR;
+ }
+ }
+
+ /* process typed item */
+ switch(cur->type) {
+ case REDIS_REPLY_ERROR:
+ case REDIS_REPLY_STATUS:
+ case REDIS_REPLY_INTEGER:
+ return processLineItem(r);
+ case REDIS_REPLY_STRING:
+ return processBulkItem(r);
+ case REDIS_REPLY_ARRAY:
+ return processMultiBulkItem(r);
+ default:
+ assert(NULL);
+ return REDIS_ERR; /* Avoid warning. */
+ }
+}
+
+redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn) {
+ redisReader *r;
+
+ r = calloc(sizeof(redisReader),1);
+ if (r == NULL)
+ return NULL;
+
+ r->err = 0;
+ r->errstr[0] = '\0';
+ r->fn = fn;
+ r->buf = sdsempty();
+ r->maxbuf = REDIS_READER_MAX_BUF;
+ if (r->buf == NULL) {
+ free(r);
+ return NULL;
+ }
+
+ r->ridx = -1;
+ return r;
+}
+
+void redisReaderFree(redisReader *r) {
+ if (r->reply != NULL && r->fn && r->fn->freeObject)
+ r->fn->freeObject(r->reply);
+ if (r->buf != NULL)
+ sdsfree(r->buf);
+ free(r);
+}
+
+int redisReaderFeed(redisReader *r, const char *buf, size_t len) {
+ sds newbuf;
+
+ /* Return early when this reader is in an erroneous state. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* Copy the provided buffer. */
+ if (buf != NULL && len >= 1) {
+ /* Destroy internal buffer when it is empty and is quite large. */
+ if (r->len == 0 && r->maxbuf != 0 && sdsavail(r->buf) > r->maxbuf) {
+ sdsfree(r->buf);
+ r->buf = sdsempty();
+ r->pos = 0;
+
+ /* r->buf should not be NULL since we just free'd a larger one. */
+ assert(r->buf != NULL);
+ }
+
+ newbuf = sdscatlen(r->buf,buf,len);
+ if (newbuf == NULL) {
+ __redisReaderSetErrorOOM(r);
+ return REDIS_ERR;
+ }
+
+ r->buf = newbuf;
+ r->len = sdslen(r->buf);
+ }
+
+ return REDIS_OK;
+}
+
+int redisReaderGetReply(redisReader *r, void **reply) {
+ /* Default target pointer to NULL. */
+ if (reply != NULL)
+ *reply = NULL;
+
+ /* Return early when this reader is in an erroneous state. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* When the buffer is empty, there will never be a reply. */
+ if (r->len == 0)
+ return REDIS_OK;
+
+ /* Set first item to process when the stack is empty. */
+ if (r->ridx == -1) {
+ r->rstack[0].type = -1;
+ r->rstack[0].elements = -1;
+ r->rstack[0].idx = -1;
+ r->rstack[0].obj = NULL;
+ r->rstack[0].parent = NULL;
+ r->rstack[0].privdata = r->privdata;
+ r->ridx = 0;
+ }
+
+ /* Process items in reply. */
+ while (r->ridx >= 0)
+ if (processItem(r) != REDIS_OK)
+ break;
+
+ /* Return ASAP when an error occurred. */
+ if (r->err)
+ return REDIS_ERR;
+
+ /* Discard part of the buffer when we've consumed at least 1k, to avoid
+ * doing unnecessary calls to memmove() in sds.c. */
+ if (r->pos >= 1024) {
+ sdsrange(r->buf,r->pos,-1);
+ r->pos = 0;
+ r->len = sdslen(r->buf);
+ }
+
+ /* Emit a reply when there is one. */
+ if (r->ridx == -1) {
+ if (reply != NULL)
+ *reply = r->reply;
+ r->reply = NULL;
+ }
+ return REDIS_OK;
+}
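
To make the new parser's contract concrete, here is a minimal usage sketch (not part of the patch; the descriptor `fd` and surrounding error handling are assumed): bytes are fed in as they arrive with redisReaderFeed(), and redisReaderGetReply() leaves the target pointer NULL until a full reply has accumulated. With a NULL function table the reader hands back the REDIS_REPLY_* type tags cast to pointers rather than allocated objects.

    #include <unistd.h>
    #include "read.h"

    /* Sketch only: parse replies as bytes arrive on an already-connected fd. */
    static int drainReplies(int fd) {
        redisReader *reader = redisReaderCreateWithFunctions(NULL);
        char buf[4096];
        void *reply;
        ssize_t nread;

        if (reader == NULL) return REDIS_ERR;
        while ((nread = read(fd, buf, sizeof(buf))) > 0) {
            if (redisReaderFeed(reader, buf, (size_t)nread) != REDIS_OK)
                break; /* reader entered an error state (e.g. OOM) */
            /* One feed may complete zero, one, or several replies. */
            while (redisReaderGetReply(reader, &reply) == REDIS_OK && reply != NULL) {
                /* With a NULL fn table, `reply` is just the REDIS_REPLY_*
                 * tag cast to a pointer; there is nothing to free here. */
            }
        }
        int err = reader->err;
        redisReaderFree(reader);
        return err ? REDIS_ERR : REDIS_OK;
    }
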
diff --git a/deps/hiredis/read.h b/deps/hiredis/read.h
new file mode 100644
index 000000000..2988aa453
--- /dev/null
+++ b/deps/hiredis/read.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2009-2011, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2011, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef __HIREDIS_READ_H
+#define __HIREDIS_READ_H
+#include <stdio.h> /* for size_t */
+
+#define REDIS_ERR -1
+#define REDIS_OK 0
+
+/* When an error occurs, the err flag in a context is set to hold the type of
+ * error that occurred. REDIS_ERR_IO means there was an I/O error and you
+ * should use the "errno" variable to find out what is wrong.
+ * For other values, the "errstr" field will hold a description. */
+#define REDIS_ERR_IO 1 /* Error in read or write */
+#define REDIS_ERR_EOF 3 /* End of file */
+#define REDIS_ERR_PROTOCOL 4 /* Protocol error */
+#define REDIS_ERR_OOM 5 /* Out of memory */
+#define REDIS_ERR_OTHER 2 /* Everything else... */
+
+#define REDIS_REPLY_STRING 1
+#define REDIS_REPLY_ARRAY 2
+#define REDIS_REPLY_INTEGER 3
+#define REDIS_REPLY_NIL 4
+#define REDIS_REPLY_STATUS 5
+#define REDIS_REPLY_ERROR 6
+
+#define REDIS_READER_MAX_BUF (1024*16) /* Default max unused reader buffer. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct redisReadTask {
+ int type;
+ int elements; /* number of elements in multibulk container */
+ int idx; /* index in parent (array) object */
+ void *obj; /* holds user-generated value for a read task */
+ struct redisReadTask *parent; /* parent task */
+ void *privdata; /* user-settable arbitrary field */
+} redisReadTask;
+
+typedef struct redisReplyObjectFunctions {
+ void *(*createString)(const redisReadTask*, char*, size_t);
+ void *(*createArray)(const redisReadTask*, int);
+ void *(*createInteger)(const redisReadTask*, long long);
+ void *(*createNil)(const redisReadTask*);
+ void (*freeObject)(void*);
+} redisReplyObjectFunctions;
+
+typedef struct redisReader {
+ int err; /* Error flags, 0 when there is no error */
+ char errstr[128]; /* String representation of error when applicable */
+
+ char *buf; /* Read buffer */
+ size_t pos; /* Buffer cursor */
+ size_t len; /* Buffer length */
+ size_t maxbuf; /* Max length of unused buffer */
+
+ redisReadTask rstack[9];
+ int ridx; /* Index of current read task */
+ void *reply; /* Temporary reply pointer */
+
+ redisReplyObjectFunctions *fn;
+ void *privdata;
+} redisReader;
+
+/* Public API for the protocol parser. */
+redisReader *redisReaderCreateWithFunctions(redisReplyObjectFunctions *fn);
+void redisReaderFree(redisReader *r);
+int redisReaderFeed(redisReader *r, const char *buf, size_t len);
+int redisReaderGetReply(redisReader *r, void **reply);
+
+#define redisReaderSetPrivdata(_r, _p) (int)(((redisReader*)(_r))->privdata = (_p))
+#define redisReaderGetObject(_r) (((redisReader*)(_r))->reply)
+#define redisReaderGetError(_r) (((redisReader*)(_r))->errstr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
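
The redisReplyObjectFunctions table above is the extension point the hiredis context plugs its redisReply builders into. A sketch of a custom table (hypothetical names, not from this patch) that discards values and merely counts parsed nodes; every callback must return a non-NULL token, since NULL is interpreted as out-of-memory, while freeObject may be left NULL because the reader checks it before calling:

    #include <stddef.h>
    #include "read.h"

    /* Hypothetical builders: count nodes instead of building objects. */
    static size_t nodes_seen;

    static void *countString(const redisReadTask *t, char *s, size_t len) {
        (void)t; (void)s; (void)len; nodes_seen++; return (void*)1;
    }
    static void *countArray(const redisReadTask *t, int elements) {
        (void)t; (void)elements; nodes_seen++; return (void*)1;
    }
    static void *countInteger(const redisReadTask *t, long long v) {
        (void)t; (void)v; nodes_seen++; return (void*)1;
    }
    static void *countNil(const redisReadTask *t) {
        (void)t; nodes_seen++; return (void*)1;
    }

    static redisReplyObjectFunctions countFn = {
        countString, countArray, countInteger, countNil,
        NULL /* freeObject: the reader checks for NULL before calling it */
    };

    /* ... redisReader *r = redisReaderCreateWithFunctions(&countFn); ... */
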
diff --git a/deps/hiredis/sds.c b/deps/hiredis/sds.c
index 95454e997..923ffd82f 100644
--- a/deps/hiredis/sds.c
+++ b/deps/hiredis/sds.c
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,7 +36,35 @@
#include <ctype.h>
#include <assert.h>
#include "sds.h"
-#include "zmalloc.h"
+#include "sdsalloc.h"
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 32)
+ return SDS_TYPE_5;
+ if (string_size < 0xff)
+ return SDS_TYPE_8;
+ if (string_size < 0xffff)
+ return SDS_TYPE_16;
+ if (string_size < 0xffffffff)
+ return SDS_TYPE_32;
+ return SDS_TYPE_64;
+}
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
@@ -43,26 +73,65 @@
* The string is always null-terminated (all the sds strings are, always) so
* even if you create an sds string with:
*
- * mystring = sdsnewlen("abc",3");
+ * mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
- struct sdshdr *sh;
-
- if (init) {
- sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
- } else {
- sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
- }
+ void *sh;
+ sds s;
+ char type = sdsReqType(initlen);
+ /* Empty strings are usually created in order to append. Use type 8
+ * since type 5 is not good at this. */
+ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+ int hdrlen = sdsHdrSize(type);
+ unsigned char *fp; /* flags pointer. */
+
+ sh = s_malloc(hdrlen+initlen+1);
if (sh == NULL) return NULL;
- sh->len = initlen;
- sh->free = 0;
+ if (!init)
+ memset(sh, 0, hdrlen+initlen+1);
+ s = (char*)sh+hdrlen;
+ fp = ((unsigned char*)s)-1;
+ switch(type) {
+ case SDS_TYPE_5: {
+ *fp = type | (initlen << SDS_TYPE_BITS);
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ }
if (initlen && init)
- memcpy(sh->buf, init, initlen);
- sh->buf[initlen] = '\0';
- return (char*)sh->buf;
+ memcpy(s, init, initlen);
+ s[initlen] = '\0';
+ return s;
}
/* Create an empty (zero length) sds string. Even in this case the string
@@ -71,7 +140,7 @@ sds sdsempty(void) {
return sdsnewlen("",0);
}
-/* Create a new sds string starting from a null termined C string. */
+/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
@@ -85,7 +154,7 @@ sds sdsdup(const sds s) {
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
- zfree(s-sizeof(struct sdshdr));
+ s_free((char*)s-sdsHdrSize(s[-1]));
}
/* Set the sds string length to the length as obtained with strlen(), so
@@ -103,21 +172,17 @@ void sdsfree(sds s) {
* the output will be "6" as the string was modified but the logical length
* remains 6 bytes. */
void sdsupdatelen(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
int reallen = strlen(s);
- sh->free += (sh->len-reallen);
- sh->len = reallen;
+ sdssetlen(s, reallen);
}
-/* Modify an sds string on-place to make it empty (zero length).
+/* Modify an sds string in-place to make it empty (zero length).
* However all the existing buffer is not discarded but set as free space
* so that next append operations will not require allocations up to the
* number of bytes previously available. */
void sdsclear(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- sh->free += sh->len;
- sh->len = 0;
- sh->buf[0] = '\0';
+ sdssetlen(s, 0);
+ s[0] = '\0';
}
/* Enlarge the free space at the end of the sds string so that the caller
@@ -127,23 +192,48 @@ void sdsclear(sds s) {
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
- struct sdshdr *sh, *newsh;
- size_t free = sdsavail(s);
+ void *sh, *newsh;
+ size_t avail = sdsavail(s);
size_t len, newlen;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+
+ /* Return ASAP if there is enough space left. */
+ if (avail >= addlen) return s;
- if (free >= addlen) return s;
len = sdslen(s);
- sh = (void*) (s-(sizeof(struct sdshdr)));
+ sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
- newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
- if (newsh == NULL) return NULL;
- newsh->free = newlen - len;
- return newsh->buf;
+ type = sdsReqType(newlen);
+
+ /* Don't use type 5: the user is appending to the string and type 5 is
+ * not able to remember empty space, so sdsMakeRoomFor() must be called
+ * at every appending operation. */
+ if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ /* Since the header size changes, need to move the string forward,
+ * and can't use realloc */
+ newsh = s_malloc(hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, newlen);
+ return s;
}
/* Reallocate the sds string so that it has no free space at the end. The
@@ -153,12 +243,29 @@ sds sdsMakeRoomFor(sds s, size_t addlen) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
- struct sdshdr *sh;
-
- sh = (void*) (s-(sizeof(struct sdshdr)));
- sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1);
- sh->free = 0;
- return sh->buf;
+ void *sh, *newsh;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+ size_t len = sdslen(s);
+ sh = (char*)s-sdsHdrSize(oldtype);
+
+ type = sdsReqType(len);
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ newsh = s_malloc(hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, len);
+ return s;
}
/* Return the total size of the allocation of the specified sds string,
@@ -169,9 +276,14 @@ sds sdsRemoveFreeSpace(sds s) {
* 4) The implicit null term.
*/
size_t sdsAllocSize(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
- return sizeof(*sh)+sh->len+sh->free+1;
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
}
/* Increment the sds length and decrements the left free space at the
@@ -198,15 +310,44 @@ size_t sdsAllocSize(sds s) {
* sdsIncrLen(s, nread);
*/
void sdsIncrLen(sds s, int incr) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
-
- if (incr >= 0)
- assert(sh->free >= (unsigned int)incr);
- else
- assert(sh->len >= (unsigned int)(-incr));
- sh->len += incr;
- sh->free -= incr;
- s[sh->len] = '\0';
+ unsigned char flags = s[-1];
+ size_t len;
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+ assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+ *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+ len = oldlen+incr;
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ default: len = 0; /* Just to avoid compilation warnings. */
+ }
+ s[len] = '\0';
}
/* Grow the sds to have the specified length. Bytes that were not part of
@@ -215,19 +356,15 @@ void sdsIncrLen(sds s, int incr) {
* if the specified length is smaller than the current length, no operation
* is performed. */
sds sdsgrowzero(sds s, size_t len) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- size_t totlen, curlen = sh->len;
+ size_t curlen = sdslen(s);
if (len <= curlen) return s;
s = sdsMakeRoomFor(s,len-curlen);
if (s == NULL) return NULL;
/* Make sure added region doesn't contain garbage */
- sh = (void*)(s-(sizeof(struct sdshdr)));
memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
- totlen = sh->len+sh->free;
- sh->len = len;
- sh->free = totlen-sh->len;
+ sdssetlen(s, len);
return s;
}
@@ -237,15 +374,12 @@ sds sdsgrowzero(sds s, size_t len) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
- struct sdshdr *sh;
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
memcpy(s+curlen, t, len);
- sh->len = curlen+len;
- sh->free = sh->free-len;
+ sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
@@ -269,19 +403,13 @@ sds sdscatsds(sds s, const sds t) {
/* Destructively modify the sds string 's' to hold the specified binary
* safe string pointed by 't' of length 'len' bytes. */
sds sdscpylen(sds s, const char *t, size_t len) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t totlen = sh->free+sh->len;
-
- if (totlen < len) {
- s = sdsMakeRoomFor(s,len-sh->len);
+ if (sdsalloc(s) < len) {
+ s = sdsMakeRoomFor(s,len-sdslen(s));
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
- totlen = sh->free+sh->len;
}
memcpy(s, t, len);
s[len] = '\0';
- sh->len = len;
- sh->free = totlen-len;
+ sdssetlen(s, len);
return s;
}
@@ -295,7 +423,7 @@ sds sdscpy(sds s, const char *t) {
* conversion. 's' must point to a string with room for at least
* SDS_LLSTR_SIZE bytes.
*
- * The function returns the lenght of the null-terminated string
+ * The function returns the length of the null-terminated string
* representation stored at 's'. */
#define SDS_LLSTR_SIZE 21
int sdsll2str(char *s, long long value) {
@@ -369,7 +497,7 @@ sds sdsfromlonglong(long long value) {
return sdsnewlen(buf,len);
}
-/* Like sdscatpritf() but gets va_list instead of being variadic. */
+/* Like sdscatprintf() but gets va_list instead of being variadic. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
va_list cpy;
char staticbuf[1024], *buf = staticbuf, *t;
@@ -378,7 +506,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* We try to start using a static buffer for speed.
* If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) {
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
} else {
buflen = sizeof(staticbuf);
@@ -390,11 +518,11 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
buf[buflen-2] = '\0';
va_copy(cpy,ap);
vsnprintf(buf, buflen, fmt, cpy);
- va_end(ap);
+ va_end(cpy);
if (buf[buflen-2] != '\0') {
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
buflen *= 2;
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
continue;
}
@@ -403,7 +531,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* Finally concat the obtained string to the SDS string and return it. */
t = sdscat(s, buf);
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
return t;
}
@@ -415,7 +543,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
*
* Example:
*
- * s = sdsempty("Sum is: ");
+ * s = sdsnew("Sum is: ");
* s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).
*
* Often you need to create a string from scratch with the printf-alike
@@ -449,25 +577,21 @@ sds sdscatprintf(sds s, const char *fmt, ...) {
* %% - Verbatim "%" character.
*/
sds sdscatfmt(sds s, char const *fmt, ...) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t initlen = sdslen(s);
const char *f = fmt;
int i;
va_list ap;
va_start(ap,fmt);
- f = fmt; /* Next format specifier byte to process. */
- i = initlen; /* Position of the next byte to write to dest str. */
+ i = sdslen(s); /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
- unsigned int l;
+ size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
- if (sh->free == 0) {
+ if (sdsavail(s)==0) {
s = sdsMakeRoomFor(s,1);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
@@ -479,13 +603,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
break;
case 'i':
@@ -497,13 +619,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
@@ -516,27 +636,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
break;
default:
s[i++] = *f;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
f++;
@@ -557,25 +673,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
- * s = sdstrim(s,"A. :");
+ * s = sdstrim(s,"Aa. :");
* printf("%s\n", s);
*
* Output will be just "Hello World".
*/
sds sdstrim(sds s, const char *cset) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
char *start, *end, *sp, *ep;
size_t len;
sp = start = s;
ep = end = s+sdslen(s)-1;
while(sp <= end && strchr(cset, *sp)) sp++;
- while(ep > start && strchr(cset, *ep)) ep--;
+ while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
- if (sh->buf != sp) memmove(sh->buf, sp, len);
- sh->buf[len] = '\0';
- sh->free = sh->free+(sh->len-len);
- sh->len = len;
+ if (s != sp) memmove(s, sp, len);
+ s[len] = '\0';
+ sdssetlen(s,len);
return s;
}
@@ -596,7 +710,6 @@ sds sdstrim(sds s, const char *cset) {
* sdsrange(s,1,-1); => "ello World"
*/
void sdsrange(sds s, int start, int end) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t newlen, len = sdslen(s);
if (len == 0) return;
@@ -619,10 +732,9 @@ void sdsrange(sds s, int start, int end) {
} else {
start = 0;
}
- if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
- sh->buf[newlen] = 0;
- sh->free = sh->free+(sh->len-newlen);
- sh->len = newlen;
+ if (start && newlen) memmove(s, s+start, newlen);
+ s[newlen] = 0;
+ sdssetlen(s,newlen);
}
/* Apply tolower() to every character of the sds string 's'. */
@@ -643,8 +755,8 @@ void sdstoupper(sds s) {
*
* Return value:
*
- * 1 if s1 > s2.
- * -1 if s1 < s2.
+ * positive if s1 > s2.
+ * negative if s1 < s2.
* 0 if s1 and s2 are exactly the same binary string.
*
* If two strings share exactly the same prefix, but one of the two has
@@ -684,7 +796,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
if (seplen < 1 || len < 0) return NULL;
- tokens = zmalloc(sizeof(sds)*slots);
+ tokens = s_malloc(sizeof(sds)*slots);
if (tokens == NULL) return NULL;
if (len == 0) {
@@ -697,7 +809,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
sds *newtokens;
slots *= 2;
- newtokens = zrealloc(tokens,sizeof(sds)*slots);
+ newtokens = s_realloc(tokens,sizeof(sds)*slots);
if (newtokens == NULL) goto cleanup;
tokens = newtokens;
}
@@ -721,7 +833,7 @@ cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
- zfree(tokens);
+ s_free(tokens);
*count = 0;
return NULL;
}
@@ -732,7 +844,7 @@ void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
- zfree(tokens);
+ s_free(tokens);
}
/* Append to the sds string "s" an escaped string representation where
@@ -906,13 +1018,13 @@ sds *sdssplitargs(const char *line, int *argc) {
if (*p) p++;
}
/* add the token to the vector */
- vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
+ vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
/* Even on empty input string return something not NULL. */
- if (vector == NULL) vector = zmalloc(sizeof(void*));
+ if (vector == NULL) vector = s_malloc(sizeof(void*));
return vector;
}
}
@@ -920,7 +1032,7 @@ sds *sdssplitargs(const char *line, int *argc) {
err:
while((*argc)--)
sdsfree(vector[*argc]);
- zfree(vector);
+ s_free(vector);
if (current) sdsfree(current);
*argc = 0;
return NULL;
@@ -962,14 +1074,35 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#ifdef SDS_TEST_MAIN
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscatsds(join, argv[j]);
+ if (j != argc-1) join = sdscatlen(join,sep,seplen);
+ }
+ return join;
+}
+
+/* Wrappers to the allocators used by SDS. Note that SDS itself will
+ * just use the macros defined in sdsalloc.h in order to avoid paying
+ * the overhead of function calls. These wrappers are defined only for
+ * the programs SDS is linked to, so that they can touch the SDS
+ * internals even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
-int main(void) {
+#define UNUSED(x) (void)(x)
+int sdsTest(void) {
{
- struct sdshdr *sh;
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
@@ -1005,6 +1138,7 @@ int main(void) {
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
"9223372036854775807--",60) == 0)
+ printf("[%s]\n",x);
sdsfree(x);
x = sdsnew("--");
@@ -1014,6 +1148,18 @@ int main(void) {
memcmp(x,"--4294967295,18446744073709551615--",35) == 0)
sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," x");
+ test_cond("sdstrim() works when all chars match",
+ sdslen(x) == 0)
+
+ sdsfree(x);
+ x = sdsnew(" x ");
+ sdstrim(x," ");
+ test_cond("sdstrim() works when a single char remains",
+ sdslen(x) == 1 && x[0] == 'x')
+
+ sdsfree(x);
x = sdsnew("xxciaoyyy");
sdstrim(x,"xy");
test_cond("sdstrim() correctly trims characters",
@@ -1080,24 +1226,47 @@ int main(void) {
memcmp(y,"\"\\a\\n\\x00foo\\r\"",15) == 0)
{
- int oldfree;
+ unsigned int oldfree;
+ char *p;
+ int step = 10, j, i;
sdsfree(x);
+ sdsfree(y);
x = sdsnew("0");
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
- x = sdsMakeRoomFor(x,1);
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
- oldfree = sh->free;
- x[1] = '1';
- sdsIncrLen(x,1);
- test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
- test_cond("sdsIncrLen() -- len", sh->len == 2);
- test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
+ test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+ /* Run the test a few times in order to hit the first two
+ * SDS header types. */
+ for (i = 0; i < 10; i++) {
+ int oldlen = sdslen(x);
+ x = sdsMakeRoomFor(x,step);
+ int type = x[-1]&SDS_TYPE_MASK;
+
+ test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+ if (type != SDS_TYPE_5) {
+ test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+ oldfree = sdsavail(x);
+ }
+ p = x+oldlen;
+ for (j = 0; j < step; j++) {
+ p[j] = 'A'+j;
+ }
+ sdsIncrLen(x,step);
+ }
+ test_cond("sdsMakeRoomFor() content",
+ memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+ test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+ sdsfree(x);
}
}
test_report()
return 0;
}
#endif
+
+#ifdef SDS_TEST_MAIN
+int main(void) {
+ return sdsTest();
+}
+#endif
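
The sdsMakeRoomFor()/sdsIncrLen() pair reworked above lets callers write straight into the string's free space and commit the length afterwards, skipping an intermediate copy. A minimal sketch of the pattern the sdsIncrLen() comment describes (assuming a readable descriptor `fd`; not part of the patch):

    #include <unistd.h>
    #include "sds.h"

    /* Sketch only: append up to 4k from `fd` directly into s's free space. */
    static sds readChunk(int fd, sds s) {
        size_t oldlen = sdslen(s);
        ssize_t nread;

        s = sdsMakeRoomFor(s, 4096);     /* guarantee 4k of free space */
        if (s == NULL) return NULL;      /* OOM */
        nread = read(fd, s + oldlen, 4096);
        if (nread > 0)
            sdsIncrLen(s, (int)nread);   /* commit bytes, re-terminate with \0 */
        return s;
    }
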
diff --git a/deps/hiredis/sds.h b/deps/hiredis/sds.h
index 37aaf7a28..13be75a9f 100644
--- a/deps/hiredis/sds.h
+++ b/deps/hiredis/sds.h
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,32 +37,188 @@
#include <sys/types.h>
#include <stdarg.h>
+#include <stdint.h>
typedef char *sds;
-struct sdshdr {
- unsigned int len;
- unsigned int free;
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However, it is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+ unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr8 {
+ uint8_t len; /* used */
+ uint8_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+ uint16_t len; /* used */
+ uint16_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+ uint32_t len; /* used */
+ uint32_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+ uint64_t len; /* used */
+ uint64_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
+#define SDS_TYPE_5 0
+#define SDS_TYPE_8 1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
+
static inline size_t sdslen(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->len;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->len;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->len;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->len;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->len;
+ }
+ return 0;
}
static inline size_t sdsavail(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->free;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ return 0;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ return sh->alloc - sh->len;
+ }
+ }
+ return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len = newlen;
+ break;
+ }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len += inc;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len += inc;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len += inc;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len += inc;
+ break;
+ }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->alloc;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->alloc;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->alloc;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->alloc;
+ }
+ return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ /* Nothing to do, this type has no total allocation info. */
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->alloc = newlen;
+ break;
+ }
}
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
-size_t sdslen(const sds s);
sds sdsdup(const sds s);
void sdsfree(sds s);
-size_t sdsavail(const sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
@@ -91,11 +249,25 @@ sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, int incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
+
+#ifdef REDIS_TEST
+int sdsTest(int argc, char *argv[]);
+#endif
#endif
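
Since the new headers encode the type in the low three bits of the flags byte at s[-1], the promotion between header sizes can be observed directly. A small illustrative probe (not part of the patch):

    #include <stdio.h>
    #include "sds.h"

    static void probeHeaders(void) {
        sds a = sdsnew("short");        /* 5 bytes: length fits in the flags byte */
        sds b = sdsnewlen(NULL, 300);   /* 300 bytes: promoted to the 16-bit header */
        printf("a: type %d, len %zu, avail %zu\n",
               a[-1] & SDS_TYPE_MASK, sdslen(a), sdsavail(a));
        printf("b: type %d, len %zu, avail %zu\n",
               b[-1] & SDS_TYPE_MASK, sdslen(b), sdsavail(b));
        sdsfree(a);
        sdsfree(b);
    }
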
diff --git a/deps/hiredis/sdsalloc.h b/deps/hiredis/sdsalloc.h
new file mode 100644
index 000000000..f43023c48
--- /dev/null
+++ b/deps/hiredis/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just define the following macros to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#define s_malloc malloc
+#define s_realloc realloc
+#define s_free free
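
These three macros are the whole allocator interface: repoint them before sds.c is compiled and every SDS allocation is routed accordingly. A hypothetical variant wired to a zmalloc-style allocator (Redis' own copy of this header does something similar; names assumed):

    /* Hypothetical sdsalloc.h routing SDS through zmalloc wrappers. */
    #include "zmalloc.h"
    #define s_malloc  zmalloc
    #define s_realloc zrealloc
    #define s_free    zfree
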
diff --git a/deps/hiredis/test.c b/deps/hiredis/test.c
index 2cc35a46f..a23d60676 100644
--- a/deps/hiredis/test.c
+++ b/deps/hiredis/test.c
@@ -11,6 +11,7 @@
#include <limits.h>
#include "hiredis.h"
+#include "net.h"
enum connection_type {
CONN_TCP,
@@ -29,7 +30,7 @@ struct config {
struct {
const char *path;
- } unix;
+ } unix_sock;
};
/* The following lines make up our testing "framework" :) */
@@ -43,6 +44,13 @@ static long long usec(void) {
return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
}
+/* The assert() calls below have side effects, so we need assert()
+ * even if we are compiling without asserts (-DNDEBUG). */
+#ifdef NDEBUG
+#undef assert
+#define assert(e) (void)(e)
+#endif
+
static redisContext *select_database(redisContext *c) {
redisReply *reply;
@@ -51,7 +59,7 @@ static redisContext *select_database(redisContext *c) {
assert(reply != NULL);
freeReplyObject(reply);
- /* Make sure the DB is empty */
+ /* Make sure the DB is empty */
reply = redisCommand(c,"DBSIZE");
assert(reply != NULL);
if (reply->type == REDIS_REPLY_INTEGER && reply->integer == 0) {
@@ -89,10 +97,10 @@ static redisContext *connect(struct config config) {
if (config.type == CONN_TCP) {
c = redisConnect(config.tcp.host, config.tcp.port);
} else if (config.type == CONN_UNIX) {
- c = redisConnectUnix(config.unix.path);
+ c = redisConnectUnix(config.unix_sock.path);
} else if (config.type == CONN_FD) {
/* Create a dummy connection just to get an fd to inherit */
- redisContext *dummy_ctx = redisConnectUnix(config.unix.path);
+ redisContext *dummy_ctx = redisConnectUnix(config.unix_sock.path);
if (dummy_ctx) {
int fd = disconnect(dummy_ctx, 1);
printf("Connecting to inherited fd %d\n", fd);
@@ -107,6 +115,7 @@ static redisContext *connect(struct config config) {
exit(1);
} else if (c->err) {
printf("Connection error: %s\n", c->errstr);
+ redisFree(c);
exit(1);
}
@@ -215,6 +224,22 @@ static void test_format_commands(void) {
test_cond(strncmp(cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 &&
len == 4+4+(3+2)+4+(7+2)+4+(3+2));
free(cmd);
+
+ sds sds_cmd;
+
+ sds_cmd = sdsempty();
+ test("Format command into sds by passing argc/argv without lengths: ");
+ len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,NULL);
+ test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\nbar\r\n",len) == 0 &&
+ len == 4+4+(3+2)+4+(3+2)+4+(3+2));
+ sdsfree(sds_cmd);
+
+ sds_cmd = sdsempty();
+ test("Format command into sds by passing argc/argv with lengths: ");
+ len = redisFormatSdsCommandArgv(&sds_cmd,argc,argv,lens);
+ test_cond(strncmp(sds_cmd,"*3\r\n$3\r\nSET\r\n$7\r\nfoo\0xxx\r\n$3\r\nbar\r\n",len) == 0 &&
+ len == 4+4+(3+2)+4+(7+2)+4+(3+2));
+ sdsfree(sds_cmd);
}
static void test_append_formatted_commands(struct config config) {
@@ -318,16 +343,31 @@ static void test_reply_reader(void) {
redisReaderFree(reader);
}
+static void test_free_null(void) {
+ void *redisCtx = NULL;
+ void *reply = NULL;
+
+ test("Don't fail when redisFree is passed a NULL value: ");
+ redisFree(redisCtx);
+ test_cond(redisCtx == NULL);
+
+ test("Don't fail when freeReplyObject is passed a NULL value: ");
+ freeReplyObject(reply);
+ test_cond(reply == NULL);
+}
+
static void test_blocking_connection_errors(void) {
redisContext *c;
test("Returns error when host cannot be resolved: ");
- c = redisConnect((char*)"idontexist.local", 6379);
+ c = redisConnect((char*)"idontexist.test", 6379);
test_cond(c->err == REDIS_ERR_OTHER &&
(strcmp(c->errstr,"Name or service not known") == 0 ||
- strcmp(c->errstr,"Can't resolve: idontexist.local") == 0 ||
+ strcmp(c->errstr,"Can't resolve: idontexist.test") == 0 ||
strcmp(c->errstr,"nodename nor servname provided, or not known") == 0 ||
strcmp(c->errstr,"No address associated with hostname") == 0 ||
+ strcmp(c->errstr,"Temporary failure in name resolution") == 0 ||
+ strcmp(c->errstr,"hostname nor servname provided, or not known") == 0 ||
strcmp(c->errstr,"no address associated with name") == 0));
redisFree(c);
@@ -337,7 +377,7 @@ static void test_blocking_connection_errors(void) {
strcmp(c->errstr,"Connection refused") == 0);
redisFree(c);
- test("Returns error when the unix socket path doesn't accept connections: ");
+ test("Returns error when the unix_sock socket path doesn't accept connections: ");
c = redisConnectUnix((char*)"/tmp/idontexist.sock");
test_cond(c->err == REDIS_ERR_IO); /* Don't care about the message... */
redisFree(c);
@@ -421,6 +461,52 @@ static void test_blocking_connection(struct config config) {
disconnect(c, 0);
}
+static void test_blocking_connection_timeouts(struct config config) {
+ redisContext *c;
+ redisReply *reply;
+ ssize_t s;
+ const char *cmd = "DEBUG SLEEP 3\r\n";
+ struct timeval tv;
+
+ c = connect(config);
+ test("Successfully completes a command when the timeout is not exceeded: ");
+ reply = redisCommand(c,"SET foo fast");
+ freeReplyObject(reply);
+ tv.tv_sec = 0;
+ tv.tv_usec = 10000;
+ redisSetTimeout(c, tv);
+ reply = redisCommand(c, "GET foo");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STRING && memcmp(reply->str, "fast", 4) == 0);
+ freeReplyObject(reply);
+ disconnect(c, 0);
+
+ c = connect(config);
+ test("Does not return a reply when the command times out: ");
+ s = write(c->fd, cmd, strlen(cmd));
+ tv.tv_sec = 0;
+ tv.tv_usec = 10000;
+ redisSetTimeout(c, tv);
+ reply = redisCommand(c, "GET foo");
+ test_cond(s > 0 && reply == NULL && c->err == REDIS_ERR_IO && strcmp(c->errstr, "Resource temporarily unavailable") == 0);
+ freeReplyObject(reply);
+
+ test("Reconnect properly reconnects after a timeout: ");
+ redisReconnect(c);
+ reply = redisCommand(c, "PING");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0);
+ freeReplyObject(reply);
+
+ test("Reconnect properly uses owned parameters: ");
+ config.tcp.host = "foo";
+ config.unix_sock.path = "foo";
+ redisReconnect(c);
+ reply = redisCommand(c, "PING");
+ test_cond(reply != NULL && reply->type == REDIS_REPLY_STATUS && strcmp(reply->str, "PONG") == 0);
+ freeReplyObject(reply);
+
+ disconnect(c, 0);
+}
+
static void test_blocking_io_errors(struct config config) {
redisContext *c;
redisReply *reply;
@@ -444,7 +530,7 @@ static void test_blocking_io_errors(struct config config) {
test("Returns I/O error when the connection is lost: ");
reply = redisCommand(c,"QUIT");
- if (major >= 2 && minor > 0) {
+ if (major > 2 || (major == 2 && minor > 0)) {
/* > 2.0 returns OK on QUIT and read() should be issued once more
* to know the descriptor is at EOF. */
test_cond(strcasecmp(reply->str,"OK") == 0 &&
@@ -482,7 +568,8 @@ static void test_invalid_timeout_errors(struct config config) {
c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout);
- test_cond(c->err == REDIS_ERR_IO);
+ test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0);
+ redisFree(c);
test("Set error when an invalid timeout sec value is given to redisConnectWithTimeout: ");
@@ -491,8 +578,7 @@ static void test_invalid_timeout_errors(struct config config) {
c = redisConnectWithTimeout(config.tcp.host, config.tcp.port, config.tcp.timeout);
- test_cond(c->err == REDIS_ERR_IO);
-
+ test_cond(c->err == REDIS_ERR_IO && strcmp(c->errstr, "Invalid timeout specified") == 0);
redisFree(c);
}
@@ -666,7 +752,7 @@ int main(int argc, char **argv) {
.host = "127.0.0.1",
.port = 6379
},
- .unix = {
+ .unix_sock = {
.path = "/tmp/redis.sock"
}
};
@@ -687,7 +773,7 @@ int main(int argc, char **argv) {
cfg.tcp.port = atoi(argv[0]);
} else if (argc >= 2 && !strcmp(argv[0],"-s")) {
argv++; argc--;
- cfg.unix.path = argv[0];
+ cfg.unix_sock.path = argv[0];
} else if (argc >= 1 && !strcmp(argv[0],"--skip-throughput")) {
throughput = 0;
} else if (argc >= 1 && !strcmp(argv[0],"--skip-inherit-fd")) {
@@ -702,27 +788,31 @@ int main(int argc, char **argv) {
test_format_commands();
test_reply_reader();
test_blocking_connection_errors();
+ test_free_null();
printf("\nTesting against TCP connection (%s:%d):\n", cfg.tcp.host, cfg.tcp.port);
cfg.type = CONN_TCP;
test_blocking_connection(cfg);
+ test_blocking_connection_timeouts(cfg);
test_blocking_io_errors(cfg);
test_invalid_timeout_errors(cfg);
test_append_formatted_commands(cfg);
if (throughput) test_throughput(cfg);
- printf("\nTesting against Unix socket connection (%s):\n", cfg.unix.path);
+ printf("\nTesting against Unix socket connection (%s):\n", cfg.unix_sock.path);
cfg.type = CONN_UNIX;
test_blocking_connection(cfg);
+ test_blocking_connection_timeouts(cfg);
test_blocking_io_errors(cfg);
if (throughput) test_throughput(cfg);
if (test_inherit_fd) {
- printf("\nTesting against inherited fd (%s):\n", cfg.unix.path);
+ printf("\nTesting against inherited fd (%s):\n", cfg.unix_sock.path);
cfg.type = CONN_FD;
test_blocking_connection(cfg);
}
+
if (fails) {
printf("*** %d TESTS FAILED ***\n", fails);
return 1;
diff --git a/deps/hiredis/win32.h b/deps/hiredis/win32.h
new file mode 100644
index 000000000..1a27c18f2
--- /dev/null
+++ b/deps/hiredis/win32.h
@@ -0,0 +1,42 @@
+#ifndef _WIN32_HELPER_INCLUDE
+#define _WIN32_HELPER_INCLUDE
+#ifdef _MSC_VER
+
+#ifndef inline
+#define inline __inline
+#endif
+
+#ifndef va_copy
+#define va_copy(d,s) ((d) = (s))
+#endif
+
+#ifndef snprintf
+#define snprintf c99_snprintf
+
+__inline int c99_vsnprintf(char* str, size_t size, const char* format, va_list ap)
+{
+ int count = -1;
+
+ if (size != 0)
+ count = _vsnprintf_s(str, size, _TRUNCATE, format, ap);
+ if (count == -1)
+ count = _vscprintf(format, ap);
+
+ return count;
+}
+
+__inline int c99_snprintf(char* str, size_t size, const char* format, ...)
+{
+ int count;
+ va_list ap;
+
+ va_start(ap, format);
+ count = c99_vsnprintf(str, size, format, ap);
+ va_end(ap);
+
+ return count;
+}
+#endif
+
+#endif
+#endif
\ No newline at end of file
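
For reference, the C99 semantics the shim above emulates on MSVC: snprintf always null-terminates and reports the length it would have written, even when it truncates. A small illustration (assuming a conforming snprintf; not part of the patch):

    #include <stdio.h>

    static void demoSnprintf(void) {
        char small[4];
        int needed = snprintf(small, sizeof(small), "%s", "hello");
        /* small now holds "hel" (truncated, still null-terminated);
         * needed is 5, the length of the untruncated output. */
        (void)needed;
    }
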
diff --git a/deps/hiredis/zmalloc.h b/deps/hiredis/zmalloc.h
deleted file mode 100644
index 99b87ace9..000000000
--- a/deps/hiredis/zmalloc.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* Drop in replacement for zmalloc.h in order to just use libc malloc without
- * any wrappering. */
-
-#ifndef ZMALLOC_H
-#define ZMALLOC_H
-
-#define zmalloc malloc
-#define zrealloc realloc
-#define zcalloc(x) calloc(x,1)
-#define zfree free
-#define zstrdup strdup
-
-#endif
diff --git a/deps/jemalloc/.appveyor.yml b/deps/jemalloc/.appveyor.yml
new file mode 100644
index 000000000..9a7d00a99
--- /dev/null
+++ b/deps/jemalloc/.appveyor.yml
@@ -0,0 +1,42 @@
+version: '{build}'
+
+environment:
+ matrix:
+ - MSYSTEM: MINGW64
+ CPU: x86_64
+ MSVC: amd64
+ - MSYSTEM: MINGW32
+ CPU: i686
+ MSVC: x86
+ - MSYSTEM: MINGW64
+ CPU: x86_64
+ - MSYSTEM: MINGW32
+ CPU: i686
+ - MSYSTEM: MINGW64
+ CPU: x86_64
+ MSVC: amd64
+ CONFIG_FLAGS: --enable-debug
+ - MSYSTEM: MINGW32
+ CPU: i686
+ MSVC: x86
+ CONFIG_FLAGS: --enable-debug
+ - MSYSTEM: MINGW64
+ CPU: x86_64
+ CONFIG_FLAGS: --enable-debug
+ - MSYSTEM: MINGW32
+ CPU: i686
+ CONFIG_FLAGS: --enable-debug
+
+install:
+ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
+ - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
+ - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
+ - pacman --noconfirm -Suy mingw-w64-%CPU%-make
+
+build_script:
+ - bash -c "autoconf"
+ - bash -c "./configure $CONFIG_FLAGS"
+ - mingw32-make
+ - file lib/jemalloc.dll
+ - mingw32-make tests
+ - mingw32-make -k check
diff --git a/deps/jemalloc/.autom4te.cfg b/deps/jemalloc/.autom4te.cfg
new file mode 100644
index 000000000..fe2424db5
--- /dev/null
+++ b/deps/jemalloc/.autom4te.cfg
@@ -0,0 +1,3 @@
+begin-language: "Autoconf-without-aclocal-m4"
+args: --no-cache
+end-language: "Autoconf-without-aclocal-m4"
diff --git a/deps/jemalloc/.gitattributes b/deps/jemalloc/.gitattributes
new file mode 100644
index 000000000..6313b56c5
--- /dev/null
+++ b/deps/jemalloc/.gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
diff --git a/deps/jemalloc/.gitignore b/deps/jemalloc/.gitignore
index 4c408ec2c..19199ccb7 100644
--- a/deps/jemalloc/.gitignore
+++ b/deps/jemalloc/.gitignore
@@ -1,8 +1,6 @@
-/*.gcov.*
-
-/autom4te.cache/
-
+/bin/jemalloc-config
/bin/jemalloc.sh
+/bin/jeprof
/config.stamp
/config.log
@@ -15,14 +13,20 @@
/doc/jemalloc.html
/doc/jemalloc.3
+/jemalloc.pc
+
/lib/
/Makefile
-/include/jemalloc/internal/jemalloc_internal.h
+/include/jemalloc/internal/jemalloc_preamble.h
/include/jemalloc/internal/jemalloc_internal_defs.h
+/include/jemalloc/internal/private_namespace.gen.h
/include/jemalloc/internal/private_namespace.h
-/include/jemalloc/internal/private_unnamespace.h
+/include/jemalloc/internal/private_namespace_jet.gen.h
+/include/jemalloc/internal/private_namespace_jet.h
+/include/jemalloc/internal/private_symbols.awk
+/include/jemalloc/internal/private_symbols_jet.awk
/include/jemalloc/internal/public_namespace.h
/include/jemalloc/internal/public_symbols.txt
/include/jemalloc/internal/public_unnamespace.h
@@ -35,10 +39,12 @@
/include/jemalloc/jemalloc_protos.h
/include/jemalloc/jemalloc_protos_jet.h
/include/jemalloc/jemalloc_rename.h
+/include/jemalloc/jemalloc_typedefs.h
/src/*.[od]
-/src/*.gcda
-/src/*.gcno
+/src/*.sym
+
+/run_tests.out/
/test/test.sh
test/include/test/jemalloc_test.h
@@ -47,26 +53,41 @@ test/include/test/jemalloc_test_defs.h
/test/integration/[A-Za-z]*
!/test/integration/[A-Za-z]*.*
/test/integration/*.[od]
-/test/integration/*.gcda
-/test/integration/*.gcno
/test/integration/*.out
+/test/integration/cpp/[A-Za-z]*
+!/test/integration/cpp/[A-Za-z]*.*
+/test/integration/cpp/*.[od]
+/test/integration/cpp/*.out
+
/test/src/*.[od]
-/test/src/*.gcda
-/test/src/*.gcno
/test/stress/[A-Za-z]*
!/test/stress/[A-Za-z]*.*
/test/stress/*.[od]
-/test/stress/*.gcda
-/test/stress/*.gcno
/test/stress/*.out
/test/unit/[A-Za-z]*
!/test/unit/[A-Za-z]*.*
/test/unit/*.[od]
-/test/unit/*.gcda
-/test/unit/*.gcno
/test/unit/*.out
/VERSION
+
+*.pdb
+*.sdf
+*.opendb
+*.VC.db
+*.opensdf
+*.cachefile
+*.suo
+*.user
+*.sln.docstates
+*.tmp
+.vs/
+/msvc/Win32/
+/msvc/x64/
+/msvc/projects/*/*/Debug*/
+/msvc/projects/*/*/Release*/
+/msvc/projects/*/*/Win32/
+/msvc/projects/*/*/x64/
diff --git a/deps/jemalloc/.travis.yml b/deps/jemalloc/.travis.yml
new file mode 100644
index 000000000..4cc116e5f
--- /dev/null
+++ b/deps/jemalloc/.travis.yml
@@ -0,0 +1,156 @@
+language: generic
+dist: precise
+
+matrix:
+ include:
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ addons:
+ apt:
+ packages:
+ - gcc-multilib
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+
+
+before_script:
+ - autoconf
+ - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS
+ - make -j3
+ - make -j3 tests
+
+script:
+ - make check
+
diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING
index bdda0feb9..98458d971 100644
--- a/deps/jemalloc/COPYING
+++ b/deps/jemalloc/COPYING
@@ -1,10 +1,10 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following license:
--------------------------------------------------------------------------------
-Copyright (C) 2002-2014 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2018 Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
-Copyright (C) 2009-2014 Facebook, Inc. All rights reserved.
+Copyright (C) 2009-2018 Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog
index d56ee999e..29a00fb78 100644
--- a/deps/jemalloc/ChangeLog
+++ b/deps/jemalloc/ChangeLog
@@ -1,10 +1,844 @@
Following are change highlights associated with official releases. Important
-bug fixes are all mentioned, but internal enhancements are omitted here for
-brevity (even though they are more fun to write about). Much more detail can be
-found in the git revision history:
+bug fixes are all mentioned, but some internal enhancements are omitted here for
+brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
+* 5.1.0 (May 4th, 2018)
+
+ This release is primarily about fine-tuning, ranging from several new features
+ to numerous notable performance and portability enhancements. The release and
+ prior dev versions have been running in multiple large scale applications for
+ months, and the cumulative improvements are substantial in many cases.
+
+  Given the long and successful production runs, this release is likely a good
+  candidate for applications to upgrade to, from both jemalloc 5.0 and earlier
+  releases. For
+ performance-critical applications, the newly added TUNING.md provides
+ guidelines on jemalloc tuning.
+
+ New features:
+ - Implement transparent huge page support for internal metadata. (@interwq)
+ - Add opt.thp to allow enabling / disabling transparent huge pages for all
+ mappings. (@interwq)
+ - Add maximum background thread count option. (@djwatson)
+ - Allow prof_active to control opt.lg_prof_interval and prof.gdump.
+ (@interwq)
+ - Allow arena index lookup based on allocation addresses via mallctl.
+ (@lionkov)
+ - Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD)
+ - Add opt.lg_extent_max_active_fit to set the max ratio between the size of
+ the active extent selected (to split off from) and the size of the requested
+ allocation. (@interwq, @davidtgoldblatt)
+ - Add retain_grow_limit to set the max size when growing virtual address
+ space. (@interwq)
+  - Add mallctl interfaces (see the sketch after this list):
+ + arena.<i>.retain_grow_limit (@interwq)
+ + arenas.lookup (@lionkov)
+ + max_background_threads (@djwatson)
+ + opt.lg_extent_max_active_fit (@interwq)
+ + opt.max_background_threads (@djwatson)
+ + opt.metadata_thp (@interwq)
+ + opt.thp (@interwq)
+ + stats.metadata_thp (@interwq)
+
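+  As a rough usage sketch (illustrative only, assuming an unprefixed build),
+  the new mallctls follow the usual mallctl() read/write pattern:
+
+      #include <stdio.h>
+      #include <jemalloc/jemalloc.h>
+
+      int main(void) {
+          const char *thp; /* e.g. "disabled", "auto", "always" */
+          size_t len = sizeof(thp);
+          if (mallctl("opt.metadata_thp", &thp, &len, NULL, 0) == 0)
+              printf("opt.metadata_thp: %s\n", thp);
+          size_t limit = (size_t)1 << 30; /* hypothetical 1 GiB cap */
+          mallctl("arena.0.retain_grow_limit", NULL, NULL, &limit,
+              sizeof(limit));
+          return 0;
+      }
+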
+ Portability improvements:
+ - Support GNU/kFreeBSD configuration. (@paravoid)
+ - Support m68k, nios2 and SH3 architectures. (@paravoid)
+ - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo)
+ - Fix symbol listing for cross-compiling. (@tamird)
+ - Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid)
+ - Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin)
+ - Fix MSVC 2015 & 2017 builds. (@rustyx)
+ - Improve RISC-V support. (@EdSchouten)
+ - Set name mangling script in strict mode. (@nicolov)
+ - Avoid MADV_HUGEPAGE on ARM. (@marxin)
+ - Modify configure to determine return value of strerror_r.
+ (@davidtgoldblatt, @cferris1000)
+  - Make sure CXXFLAGS is tested with the C++ compiler. (@nehaljwani)
+ - Fix 32-bit build on MSVC. (@rustyx)
+ - Fix external symbol on MSVC. (@maksqwe)
+ - Avoid a printf format specifier warning. (@jasone)
+ - Add configure option --disable-initial-exec-tls which can allow jemalloc to
+ be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD)
+ - AArch64: Add ILP32 support. (@cmuellner)
+ - Add --with-lg-vaddr configure option to support cross compiling.
+ (@cmuellner, @davidtgoldblatt)
+
+ Optimizations and refactors:
+ - Improve active extent fit with extent_max_active_fit. This considerably
+ reduces fragmentation over time and improves virtual memory and metadata
+ usage. (@davidtgoldblatt, @interwq)
+ - Eagerly coalesce large extents to reduce fragmentation. (@interwq)
+ - sdallocx: only read size info when page aligned (i.e. possibly sampled),
+ which speeds up the sized deallocation path significantly. (@interwq)
+ - Avoid attempting new mappings for in place expansion with retain, since
+ it rarely succeeds in practice and causes high overhead. (@interwq)
+ - Refactor OOM handling in newImpl. (@wqfish)
+ - Add internal fine-grained logging functionality for debugging use.
+ (@davidtgoldblatt)
+ - Refactor arena / tcache interactions. (@davidtgoldblatt)
+ - Refactor extent management with dumpable flag. (@davidtgoldblatt)
+ - Add runtime detection of lazy purging. (@interwq)
+ - Use pairing heap instead of red-black tree for extents_avail. (@djwatson)
+ - Use sysctl on startup in FreeBSD. (@trasz)
+ - Use thread local prng state instead of atomic. (@djwatson)
+  - Make decay always purge one more extent than before, because in
+ practice large extents are usually the ones that cross the decay threshold.
+ Purging the additional extent helps save memory as well as reduce VM
+ fragmentation. (@interwq)
+ - Fast division by dynamic values. (@davidtgoldblatt)
+ - Improve the fit for aligned allocation. (@interwq, @edwinsmith)
+ - Refactor extent_t bitpacking. (@rkmisra)
+ - Optimize the generated assembly for ticker operations. (@davidtgoldblatt)
+ - Convert stats printing to use a structured text emitter. (@davidtgoldblatt)
+ - Remove preserve_lru feature for extents management. (@djwatson)
+ - Consolidate two memory loads into one on the fast deallocation path.
+ (@davidtgoldblatt, @interwq)
+
+ Bug fixes (most of the issues are only relevant to jemalloc 5.0):
+ - Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt)
+ - Validate returned file descriptor before use. (@zonyitoo)
+ - Fix a few background thread initialization and shutdown issues. (@interwq)
+ - Fix an extent coalesce + decay race by taking both coalescing extents off
+ the LRU list. (@interwq)
+  - Fix a potentially unbounded increase during decay, caused by one thread
+    continually stashing memory to purge while other threads generate new
+    pages. The number of pages to purge is now checked to prevent this.
+    (@interwq)
+ - Fix a FreeBSD bootstrap assertion. (@strejda, @interwq)
+ - Handle 32 bit mutex counters. (@rkmisra)
+  - Fix an indexing bug when creating background threads. (@davidtgoldblatt,
+ @binliu19)
+ - Fix arguments passed to extent_init. (@yuleniwo, @interwq)
+ - Fix addresses used for ordering mutexes. (@rkmisra)
+ - Fix abort_conf processing during bootstrap. (@interwq)
+ - Fix include path order for out-of-tree builds. (@cmuellner)
+
+ Incompatible changes:
+ - Remove --disable-thp. (@interwq)
+ - Remove mallctl interfaces:
+ + config.thp (@interwq)
+
+ Documentation:
+ - Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson)
+
+* 5.0.1 (July 1, 2017)
+
+ This bugfix release fixes several issues, most of which are obscure enough
+ that typical applications are not impacted.
+
+ Bug fixes:
+ - Update decay->nunpurged before purging, in order to avoid potential update
+ races and subsequent incorrect purging volume. (@interwq)
+ - Only abort on dlsym(3) error if the failure impacts an enabled feature (lazy
+ locking and/or background threads). This mitigates an initialization
+ failure bug for which we still do not have a clear reproduction test case.
+ (@interwq)
+ - Modify tsd management so that it neither crashes nor leaks if a thread's
+ only allocation activity is to call free() after TLS destructors have been
+ executed. This behavior was observed when operating with GNU libc, and is
+ unlikely to be an issue with other libc implementations. (@interwq)
+ - Mask signals during background thread creation. This prevents signals from
+ being inadvertently delivered to background threads. (@jasone,
+ @davidtgoldblatt, @interwq)
+ - Avoid inactivity checks within background threads, in order to prevent
+ recursive mutex acquisition. (@interwq)
+ - Fix extent_grow_retained() to use the specified hooks when the
+ arena.<i>.extent_hooks mallctl is used to override the default hooks.
+ (@interwq)
+ - Add missing reentrancy support for custom extent hooks which allocate.
+ (@interwq)
+ - Post-fork(2), re-initialize the list of tcaches associated with each arena
+ to contain no tcaches except the forking thread's. (@interwq)
+ - Add missing post-fork(2) mutex reinitialization for extent_grow_mtx. This
+ fixes potential deadlocks after fork(2). (@interwq)
+ - Enforce minimum autoconf version (currently 2.68), since 2.63 is known to
+ generate corrupt configure scripts. (@jasone)
+ - Ensure that the configured page size (--with-lg-page) is no larger than the
+ configured huge page size (--with-lg-hugepage). (@jasone)
+
+* 5.0.0 (June 13, 2017)
+
+ Unlike all previous jemalloc releases, this release does not use naturally
+ aligned "chunks" for virtual memory management, and instead uses page-aligned
+ "extents". This change has few externally visible effects, but the internal
+ impacts are... extensive. Many other internal changes combine to make this
+ the most cohesively designed version of jemalloc so far, with ample
+ opportunity for further enhancements.
+
+ Continuous integration is now an integral aspect of development thanks to the
+ efforts of @davidtgoldblatt, and the dev branch tends to remain reasonably
+ stable on the tested platforms (Linux, FreeBSD, macOS, and Windows). As a
+ side effect the official release frequency may decrease over time.
+
+ New features:
+ - Implement optional per-CPU arena support; threads choose which arena to use
+ based on current CPU rather than on fixed thread-->arena associations.
+ (@interwq)
+ - Implement two-phase decay of unused dirty pages. Pages transition from
+ dirty-->muzzy-->clean, where the first phase transition relies on
+ madvise(... MADV_FREE) semantics, and the second phase transition discards
+ pages such that they are replaced with demand-zeroed pages on next access.
+ (@jasone)
+ - Increase decay time resolution from seconds to milliseconds. (@jasone)
+ - Implement opt-in per CPU background threads, and use them for asynchronous
+ decay-driven unused dirty page purging. (@interwq)
+ - Add mutex profiling, which collects a variety of statistics useful for
+ diagnosing overhead/contention issues. (@interwq)
+ - Add C++ new/delete operator bindings. (@djwatson)
+ - Support manually created arena destruction, such that all data and metadata
+ are discarded. Add MALLCTL_ARENAS_DESTROYED for accessing merged stats
+ associated with destroyed arenas. (@jasone)
+ - Add MALLCTL_ARENAS_ALL as a fixed index for use in accessing
+ merged/destroyed arena statistics via mallctl. (@jasone)
+ - Add opt.abort_conf to optionally abort if invalid configuration options are
+ detected during initialization. (@interwq)
+ - Add opt.stats_print_opts, so that e.g. JSON output can be selected for the
+ stats dumped during exit if opt.stats_print is true. (@jasone)
+ - Add --with-version=VERSION for use when embedding jemalloc into another
+ project's git repository. (@jasone)
+ - Add --disable-thp to support cross compiling. (@jasone)
+ - Add --with-lg-hugepage to support cross compiling. (@jasone)
+  - Add mallctl interfaces (various authors; see the sketch after this list):
+ + background_thread
+ + opt.abort_conf
+ + opt.retain
+ + opt.percpu_arena
+ + opt.background_thread
+ + opt.{dirty,muzzy}_decay_ms
+ + opt.stats_print_opts
+ + arena.<i>.initialized
+ + arena.<i>.destroy
+ + arena.<i>.{dirty,muzzy}_decay_ms
+ + arena.<i>.extent_hooks
+ + arenas.{dirty,muzzy}_decay_ms
+ + arenas.bin.<i>.slab_size
+ + arenas.nlextents
+ + arenas.lextent.<i>.size
+ + arenas.create
+ + stats.background_thread.{num_threads,num_runs,run_interval}
+ + stats.mutexes.{ctl,background_thread,prof,reset}.
+ {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds,
+ num_owner_switch}
+ + stats.arenas.<i>.{dirty,muzzy}_decay_ms
+ + stats.arenas.<i>.uptime
+ + stats.arenas.<i>.{pmuzzy,base,internal,resident}
+ + stats.arenas.<i>.{dirty,muzzy}_{npurge,nmadvise,purged}
+ + stats.arenas.<i>.bins.<j>.{nslabs,reslabs,curslabs}
+ + stats.arenas.<i>.bins.<j>.mutex.
+ {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds,
+ num_owner_switch}
+ + stats.arenas.<i>.lextents.<j>.{nmalloc,ndalloc,nrequests,curlextents}
+ + stats.arenas.i.mutexes.{large,extent_avail,extents_dirty,extents_muzzy,
+ extents_retained,decay_dirty,decay_muzzy,base,tcache_list}.
+ {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds,
+ num_owner_switch}
+
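+  A minimal sketch (illustrative only, assuming an unprefixed build) of the
+  new manually created arena lifecycle:
+
+      #include <stdio.h>
+      #include <jemalloc/jemalloc.h>
+
+      int main(void) {
+          unsigned ind;
+          size_t len = sizeof(ind);
+          if (mallctl("arenas.create", &ind, &len, NULL, 0) != 0)
+              return 1;
+          void *p = mallocx(4096, MALLOCX_ARENA(ind) | MALLOCX_TCACHE_NONE);
+          dallocx(p, MALLOCX_TCACHE_NONE);
+          char name[64];
+          snprintf(name, sizeof(name), "arena.%u.destroy", ind);
+          mallctl(name, NULL, NULL, NULL, 0); /* discard data and metadata */
+          return 0;
+      }
+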
+ Portability improvements:
+ - Improve reentrant allocation support, such that deadlock is less likely if
+ e.g. a system library call in turn allocates memory. (@davidtgoldblatt,
+ @interwq)
+ - Support static linking of jemalloc with glibc. (@djwatson)
+
+ Optimizations and refactors:
+ - Organize virtual memory as "extents" of virtual memory pages, rather than as
+ naturally aligned "chunks", and store all metadata in arbitrarily distant
+ locations. This reduces virtual memory external fragmentation, and will
+ interact better with huge pages (not yet explicitly supported). (@jasone)
+ - Fold large and huge size classes together; only small and large size classes
+ remain. (@jasone)
+ - Unify the allocation paths, and merge most fast-path branching decisions.
+ (@davidtgoldblatt, @interwq)
+ - Embed per thread automatic tcache into thread-specific data, which reduces
+ conditional branches and dereferences. Also reorganize tcache to increase
+ fast-path data locality. (@interwq)
+ - Rewrite atomics to closely model the C11 API, convert various
+ synchronization from mutex-based to atomic, and use the explicit memory
+ ordering control to resolve various hypothetical races without increasing
+ synchronization overhead. (@davidtgoldblatt)
+ - Extensively optimize rtree via various methods:
+ + Add multiple layers of rtree lookup caching, since rtree lookups are now
+ part of fast-path deallocation. (@interwq)
+ + Determine rtree layout at compile time. (@jasone)
+ + Make the tree shallower for common configurations. (@jasone)
+ + Embed the root node in the top-level rtree data structure, thus avoiding
+ one level of indirection. (@jasone)
+ + Further specialize leaf elements as compared to internal node elements,
+ and directly embed extent metadata needed for fast-path deallocation.
+ (@jasone)
+ + Ignore leading always-zero address bits (architecture-specific).
+ (@jasone)
+ - Reorganize headers (ongoing work) to make them hermetic, and disentangle
+ various module dependencies. (@davidtgoldblatt)
+ - Convert various internal data structures such as size class metadata from
+ boot-time-initialized to compile-time-initialized. Propagate resulting data
+ structure simplifications, such as making arena metadata fixed-size.
+ (@jasone)
+ - Simplify size class lookups when constrained to size classes that are
+ multiples of the page size. This speeds lookups, but the primary benefit is
+ complexity reduction in code that was the source of numerous regressions.
+ (@jasone)
+ - Lock individual extents when possible for localized extent operations,
+ rather than relying on a top-level arena lock. (@davidtgoldblatt, @jasone)
+ - Use first fit layout policy instead of best fit, in order to improve
+ packing. (@jasone)
+ - If munmap(2) is not in use, use an exponential series to grow each arena's
+ virtual memory, so that the number of disjoint virtual memory mappings
+ remains low. (@jasone)
+ - Implement per arena base allocators, so that arenas never share any virtual
+ memory pages. (@jasone)
+ - Automatically generate private symbol name mangling macros. (@jasone)
+
+ Incompatible changes:
+ - Replace chunk hooks with an expanded/normalized set of extent hooks.
+ (@jasone)
+ - Remove ratio-based purging. (@jasone)
+ - Remove --disable-tcache. (@jasone)
+ - Remove --disable-tls. (@jasone)
+ - Remove --enable-ivsalloc. (@jasone)
+ - Remove --with-lg-size-class-group. (@jasone)
+ - Remove --with-lg-tiny-min. (@jasone)
+ - Remove --disable-cc-silence. (@jasone)
+ - Remove --enable-code-coverage. (@jasone)
+ - Remove --disable-munmap (replaced by opt.retain). (@jasone)
+ - Remove Valgrind support. (@jasone)
+ - Remove quarantine support. (@jasone)
+ - Remove redzone support. (@jasone)
+ - Remove mallctl interfaces (various authors):
+ + config.munmap
+ + config.tcache
+ + config.tls
+ + config.valgrind
+ + opt.lg_chunk
+ + opt.purge
+ + opt.lg_dirty_mult
+ + opt.decay_time
+ + opt.quarantine
+ + opt.redzone
+ + opt.thp
+ + arena.<i>.lg_dirty_mult
+ + arena.<i>.decay_time
+ + arena.<i>.chunk_hooks
+ + arenas.initialized
+ + arenas.lg_dirty_mult
+ + arenas.decay_time
+ + arenas.bin.<i>.run_size
+ + arenas.nlruns
+ + arenas.lrun.<i>.size
+ + arenas.nhchunks
+ + arenas.hchunk.<i>.size
+ + arenas.extend
+ + stats.cactive
+ + stats.arenas.<i>.lg_dirty_mult
+ + stats.arenas.<i>.decay_time
+ + stats.arenas.<i>.metadata.{mapped,allocated}
+ + stats.arenas.<i>.{npurge,nmadvise,purged}
+ + stats.arenas.<i>.huge.{allocated,nmalloc,ndalloc,nrequests}
+ + stats.arenas.<i>.bins.<j>.{nruns,reruns,curruns}
+ + stats.arenas.<i>.lruns.<j>.{nmalloc,ndalloc,nrequests,curruns}
+ + stats.arenas.<i>.hchunks.<j>.{nmalloc,ndalloc,nrequests,curhchunks}
+
+ Bug fixes:
+ - Improve interval-based profile dump triggering to dump only one profile when
+ a single allocation's size exceeds the interval. (@jasone)
+ - Use prefixed function names (as controlled by --with-jemalloc-prefix) when
+ pruning backtrace frames in jeprof. (@jasone)
+
+* 4.5.0 (February 28, 2017)
+
+ This is the first release to benefit from much broader continuous integration
+ testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure
+ in place for prior releases, it would have caught all of the most serious
+ regressions fixed by this release.
+
+ New features:
+ - Add --disable-thp and the opt.thp mallctl to provide opt-out mechanisms for
+ transparent huge page integration. (@jasone)
+ - Update zone allocator integration to work with macOS 10.12. (@glandium)
+ - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and
+ EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not
+ during configuration. (@jasone, @ronawho)
+
+ Bug fixes:
+ - Fix DSS (sbrk(2)-based) allocation. This regression was first released in
+ 4.3.0. (@jasone)
+ - Handle race in per size class utilization computation. This functionality
+ was first released in 4.0.0. (@interwq)
+ - Fix lock order reversal during gdump. (@jasone)
+ - Fix/refactor tcache synchronization. This regression was first released in
+ 4.0.0. (@jasone)
+ - Fix various JSON-formatted malloc_stats_print() bugs. This functionality
+ was first released in 4.3.0. (@jasone)
+ - Fix huge-aligned allocation. This regression was first released in 4.4.0.
+ (@jasone)
+ - When transparent huge page integration is enabled, detect what state pages
+ start in according to the kernel's current operating mode, and only convert
+ arena chunks to non-huge during purging if that is not their initial state.
+ This functionality was first released in 4.4.0. (@jasone)
+ - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case.
+ This regression was first released in 4.0.0. (@jasone, @428desmo)
+ - Properly detect sparc64 when building for Linux. (@glaubitz)
+
+* 4.4.0 (December 3, 2016)
+
+ New features:
+ - Add configure support for *-*-linux-android. (@cferris1000, @jasone)
+ - Add the --disable-syscall configure option, for use on systems that place
+ security-motivated limitations on syscall(2). (@jasone)
+ - Add support for Debian GNU/kFreeBSD. (@thesam)
+
+ Optimizations:
+ - Add extent serial numbers and use them where appropriate as a sort key that
+ is higher priority than address, so that the allocation policy prefers older
+ extents. This tends to improve locality (decrease fragmentation) when
+ memory grows downward. (@jasone)
+ - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized
+ on Linux 4.5 and newer. (@jasone)
+ - Mark partially purged arena chunks as non-huge-page. This improves
+ interaction with Linux's transparent huge page functionality. (@jasone)
+
+ Bug fixes:
+ - Fix size class computations for edge conditions involving extremely large
+ allocations. This regression was first released in 4.0.0. (@jasone,
+ @ingvarha)
+ - Remove overly restrictive assertions related to the cactive statistic. This
+ regression was first released in 4.1.0. (@jasone)
+ - Implement a more reliable detection scheme for os_unfair_lock on macOS.
+ (@jszakmeister)
+
+* 4.3.1 (November 7, 2016)
+
+ Bug fixes:
+ - Fix a severe virtual memory leak. This regression was first released in
+ 4.3.0. (@interwq, @jasone)
+ - Refactor atomic and prng APIs to restore support for 32-bit platforms that
+ use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone)
+
+* 4.3.0 (November 4, 2016)
+
+ This is the first release that passes the test suite for multiple Windows
+ configurations, thanks in large part to @glandium setting up continuous
+ integration via AppVeyor (and Travis CI for Linux and OS X).
+
+ New features:
+ - Add "J" (JSON) support to malloc_stats_print(). (@jasone)
+ - Add Cray compiler support. (@ronawho)
+
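+  As a one-line, illustrative sketch (assuming the default unprefixed API),
+  the new option selects JSON output:
+
+      malloc_stats_print(NULL, NULL, "J"); /* JSON stats via default writer */
+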
+ Optimizations:
+ - Add/use adaptive spinning for bootstrapping and radix tree node
+ initialization. (@jasone)
+
+ Bug fixes:
+ - Fix large allocation to search starting in the optimal size class heap,
+ which can substantially reduce virtual memory churn and fragmentation. This
+ regression was first released in 4.0.0. (@mjp41, @jasone)
+ - Fix stats.arenas.<i>.nthreads accounting. (@interwq)
+ - Fix and simplify decay-based purging. (@jasone)
+ - Make DSS (sbrk(2)-related) operations lockless, which resolves potential
+ deadlocks during thread exit. (@jasone)
+ - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun,
+ @jasone)
+ - Fix over-sized allocation of arena_t (plus associated stats) data
+ structures. (@jasone, @interwq)
+ - Fix EXTRA_CFLAGS to not affect configuration. (@jasone)
+ - Fix a Valgrind integration bug. (@ronawho)
+ - Disallow 0x5a junk filling when running in Valgrind. (@jasone)
+ - Fix a file descriptor leak on Linux. This regression was first released in
+ 4.2.0. (@vsarunas, @jasone)
+ - Fix static linking of jemalloc with glibc. (@djwatson)
+ - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This
+ works around other libraries' system call wrappers performing reentrant
+ allocation. (@kspinka, @Whissi, @jasone)
+ - Fix OS X default zone replacement to work with OS X 10.12. (@glandium,
+ @jasone)
+ - Fix cached memory management to avoid needless commit/decommit operations
+ during purging, which resolves permanent virtual memory map fragmentation
+ issues on Windows. (@mjp41, @jasone)
+ - Fix TSD fetches to avoid (recursive) allocation. This is relevant to
+ non-TLS and Windows configurations. (@jasone)
+ - Fix malloc_conf overriding to work on Windows. (@jasone)
+ - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone)
+
+* 4.2.1 (June 8, 2016)
+
+ Bug fixes:
+ - Fix bootstrapping issues for configurations that require allocation during
+ tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone)
+ - Fix gettimeofday() version of nstime_update(). (@ronawho)
+ - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho)
+ - Fix potential VM map fragmentation regression. (@jasone)
+ - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone)
+ - Fix heap profiling context leaks in reallocation edge cases. (@jasone)
+
+* 4.2.0 (May 12, 2016)
+
+ New features:
+ - Add the arena.<i>.reset mallctl, which makes it possible to discard all of
+ an arena's allocations in a single operation. (@jasone)
+ - Add the stats.retained and stats.arenas.<i>.retained statistics. (@jasone)
+ - Add the --with-version configure option. (@jasone)
+ - Support --with-lg-page values larger than actual page size. (@jasone)
+
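+  As an illustrative sketch (not part of the notes), discarding all of arena
+  0's allocations in a single operation:
+
+      mallctl("arena.0.reset", NULL, NULL, NULL, 0);
+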
+ Optimizations:
+ - Use pairing heaps rather than red-black trees for various hot data
+ structures. (@djwatson, @jasone)
+ - Streamline fast paths of rtree operations. (@jasone)
+ - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone)
+ - Decommit unused virtual memory if the OS does not overcommit. (@jasone)
+ - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order
+ to avoid unfortunate interactions during fork(2). (@jasone)
+
+ Bug fixes:
+ - Fix chunk accounting related to triggering gdump profiles. (@jasone)
+ - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone)
+ - Scale leak report summary according to sampling probability. (@jasone)
+
+* 4.1.1 (May 3, 2016)
+
+ This bugfix release resolves a variety of mostly minor issues, though the
+ bitmap fix is critical for 64-bit Windows.
+
+ Bug fixes:
+ - Fix the linear scan version of bitmap_sfu() to shift by the proper amount
+ even when sizeof(long) is not the same as sizeof(void *), as on 64-bit
+ Windows. (@jasone)
+ - Fix hashing functions to avoid unaligned memory accesses (and resulting
+ crashes). This is relevant at least to some ARM-based platforms.
+ (@rkmisra)
+ - Fix fork()-related lock rank ordering reversals. These reversals were
+ unlikely to cause deadlocks in practice except when heap profiling was
+ enabled and active. (@jasone)
+ - Fix various chunk leaks in OOM code paths. (@jasone)
+ - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone)
+ - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin)
+ - Fix a variety of test failures that were due to test fragility rather than
+ core bugs. (@jasone)
+
+* 4.1.0 (February 28, 2016)
+
+ This release is primarily about optimizations, but it also incorporates a lot
+  of portability-motivated refactoring and enhancements. Many people worked on
+  this release; even omitting minor changes (see the git revision history) and
+  the people who reported and diagnosed issues, so much work was contributed
+  that, starting with this release, changes are annotated with author credits
+  to reflect the collaborative effort involved.
+
+ New features:
+ - Implement decay-based unused dirty page purging, a major optimization with
+ mallctl API impact. This is an alternative to the existing ratio-based
+ unused dirty page purging, and is intended to eventually become the sole
+ purging mechanism. New mallctls:
+ + opt.purge
+ + opt.decay_time
+ + arena.<i>.decay
+ + arena.<i>.decay_time
+ + arenas.decay_time
+ + stats.arenas.<i>.decay_time
+ (@jasone, @cevans87)
+ - Add --with-malloc-conf, which makes it possible to embed a default
+ options string during configuration. This was motivated by the desire to
+ specify --with-malloc-conf=purge:decay , since the default must remain
+ purge:ratio until the 5.0.0 release. (@jasone)
+ - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin)
+ - Make *allocx() size class overflow behavior defined. The maximum
+ size class is now less than PTRDIFF_MAX to protect applications against
+ numerical overflow, and all allocation functions are guaranteed to indicate
+ errors rather than potentially crashing if the request size exceeds the
+ maximum size class. (@jasone)
+ - jeprof:
+ + Add raw heap profile support. (@jasone)
+ + Add --retain and --exclude for backtrace symbol filtering. (@jasone)
+
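+  Since malloc_conf is a weak symbol that applications may override, a minimal
+  sketch (assuming an unprefixed build) equivalent to configuring with
+  --with-malloc-conf=purge:decay is:
+
+      /* Read by jemalloc at startup; option names as in the mallctl list
+       * above. */
+      const char *malloc_conf = "purge:decay,decay_time:10";
+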
+ Optimizations:
+ - Optimize the fast path to combine various bootstrapping and configuration
+ checks and execute more streamlined code in the common case. (@interwq)
+ - Use linear scan for small bitmaps (used for small object tracking). In
+ addition to speeding up bitmap operations on 64-bit systems, this reduces
+ allocator metadata overhead by approximately 0.2%. (@djwatson)
+ - Separate arena_avail trees, which substantially speeds up run tree
+ operations. (@djwatson)
+ - Use memoization (boot-time-computed table) for run quantization. Separate
+ arena_avail trees reduced the importance of this optimization. (@jasone)
+ - Attempt mmap-based in-place huge reallocation. This can dramatically speed
+ up incremental huge reallocation. (@jasone)
+
+ Incompatible changes:
+ - Make opt.narenas unsigned rather than size_t. (@jasone)
+
+ Bug fixes:
+ - Fix stats.cactive accounting regression. (@rustyx, @jasone)
+ - Handle unaligned keys in hash(). This caused problems for some ARM systems.
+ (@jasone, @cferris1000)
+ - Refactor arenas array. In addition to fixing a fork-related deadlock, this
+ makes arena lookups faster and simpler. (@jasone)
+ - Move retained memory allocation out of the default chunk allocation
+ function, to a location that gets executed even if the application installs
+ a custom chunk allocation function. This resolves a virtual memory leak.
+ (@buchgr)
+ - Fix a potential tsd cleanup leak. (@cferris1000, @jasone)
+ - Fix run quantization. In practice this bug had no impact unless
+ applications requested memory with alignment exceeding one page.
+ (@jasone, @djwatson)
+ - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv)
+ - jeprof:
+ + Don't discard curl options if timeout is not defined. (@djwatson)
+ + Detect failed profile fetches. (@djwatson)
+ - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for
+ --disable-stats case. (@jasone)
+
+* 4.0.4 (October 24, 2015)
+
+ This bugfix release fixes another xallocx() regression. No other regressions
+ have come to light in over a month, so this is likely a good starting point
+ for people who prefer to wait for "dot one" releases with all the major issues
+ shaken out.
+
+ Bug fixes:
+  - Fix xallocx(..., MALLOCX_ZERO) to zero the last full trailing page of large
+ allocations that have been randomly assigned an offset of 0 when
+ --enable-cache-oblivious configure option is enabled.
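+
+  (For context, an illustrative call to the interface being fixed:
+
+      size_t usable = xallocx(ptr, new_size, 0, MALLOCX_ZERO);
+
+  which tries to resize ptr in place, returns the resulting usable size, and
+  with MALLOCX_ZERO zeroes any newly usable trailing bytes.)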
+
+* 4.0.3 (September 24, 2015)
+
+ This bugfix release continues the trend of xallocx() and heap profiling fixes.
+
+ Bug fixes:
+ - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large
+ allocations when --enable-cache-oblivious configure option is enabled.
+ - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations
+ when resizing from/to a size class that is not a multiple of the chunk size.
+ - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap
+ profile dumping started.
+ - Work around a potentially bad thread-specific data initialization
+ interaction with NPTL (glibc's pthreads implementation).
+
+* 4.0.2 (September 21, 2015)
+
+ This bugfix release addresses a few bugs specific to heap profiling.
+
+ Bug fixes:
+ - Fix ixallocx_prof_sample() to never modify nor create sampled small
+ allocations. xallocx() is in general incapable of moving small allocations,
+ so this fix removes buggy code without loss of generality.
+ - Fix irallocx_prof_sample() to always allocate large regions, even when
+ alignment is non-zero.
+ - Fix prof_alloc_rollback() to read tdata from thread-specific data rather
+ than dereferencing a potentially invalid tctx.
+
+* 4.0.1 (September 15, 2015)
+
+ This is a bugfix release that is somewhat high risk due to the amount of
+ refactoring required to address deep xallocx() problems. As a side effect of
+ these fixes, xallocx() now tries harder to partially fulfill requests for
+ optional extra space. Note that a couple of minor heap profiling
+ optimizations are included, but these are better thought of as performance
+ fixes that were integral to discovering most of the other bugs.
+
+ Optimizations:
+ - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the
+ fast path when heap profiling is enabled. Additionally, split a special
+ case out into arena_prof_tctx_reset(), which also avoids chunk metadata
+ reads.
+ - Optimize irallocx_prof() to optimistically update the sampler state. The
+ prior implementation appears to have been a holdover from when
+ rallocx()/xallocx() functionality was combined as rallocm().
+
+ Bug fixes:
+ - Fix TLS configuration such that it is enabled by default for platforms on
+ which it works correctly.
+ - Fix arenas_cache_cleanup() and arena_get_hard() to handle
+ allocation/deallocation within the application's thread-specific data
+ cleanup functions even after arenas_cache is torn down.
+ - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS.
+ - Fix chunk purge hook calls for in-place huge shrinking reallocation to
+ specify the old chunk size rather than the new chunk size. This bug caused
+ no correctness issues for the default chunk purge function, but was
+ visible to custom functions set via the "arena.<i>.chunk_hooks" mallctl.
+ - Fix heap profiling bugs:
+ + Fix heap profiling to distinguish among otherwise identical sample sites
+ with interposed resets (triggered via the "prof.reset" mallctl). This bug
+ could cause data structure corruption that would most likely result in a
+ segfault.
+ + Fix irealloc_prof() to prof_alloc_rollback() on OOM.
+ + Make one call to prof_active_get_unlocked() per allocation event, and use
+ the result throughout the relevant functions that handle an allocation
+ event. Also add a missing check in prof_realloc(). These fixes protect
+ allocation events against concurrent prof_active changes.
+ + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample()
+ in the correct order.
+ + Fix prof_realloc() to call prof_free_sampled_object() after calling
+ prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were
+ the same, the tctx could have been prematurely destroyed.
+ - Fix portability bugs:
+ + Don't bitshift by negative amounts when encoding/decoding run sizes in
+ chunk header maps. This affected systems with page sizes greater than 8
+ KiB.
+ + Rename index_t to szind_t to avoid an existing type on Solaris.
+ + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to
+ match glibc and avoid compilation errors when including both
+ jemalloc/jemalloc.h and malloc.h in C++ code.
+ + Don't assume that /bin/sh is appropriate when running size_classes.sh
+ during configuration.
+ + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM.
+ + Link tests to librt if it contains clock_gettime(2).
+
+* 4.0.0 (August 17, 2015)
+
+ This version contains many speed and space optimizations, both minor and
+ major. The major themes are generalization, unification, and simplification.
+ Although many of these optimizations cause no visible behavior change, their
+ cumulative effect is substantial.
+
+ New features:
+ - Normalize size class spacing to be consistent across the complete size
+ range. By default there are four size classes per size doubling, but this
+ is now configurable via the --with-lg-size-class-group option. Also add the
+ --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and
+ --with-lg-tiny-min options, which can be used to tweak page and size class
+ settings. Impacts:
+ + Worst case performance for incrementally growing/shrinking reallocation
+ is improved because there are far fewer size classes, and therefore
+ copying happens less often.
+    + Internal fragmentation is limited to 20% for all but the smallest size
+      classes (those less than four times the quantum). Requests of e.g.
+      (1B + 4 KiB) and (1B + 4 MiB) previously suffered nearly 50% internal
+      fragmentation.
+ + Chunk fragmentation tends to be lower because there are fewer distinct run
+ sizes to pack.
+ - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and
+ "tcache.destroy" mallctls control tcache lifetime and flushing, and the
+ MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API
+    control which tcache is used for each operation (see the sketch after
+    this list).
+ - Implement per thread heap profiling, as well as the ability to
+ enable/disable heap profiling on a per thread basis. Add the "prof.reset",
+ "prof.lg_sample", "thread.prof.name", "thread.prof.active",
+ "opt.prof_thread_active_init", "prof.thread_active_init", and
+ "thread.prof.active" mallctls.
+ - Add support for per arena application-specified chunk allocators, configured
+ via the "arena.<i>.chunk_hooks" mallctl.
+ - Refactor huge allocation to be managed by arenas, so that arenas now
+ function as general purpose independent allocators. This is important in
+ the context of user-specified chunk allocators, aside from the scalability
+ benefits. Related new statistics:
+ + The "stats.arenas.<i>.huge.allocated", "stats.arenas.<i>.huge.nmalloc",
+ "stats.arenas.<i>.huge.ndalloc", and "stats.arenas.<i>.huge.nrequests"
+ mallctls provide high level per arena huge allocation statistics.
+ + The "arenas.nhchunks", "arenas.hchunk.<i>.size",
+ "stats.arenas.<i>.hchunks.<j>.nmalloc",
+ "stats.arenas.<i>.hchunks.<j>.ndalloc",
+ "stats.arenas.<i>.hchunks.<j>.nrequests", and
+ "stats.arenas.<i>.hchunks.<j>.curhchunks" mallctls provide per size class
+ statistics.
+ - Add the 'util' column to malloc_stats_print() output, which reports the
+ proportion of available regions that are currently in use for each small
+ size class.
+ - Add "alloc" and "free" modes for for junk filling (see the "opt.junk"
+ mallctl), so that it is possible to separately enable junk filling for
+ allocation versus deallocation.
+ - Add the jemalloc-config script, which provides information about how
+ jemalloc was configured, and how to integrate it into application builds.
+ - Add metadata statistics, which are accessible via the "stats.metadata",
+ "stats.arenas.<i>.metadata.mapped", and
+ "stats.arenas.<i>.metadata.allocated" mallctls.
+ - Add the "stats.resident" mallctl, which reports the upper limit of
+ physically resident memory mapped by the allocator.
+ - Add per arena control over unused dirty page purging, via the
+ "arenas.lg_dirty_mult", "arena.<i>.lg_dirty_mult", and
+ "stats.arenas.<i>.lg_dirty_mult" mallctls.
+ - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump
+ feature on/off during program execution.
+ - Add sdallocx(), which implements sized deallocation. The primary
+ optimization over dallocx() is the removal of a metadata read, which often
+ suffers an L1 cache miss.
+ - Add missing header includes in jemalloc/jemalloc.h, so that applications
+ only have to #include <jemalloc/jemalloc.h>.
+ - Add support for additional platforms:
+ + Bitrig
+ + Cygwin
+ + DragonFlyBSD
+ + iOS
+ + OpenBSD
+ + OpenRISC/or1k
+
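+  A rough sketch (illustrative only, assuming an unprefixed build) combining
+  two of the interfaces above, an explicit tcache plus sized deallocation:
+
+      #include <jemalloc/jemalloc.h>
+
+      int main(void) {
+          unsigned tc;
+          size_t len = sizeof(tc);
+          if (mallctl("tcache.create", &tc, &len, NULL, 0) != 0)
+              return 1;
+          void *p = mallocx(128, MALLOCX_TCACHE(tc));
+          sdallocx(p, 128, MALLOCX_TCACHE(tc)); /* sized free, same tcache */
+          mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
+          return 0;
+      }
+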
+ Optimizations:
+ - Maintain dirty runs in per arena LRUs rather than in per arena trees of
+ dirty-run-containing chunks. In practice this change significantly reduces
+ dirty page purging volume.
+ - Integrate whole chunks into the unused dirty page purging machinery. This
+ reduces the cost of repeated huge allocation/deallocation, because it
+ effectively introduces a cache of chunks.
+ - Split the arena chunk map into two separate arrays, in order to increase
+ cache locality for the frequently accessed bits.
+ - Move small run metadata out of runs, into arena chunk headers. This reduces
+ run fragmentation, smaller runs reduce external fragmentation for small size
+ classes, and packed (less uniformly aligned) metadata layout improves CPU
+ cache set distribution.
+ - Randomly distribute large allocation base pointer alignment relative to page
+ boundaries in order to more uniformly utilize CPU cache sets. This can be
+ disabled via the --disable-cache-oblivious configure option, and queried via
+ the "config.cache_oblivious" mallctl.
+ - Micro-optimize the fast paths for the public API functions.
+ - Refactor thread-specific data to reside in a single structure. This assures
+ that only a single TLS read is necessary per call into the public API.
+ - Implement in-place huge allocation growing and shrinking.
+ - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make
+ additional optimizations that reduce maximum lookup depth to one or two
+ levels. This resolves what was a concurrency bottleneck for per arena huge
+ allocation, because a global data structure is critical for determining
+ which arenas own which huge allocations.
+
+ Incompatible changes:
+ - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious
+ warnings by default.
+ - Assure that the constness of malloc_usable_size()'s return type matches that
+ of the system implementation.
+ - Change the heap profile dump format to support per thread heap profiling,
+ rename pprof to jeprof, and enhance it with the --thread=<n> option. As a
+ result, the bundled jeprof must now be used rather than the upstream
+ (gperftools) pprof.
+ - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can
+ internally deadlock on some platforms.
+ - Change the "arenas.nlruns" mallctl type from size_t to unsigned.
+ - Replace the "stats.arenas.<i>.bins.<j>.allocated" mallctl with
+ "stats.arenas.<i>.bins.<j>.curregs".
+ - Ignore MALLOC_CONF in set{uid,gid,cap} binaries.
+ - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the
+ MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage.
+
+ Removed features:
+ - Remove the *allocm() API, which is superseded by the *allocx() API.
+  - Remove the --enable-dss option, and make dss non-optional on all platforms
+ which support sbrk(2).
+ - Remove the "arenas.purge" mallctl, which was obsoleted by the
+ "arena.<i>.purge" mallctl in 3.1.0.
+ - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically
+ detects whether it is running inside Valgrind.
+ - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and
+ "stats.huge.ndalloc" mallctls.
+ - Remove the --enable-mremap option.
+ - Remove the "stats.chunks.current", "stats.chunks.total", and
+ "stats.chunks.high" mallctls.
+
+ Bug fixes:
+ - Fix the cactive statistic to decrease (rather than increase) when active
+ memory decreases. This regression was first released in 3.5.0.
+ - Fix OOM handling in memalign() and valloc(). A variant of this bug existed
+ in all releases since 2.0.0, which introduced these functions.
+ - Fix an OOM-related regression in arena_tcache_fill_small(), which could
+ cause cache corruption on OOM. This regression was present in all releases
+ from 2.2.0 through 3.6.0.
+ - Fix size class overflow handling for malloc(), posix_memalign(), memalign(),
+ calloc(), and realloc() when profiling is enabled.
+ - Fix the "arena.<i>.dss" mallctl to return an error if "primary" or
+ "secondary" precedence is specified, but sbrk(2) is not supported.
+ - Fix fallback lg_floor() implementations to handle extremely large inputs.
+ - Ensure the default purgeable zone is after the default zone on OS X.
+ - Fix latent bugs in atomic_*().
+ - Fix the "arena.<i>.dss" mallctl to handle read-only calls.
+ - Fix tls_model configuration to enable the initial-exec model when possible.
+ - Mark malloc_conf as a weak symbol so that the application can override it.
+ - Correctly detect glibc's adaptive pthread mutexes.
+ - Fix the --without-export configure option.
+
* 3.6.0 (March 31, 2014)
This version contains a critical bug fix for a regression present in 3.5.0 and
@@ -21,7 +855,7 @@ found in the git revision history:
backtracing to be reliable.
- Use dss allocation precedence for huge allocations as well as small/large
allocations.
- - Fix test assertion failure message formatting. This bug did not manifect on
+ - Fix test assertion failure message formatting. This bug did not manifest on
x86_64 systems because of implementation subtleties in va_list.
- Fix inconsequential test failures for hash and SFMT code.
@@ -516,7 +1350,7 @@ found in the git revision history:
- Make it possible for the application to manually flush a thread's cache, via
the "tcache.flush" mallctl.
- Base maximum dirty page count on proportion of active memory.
- - Compute various addtional run-time statistics, including per size class
+ - Compute various additional run-time statistics, including per size class
statistics for large objects.
- Expose malloc_stats_print(), which can be called repeatedly by the
application.
diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL
deleted file mode 100644
index 841704d2a..000000000
--- a/deps/jemalloc/INSTALL
+++ /dev/null
@@ -1,306 +0,0 @@
-Building and installing jemalloc can be as simple as typing the following while
-in the root directory of the source tree:
-
- ./configure
- make
- make install
-
-=== Advanced configuration =====================================================
-
-The 'configure' script supports numerous options that allow control of which
-functionality is enabled, where jemalloc is installed, etc. Optionally, pass
-any of the following arguments (not a definitive list) to 'configure':
-
---help
- Print a definitive list of options.
-
---prefix=<install-root-dir>
- Set the base directory in which to install. For example:
-
- ./configure --prefix=/usr/local
-
- will cause files to be installed into /usr/local/include, /usr/local/lib,
- and /usr/local/man.
-
---with-rpath=<colon-separated-rpath>
- Embed one or more library paths, so that libjemalloc can find the libraries
- it is linked to. This works only on ELF-based systems.
-
---with-mangling=<map>
- Mangle public symbols specified in <map> which is a comma-separated list of
- name:mangled pairs.
-
- For example, to use ld's --wrap option as an alternative method for
- overriding libc's malloc implementation, specify something like:
-
- --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...]
-
- Note that mangling happens prior to application of the prefix specified by
- --with-jemalloc-prefix, and mangled symbols are then ignored when applying
- the prefix.
-
---with-jemalloc-prefix=<prefix>
- Prefix all public APIs with <prefix>. For example, if <prefix> is
- "prefix_", API changes like the following occur:
-
- malloc() --> prefix_malloc()
- malloc_conf --> prefix_malloc_conf
- /etc/malloc.conf --> /etc/prefix_malloc.conf
- MALLOC_CONF --> PREFIX_MALLOC_CONF
-
- This makes it possible to use jemalloc at the same time as the system
- allocator, or even to use multiple copies of jemalloc simultaneously.
-
- By default, the prefix is "", except on OS X, where it is "je_". On OS X,
- jemalloc overlays the default malloc zone, but makes no attempt to actually
- replace the "malloc", "calloc", etc. symbols.
-
---without-export
- Don't export public APIs. This can be useful when building jemalloc as a
- static library, or to avoid exporting public APIs when using the zone
- allocator on OSX.
-
---with-private-namespace=<prefix>
- Prefix all library-private APIs with <prefix>je_. For shared libraries,
- symbol visibility mechanisms prevent these symbols from being exported, but
- for static libraries, naming collisions are a real possibility. By
- default, <prefix> is empty, which results in a symbol prefix of je_ .
-
---with-install-suffix=<suffix>
- Append <suffix> to the base name of all installed files, such that multiple
- versions of jemalloc can coexist in the same installation directory. For
- example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
-
---enable-cc-silence
- Enable code that silences non-useful compiler warnings. This is helpful
- when trying to tell serious warnings from those due to compiler
- limitations, but it potentially incurs a performance penalty.
-
---enable-debug
- Enable assertions and validation code. This incurs a substantial
- performance hit, but is very useful during application development.
- Implies --enable-ivsalloc.
-
---enable-code-coverage
- Enable code coverage support, for use during jemalloc test development.
- Additional testing targets are available if this option is enabled:
-
- coverage
- coverage_unit
- coverage_integration
- coverage_stress
-
- These targets do not clear code coverage results from previous runs, and
- there are interactions between the various coverage targets, so it is
- usually advisable to run 'make clean' between repeated code coverage runs.
-
---enable-ivsalloc
- Enable validation code, which verifies that pointers reside within
- jemalloc-owned chunks before dereferencing them. This incurs a substantial
- performance hit.
-
---disable-stats
- Disable statistics gathering functionality. See the "opt.stats_print"
- option documentation for usage details.
-
---enable-prof
- Enable heap profiling and leak detection functionality. See the "opt.prof"
- option documentation for usage details. When enabled, there are several
- approaches to backtracing, and the configure script chooses the first one
- in the following list that appears to function correctly:
-
- + libunwind (requires --enable-prof-libunwind)
- + libgcc (unless --disable-prof-libgcc)
- + gcc intrinsics (unless --disable-prof-gcc)
-
---enable-prof-libunwind
- Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
- backtracing.
-
---disable-prof-libgcc
- Disable the use of libgcc's backtracing functionality.
-
---disable-prof-gcc
- Disable the use of gcc intrinsics for backtracing.
-
---with-static-libunwind=<libunwind.a>
- Statically link against the specified libunwind.a rather than dynamically
- linking with -lunwind.
-
---disable-tcache
- Disable thread-specific caches for small objects. Objects are cached and
- released in bulk, thus reducing the total number of mutex operations. See
- the "opt.tcache" option for usage details.
-
---enable-mremap
- Enable huge realloc() via mremap(2). mremap() is disabled by default
- because the flavor used is specific to Linux, which has a quirk in its
- virtual memory allocation algorithm that causes semi-permanent VM map holes
- under normal jemalloc operation.
-
---disable-munmap
- Disable virtual memory deallocation via munmap(2); instead keep track of
- the virtual memory for later use. munmap() is disabled by default (i.e.
- --disable-munmap is implied) on Linux, which has a quirk in its virtual
- memory allocation algorithm that causes semi-permanent VM map holes under
- normal jemalloc operation.
-
---enable-dss
- Enable support for page allocation/deallocation via sbrk(2), in addition to
- mmap(2).
-
---disable-fill
- Disable support for junk/zero filling of memory, quarantine, and redzones.
- See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option
- documentation for usage details.
-
---disable-valgrind
- Disable support for Valgrind.
-
---disable-experimental
- Disable support for the experimental API (*allocm()).
-
---disable-zone-allocator
- Disable zone allocator for Darwin. This means jemalloc won't be hooked as
- the default allocator on OSX/iOS.
-
---enable-utrace
- Enable utrace(2)-based allocation tracing. This feature is not broadly
- portable (FreeBSD has it, but Linux and OS X do not).
-
---enable-xmalloc
- Enable support for optional immediate termination due to out-of-memory
- errors, as is commonly implemented by "xmalloc" wrapper function for malloc.
- See the "opt.xmalloc" option documentation for usage details.
-
---enable-lazy-lock
- Enable code that wraps pthread_create() to detect when an application
- switches from single-threaded to multi-threaded mode, so that it can avoid
- mutex locking/unlocking operations while in single-threaded mode. In
- practice, this feature usually has little impact on performance unless
- thread-specific caching is disabled.
-
---disable-tls
- Disable thread-local storage (TLS), which allows for fast access to
- thread-local variables via the __thread keyword. If TLS is available,
- jemalloc uses it for several purposes.
-
---with-xslroot=<path>
- Specify where to find DocBook XSL stylesheets when building the
- documentation.
-
-The following environment variables (not a definitive list) impact configure's
-behavior:
-
-CFLAGS="?"
- Pass these flags to the compiler. You probably shouldn't define this unless
- you know what you are doing. (Use EXTRA_CFLAGS instead.)
-
-EXTRA_CFLAGS="?"
- Append these flags to CFLAGS. This makes it possible to add flags such as
- -Werror, while allowing the configure script to determine what other flags
- are appropriate for the specified configuration.
-
- The configure script specifically checks whether an optimization flag (-O*)
- is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
- level if it finds that one has already been specified.
-
-CPPFLAGS="?"
- Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
- 'cpp' when 'configure' is looking for include files, so you must use
- CPPFLAGS instead if you need to help 'configure' find header files.
-
-LD_LIBRARY_PATH="?"
- 'ld' uses this colon-separated list to find libraries.
-
-LDFLAGS="?"
- Pass these flags when linking.
-
-PATH="?"
- 'configure' uses this to find programs.
-
-=== Advanced compilation =======================================================
-
-To build only parts of jemalloc, use the following targets:
-
- build_lib_shared
- build_lib_static
- build_lib
- build_doc_html
- build_doc_man
- build_doc
-
-To install only parts of jemalloc, use the following targets:
-
- install_bin
- install_include
- install_lib_shared
- install_lib_static
- install_lib
- install_doc_html
- install_doc_man
- install_doc
-
-To clean up build results to varying degrees, use the following make targets:
-
- clean
- distclean
- relclean
-
-=== Advanced installation ======================================================
-
-Optionally, define make variables when invoking make, including (not
-exclusively):
-
-INCLUDEDIR="?"
- Use this as the installation prefix for header files.
-
-LIBDIR="?"
- Use this as the installation prefix for libraries.
-
-MANDIR="?"
- Use this as the installation prefix for man pages.
-
-DESTDIR="?"
- Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful
- when installing to a different path than was specified via --prefix.
-
-CC="?"
- Use this to invoke the C compiler.
-
-CFLAGS="?"
- Pass these flags to the compiler.
-
-CPPFLAGS="?"
- Pass these flags to the C preprocessor.
-
-LDFLAGS="?"
- Pass these flags when linking.
-
-PATH="?"
- Use this to search for programs used during configuration and building.
-
-=== Development ================================================================
-
-If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
-script rather than 'configure'. This re-generates 'configure', enables
-configuration dependency rules, and enables re-generation of automatically
-generated source files.
-
-The build system supports using an object directory separate from the source
-tree. For example, you can create an 'obj' directory, and from within that
-directory, issue configuration and build commands:
-
- autoconf
- mkdir obj
- cd obj
- ../configure --enable-autogen
- make
-
-=== Documentation ==============================================================
-
-The manual page is generated in both html and roff formats. Any web browser
-can be used to view the html manual. The roff manual page can be formatted
-prior to installation via the following command:
-
- nroff -man -t doc/jemalloc.3
diff --git a/deps/jemalloc/INSTALL.md b/deps/jemalloc/INSTALL.md
new file mode 100644
index 000000000..ef328c60f
--- /dev/null
+++ b/deps/jemalloc/INSTALL.md
@@ -0,0 +1,423 @@
+Building and installing a packaged release of jemalloc can be as simple as
+typing the following while in the root directory of the source tree:
+
+ ./configure
+ make
+ make install
+
+If building from unpackaged developer sources, the simplest command sequence
+that might work is:
+
+ ./autogen.sh
+ make dist
+ make
+ make install
+
+Note that the default target does not build the documentation, because doing
+so would make packaged releases depend on xsltproc.  Either run 'make dist',
+which builds the docs, or skip them by installing through the granular
+install_* targets documented below.
+
+
+## Advanced configuration
+
+The 'configure' script supports numerous options that allow control of which
+functionality is enabled, where jemalloc is installed, etc. Optionally, pass
+any of the following arguments (not a definitive list) to 'configure':
+
+* `--help`
+
+ Print a definitive list of options.
+
+* `--prefix=<install-root-dir>`
+
+ Set the base directory in which to install. For example:
+
+ ./configure --prefix=/usr/local
+
+ will cause files to be installed into /usr/local/include, /usr/local/lib,
+ and /usr/local/man.
+
+* `--with-version=(<major>.<minor>.<bugfix>-<nrev>-g<gid>|VERSION)`
+
+  The VERSION file is mandatory for successful configuration, and the
+  following steps are taken to ensure its presence:
+ 1) If --with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid> is specified,
+ generate VERSION using the specified value.
+ 2) If --with-version is not specified in either form and the source
+ directory is inside a git repository, try to generate VERSION via 'git
+ describe' invocations that pattern-match release tags.
+ 3) If VERSION is missing, generate it with a bogus version:
+ 0.0.0-0-g0000000000000000000000000000000000000000
+
+ Note that --with-version=VERSION bypasses (1) and (2), which simplifies
+ VERSION configuration when embedding a jemalloc release into another
+ project's git repository.
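+
+  For example, when embedding a release whose git metadata is unavailable,
+  the version can be pinned explicitly (a sketch; substitute the real
+  release values):
+
+    ./configure --with-version=5.1.0-0-g0000000000000000000000000000000000000000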
+
+* `--with-rpath=<colon-separated-rpath>`
+
+ Embed one or more library paths, so that libjemalloc can find the libraries
+ it is linked to. This works only on ELF-based systems.
+
+* `--with-mangling=<map>`
+
+  Mangle public symbols specified in <map>, which is a comma-separated list
+  of name:mangled pairs.
+
+ For example, to use ld's --wrap option as an alternative method for
+ overriding libc's malloc implementation, specify something like:
+
+ --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...]
+
+ Note that mangling happens prior to application of the prefix specified by
+ --with-jemalloc-prefix, and mangled symbols are then ignored when applying
+ the prefix.
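+
+  A sketch of the complete --wrap flow (the application link line below is an
+  illustrative assumption, not part of jemalloc's build):
+
+    ./configure --with-mangling=malloc:__wrap_malloc,free:__wrap_free
+    make
+    # References to malloc/free in app.c now resolve to jemalloc's
+    # __wrap_malloc/__wrap_free at link time:
+    cc app.c -Wl,--wrap=malloc -Wl,--wrap=free -L lib -ljemalloc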
+
+* `--with-jemalloc-prefix=<prefix>`
+
+ Prefix all public APIs with <prefix>. For example, if <prefix> is
+ "prefix_", API changes like the following occur:
+
+ malloc() --> prefix_malloc()
+ malloc_conf --> prefix_malloc_conf
+ /etc/malloc.conf --> /etc/prefix_malloc.conf
+ MALLOC_CONF --> PREFIX_MALLOC_CONF
+
+ This makes it possible to use jemalloc at the same time as the system
+ allocator, or even to use multiple copies of jemalloc simultaneously.
+
+ By default, the prefix is "", except on OS X, where it is "je_". On OS X,
+ jemalloc overlays the default malloc zone, but makes no attempt to actually
+ replace the "malloc", "calloc", etc. symbols.
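+
+  As a quick sanity check (a sketch assuming an in-tree build on an ELF
+  system; the soname may differ):
+
+    ./configure --with-jemalloc-prefix=prefix_
+    make
+    # The public entry points are now prefix_malloc, prefix_free, etc.:
+    nm -D lib/libjemalloc.so.2 | grep ' prefix_malloc$'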
+
+* `--without-export`
+
+ Don't export public APIs. This can be useful when building jemalloc as a
+ static library, or to avoid exporting public APIs when using the zone
+ allocator on OSX.
+
+* `--with-private-namespace=<prefix>`
+
+ Prefix all library-private APIs with <prefix>je_. For shared libraries,
+ symbol visibility mechanisms prevent these symbols from being exported, but
+ for static libraries, naming collisions are a real possibility. By
+  default, <prefix> is empty, which results in a symbol prefix of je_.
+
+* `--with-install-suffix=<suffix>`
+
+ Append <suffix> to the base name of all installed files, such that multiple
+ versions of jemalloc can coexist in the same installation directory. For
+ example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
+
+* `--with-malloc-conf=<malloc_conf>`
+
+ Embed `<malloc_conf>` as a run-time options string that is processed prior to
+ the malloc_conf global variable, the /etc/malloc.conf symlink, and the
+ MALLOC_CONF environment variable. For example, to change the default decay
+ time to 30 seconds:
+
+ --with-malloc-conf=decay_ms:30000
+
+* `--enable-debug`
+
+ Enable assertions and validation code. This incurs a substantial
+ performance hit, but is very useful during application development.
+
+* `--disable-stats`
+
+ Disable statistics gathering functionality. See the "opt.stats_print"
+ option documentation for usage details.
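+
+  When statistics are left enabled, a summary can be dumped at process exit
+  via the corresponding run-time option (a sketch; `my_app` is a hypothetical
+  binary using jemalloc):
+
+    MALLOC_CONF="stats_print:true" ./my_app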
+
+* `--enable-prof`
+
+ Enable heap profiling and leak detection functionality. See the "opt.prof"
+ option documentation for usage details. When enabled, there are several
+ approaches to backtracing, and the configure script chooses the first one
+ in the following list that appears to function correctly:
+
+ + libunwind (requires --enable-prof-libunwind)
+ + libgcc (unless --disable-prof-libgcc)
+ + gcc intrinsics (unless --disable-prof-gcc)
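+
+  A minimal profiling session might look like this (a sketch; `my_app` is a
+  hypothetical binary linked against the built library):
+
+    ./configure --enable-prof
+    make
+    # Activate profiling and dump a final profile at exit:
+    MALLOC_CONF="prof:true,prof_final:true" ./my_app
+    jeprof --text ./my_app jeprof.*.heap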
+
+* `--enable-prof-libunwind`
+
+ Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
+ backtracing.
+
+* `--disable-prof-libgcc`
+
+ Disable the use of libgcc's backtracing functionality.
+
+* `--disable-prof-gcc`
+
+ Disable the use of gcc intrinsics for backtracing.
+
+* `--with-static-libunwind=<libunwind.a>`
+
+ Statically link against the specified libunwind.a rather than dynamically
+ linking with -lunwind.
+
+* `--disable-fill`
+
+ Disable support for junk/zero filling of memory. See the "opt.junk" and
+ "opt.zero" option documentation for usage details.
+
+* `--disable-zone-allocator`
+
+ Disable zone allocator for Darwin. This means jemalloc won't be hooked as
+ the default allocator on OSX/iOS.
+
+* `--enable-utrace`
+
+ Enable utrace(2)-based allocation tracing. This feature is not broadly
+ portable (FreeBSD has it, but Linux and OS X do not).
+
+* `--enable-xmalloc`
+
+  Enable support for optional immediate termination due to out-of-memory
+  errors, as is commonly implemented by the "xmalloc" wrapper function for
+  malloc.  See the "opt.xmalloc" option documentation for usage details.
+
+* `--enable-lazy-lock`
+
+ Enable code that wraps pthread_create() to detect when an application
+ switches from single-threaded to multi-threaded mode, so that it can avoid
+ mutex locking/unlocking operations while in single-threaded mode. In
+ practice, this feature usually has little impact on performance unless
+ thread-specific caching is disabled.
+
+* `--disable-cache-oblivious`
+
+ Disable cache-oblivious large allocation alignment for large allocation
+ requests with no alignment constraints. If this feature is disabled, all
+ large allocations are page-aligned as an implementation artifact, which can
+ severely harm CPU cache utilization. However, the cache-oblivious layout
+ comes at the cost of one extra page per large allocation, which in the
+ most extreme case increases physical memory usage for the 16 KiB size class
+ to 20 KiB.
+
+* `--disable-syscall`
+
+  Disable the use of syscall(2) in place of {open,read,write,close}(2).  This
+  is intended as a workaround for systems that place security limitations on
+  syscall(2).
+
+* `--disable-cxx`
+
+  Disable C++ integration.  This causes the `operator new` and
+  `operator delete` implementations to be omitted.
+
+* `--with-xslroot=<path>`
+
+ Specify where to find DocBook XSL stylesheets when building the
+ documentation.
+
+* `--with-lg-page=<lg-page>`
+
+ Specify the base 2 log of the allocator page size, which must in turn be at
+ least as large as the system page size. By default the configure script
+ determines the host's page size and sets the allocator page size equal to
+ the system page size, so this option need not be specified unless the
+ system page size may change between configuration and execution, e.g. when
+ cross compiling.
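+
+  For example, when cross compiling for a target with 64 KiB pages (a
+  sketch; the host triple is an assumption):
+
+    ./configure --host=aarch64-linux-gnu --with-lg-page=16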
+
+* `--with-lg-page-sizes=<lg-page-sizes>`
+
+ Specify the comma-separated base 2 logs of the page sizes to support. This
+ option may be useful when cross compiling in combination with
+ `--with-lg-page`, but its primary use case is for integration with FreeBSD's
+ libc, wherein jemalloc is embedded.
+
+* `--with-lg-hugepage=<lg-hugepage>`
+
+ Specify the base 2 log of the system huge page size. This option is useful
+ when cross compiling, or when overriding the default for systems that do
+ not explicitly support huge pages.
+
+* `--with-lg-quantum=<lg-quantum>`
+
+ Specify the base 2 log of the minimum allocation alignment. jemalloc needs
+ to know the minimum alignment that meets the following C standard
+ requirement (quoted from the April 12, 2011 draft of the C11 standard):
+
+  > The pointer returned if the allocation succeeds is suitably aligned so
+  > that it may be assigned to a pointer to any type of object with a
+  > fundamental alignment requirement and then used to access such an object
+  > or an array of such objects in the space allocated [...]
+
+  This setting is architecture-specific, and although jemalloc includes known
+  safe values for the most commonly used modern architectures, there is a
+  wrinkle related to GNU libc (glibc) that may impact your choice of
+  <lg-quantum>.  On most modern architectures the standard's requirement
+  mandates 16-byte alignment (<lg-quantum>=4), but the glibc developers chose
+  not to meet it for performance reasons.  An old discussion can be found at
+  <https://sourceware.org/bugzilla/show_bug.cgi?id=206>.  Unlike glibc,
+  jemalloc does follow the C standard by default (caveat: jemalloc
+  technically cheats for size classes smaller than the quantum), but the fact
+  that Linux systems already work around this allocator noncompliance means
+  that it is generally safe in practice to let jemalloc's minimum alignment
+  follow glibc's lead.  If you specify `--with-lg-quantum=3` during
+  configuration, jemalloc will provide additional size classes that are not
+  16-byte-aligned (24, 40, and 56).
+
+* `--with-lg-vaddr=<lg-vaddr>`
+
+ Specify the number of significant virtual address bits. By default, the
+ configure script attempts to detect virtual address size on those platforms
+ where it knows how, and picks a default otherwise. This option may be
+ useful when cross-compiling.
+
+* `--disable-initial-exec-tls`
+
+ Disable the initial-exec TLS model for jemalloc's internal thread-local
+ storage (on those platforms that support explicit settings). This can allow
+ jemalloc to be dynamically loaded after program startup (e.g. using dlopen).
+ Note that in this case, there will be two malloc implementations operating
+ in the same process, which will almost certainly result in confusing runtime
+ crashes if pointers leak from one implementation to the other.
+
+The following environment variables (not a definitive list) impact configure's
+behavior:
+
+* `CFLAGS="?"`
+* `CXXFLAGS="?"`
+
+ Pass these flags to the C/C++ compiler. Any flags set by the configure
+ script are prepended, which means explicitly set flags generally take
+ precedence. Take care when specifying flags such as -Werror, because
+ configure tests may be affected in undesirable ways.
+
+* `EXTRA_CFLAGS="?"`
+* `EXTRA_CXXFLAGS="?"`
+
+ Append these flags to CFLAGS/CXXFLAGS, without passing them to the
+ compiler(s) during configuration. This makes it possible to add flags such
+ as -Werror, while allowing the configure script to determine what other
+ flags are appropriate for the specified configuration.
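+
+  For example (a sketch), to fail the build on warnings without disturbing
+  the flags that configure probes for:
+
+    EXTRA_CFLAGS="-Werror" EXTRA_CXXFLAGS="-Werror" ./configure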
+
+* `CPPFLAGS="?"`
+
+ Pass these flags to the C preprocessor. Note that CFLAGS is not passed to
+ 'cpp' when 'configure' is looking for include files, so you must use
+ CPPFLAGS instead if you need to help 'configure' find header files.
+
+* `LD_LIBRARY_PATH="?"`
+
+ 'ld' uses this colon-separated list to find libraries.
+
+* `LDFLAGS="?"`
+
+ Pass these flags when linking.
+
+* `PATH="?"`
+
+ 'configure' uses this to find programs.
+
+In some cases it may be necessary to work around configuration results that do
+not match reality. For example, Linux 4.5 added support for the MADV_FREE flag
+to madvise(2), which can cause problems if building on a host with MADV_FREE
+support and deploying to a target without. To work around this, use a cache
+file to override the relevant configuration variable defined in configure.ac,
+e.g.:
+
+ echo "je_cv_madv_free=no" > config.cache && ./configure -C
+
+
+## Advanced compilation
+
+To build only parts of jemalloc, use the following targets:
+
+ build_lib_shared
+ build_lib_static
+ build_lib
+ build_doc_html
+ build_doc_man
+ build_doc
+
+To install only parts of jemalloc, use the following targets:
+
+ install_bin
+ install_include
+ install_lib_shared
+ install_lib_static
+ install_lib_pc
+ install_lib
+ install_doc_html
+ install_doc_man
+ install_doc
+
+To clean up build results to varying degrees, use the following make targets:
+
+ clean
+ distclean
+ relclean
+
+
+## Advanced installation
+
+Optionally, define make variables when invoking make, including (not
+exclusively):
+
+* `INCLUDEDIR="?"`
+
+ Use this as the installation prefix for header files.
+
+* `LIBDIR="?"`
+
+ Use this as the installation prefix for libraries.
+
+* `MANDIR="?"`
+
+ Use this as the installation prefix for man pages.
+
+* `DESTDIR="?"`
+
+ Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful
+ when installing to a different path than was specified via --prefix.
+
+* `CC="?"`
+
+ Use this to invoke the C compiler.
+
+* `CFLAGS="?"`
+
+ Pass these flags to the compiler.
+
+* `CPPFLAGS="?"`
+
+ Pass these flags to the C preprocessor.
+
+* `LDFLAGS="?"`
+
+ Pass these flags when linking.
+
+* `PATH="?"`
+
+ Use this to search for programs used during configuration and building.
+
+
+## Development
+
+If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
+script rather than 'configure'. This re-generates 'configure', enables
+configuration dependency rules, and enables re-generation of automatically
+generated source files.
+
+The build system supports using an object directory separate from the source
+tree. For example, you can create an 'obj' directory, and from within that
+directory, issue configuration and build commands:
+
+ autoconf
+ mkdir obj
+ cd obj
+ ../configure --enable-autogen
+ make
+
+
+## Documentation
+
+The manual page is generated in both html and roff formats. Any web browser
+can be used to view the html manual. The roff manual page can be formatted
+prior to installation via the following command:
+
+ nroff -man -t doc/jemalloc.3
diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in
index d6b7d6ea3..9b9347fff 100644
--- a/deps/jemalloc/Makefile.in
+++ b/deps/jemalloc/Makefile.in
@@ -9,6 +9,7 @@ vpath % .
SHELL := /bin/sh
CC := @CC@
+CXX := @CXX@
# Configuration parameters.
DESTDIR =
@@ -23,8 +24,15 @@ abs_srcroot := @abs_srcroot@
abs_objroot := @abs_objroot@
# Build parameters.
-CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
-CFLAGS := @CFLAGS@
+CPPFLAGS := @CPPFLAGS@ -I$(objroot)include -I$(srcroot)include
+CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@
+SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@
+EXTRA_CFLAGS := @EXTRA_CFLAGS@
+CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS))
+CONFIGURE_CXXFLAGS := @CONFIGURE_CXXFLAGS@
+SPECIFIED_CXXFLAGS := @SPECIFIED_CXXFLAGS@
+EXTRA_CXXFLAGS := @EXTRA_CXXFLAGS@
+CXXFLAGS := $(strip $(CONFIGURE_CXXFLAGS) $(SPECIFIED_CXXFLAGS) $(EXTRA_CXXFLAGS))
LDFLAGS := @LDFLAGS@
EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
LIBS := @LIBS@
@@ -42,23 +50,29 @@ XSLTPROC := @XSLTPROC@
AUTOCONF := @AUTOCONF@
_RPATH = @RPATH@
RPATH = $(if $(1),$(call _RPATH,$(1)))
-cfghdrs_in := @cfghdrs_in@
+cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@)
cfghdrs_out := @cfghdrs_out@
-cfgoutputs_in := @cfgoutputs_in@
+cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@)
cfgoutputs_out := @cfgoutputs_out@
enable_autogen := @enable_autogen@
-enable_code_coverage := @enable_code_coverage@
-enable_experimental := @enable_experimental@
+enable_prof := @enable_prof@
enable_zone_allocator := @enable_zone_allocator@
+MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF
+link_whole_archive := @link_whole_archive@
DSO_LDFLAGS = @DSO_LDFLAGS@
SOREV = @SOREV@
PIC_CFLAGS = @PIC_CFLAGS@
CTARGET = @CTARGET@
LDTARGET = @LDTARGET@
+TEST_LD_MODE = @TEST_LD_MODE@
MKLIB = @MKLIB@
AR = @AR@
ARFLAGS = @ARFLAGS@
+DUMP_SYMS = @DUMP_SYMS@
+AWK := @AWK@
CC_MM = @CC_MM@
+LM := @LM@
+INSTALL = @INSTALL@
ifeq (macho, $(ABI))
TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
@@ -73,16 +87,38 @@ endif
LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
# Lists of files.
-BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh
+BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof
C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h
-C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \
- $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \
- $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \
- $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \
- $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \
- $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \
- $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \
- $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c
+C_SRCS := $(srcroot)src/jemalloc.c \
+ $(srcroot)src/arena.c \
+ $(srcroot)src/background_thread.c \
+ $(srcroot)src/base.c \
+ $(srcroot)src/bin.c \
+ $(srcroot)src/bitmap.c \
+ $(srcroot)src/ckh.c \
+ $(srcroot)src/ctl.c \
+ $(srcroot)src/div.c \
+ $(srcroot)src/extent.c \
+ $(srcroot)src/extent_dss.c \
+ $(srcroot)src/extent_mmap.c \
+ $(srcroot)src/hash.c \
+ $(srcroot)src/hooks.c \
+ $(srcroot)src/large.c \
+ $(srcroot)src/log.c \
+ $(srcroot)src/malloc_io.c \
+ $(srcroot)src/mutex.c \
+ $(srcroot)src/mutex_pool.c \
+ $(srcroot)src/nstime.c \
+ $(srcroot)src/pages.c \
+ $(srcroot)src/prng.c \
+ $(srcroot)src/prof.c \
+ $(srcroot)src/rtree.c \
+ $(srcroot)src/stats.c \
+ $(srcroot)src/sz.c \
+ $(srcroot)src/tcache.c \
+ $(srcroot)src/ticker.c \
+ $(srcroot)src/tsd.c \
+ $(srcroot)src/witness.c
ifeq ($(enable_zone_allocator), 1)
C_SRCS += $(srcroot)src/zone.c
endif
@@ -98,57 +134,119 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
ifneq ($(SOREV),$(SO))
DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
endif
+ifeq (1, $(link_whole_archive))
+LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive
+else
+LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
+endif
+PC := $(objroot)jemalloc.pc
MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
-DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
-DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3)
+DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
+DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
-C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \
+C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
+ $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
+ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
- $(srcroot)test/src/thd.c
-C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c
-TESTS_UNIT := $(srcroot)test/unit/bitmap.c \
+ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
+ifeq (1, $(link_whole_archive))
+C_UTIL_INTEGRATION_SRCS :=
+C_UTIL_CPP_SRCS :=
+else
+C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
+C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
+endif
+TESTS_UNIT := \
+ $(srcroot)test/unit/a0.c \
+ $(srcroot)test/unit/arena_reset.c \
+ $(srcroot)test/unit/atomic.c \
+ $(srcroot)test/unit/background_thread.c \
+ $(srcroot)test/unit/background_thread_enable.c \
+ $(srcroot)test/unit/base.c \
+ $(srcroot)test/unit/bitmap.c \
$(srcroot)test/unit/ckh.c \
+ $(srcroot)test/unit/decay.c \
+ $(srcroot)test/unit/div.c \
+ $(srcroot)test/unit/emitter.c \
+ $(srcroot)test/unit/extent_quantize.c \
+ $(srcroot)test/unit/fork.c \
$(srcroot)test/unit/hash.c \
+ $(srcroot)test/unit/hooks.c \
$(srcroot)test/unit/junk.c \
+ $(srcroot)test/unit/junk_alloc.c \
+ $(srcroot)test/unit/junk_free.c \
+ $(srcroot)test/unit/log.c \
$(srcroot)test/unit/mallctl.c \
+ $(srcroot)test/unit/malloc_io.c \
$(srcroot)test/unit/math.c \
$(srcroot)test/unit/mq.c \
$(srcroot)test/unit/mtx.c \
+ $(srcroot)test/unit/pack.c \
+ $(srcroot)test/unit/pages.c \
+ $(srcroot)test/unit/ph.c \
+ $(srcroot)test/unit/prng.c \
$(srcroot)test/unit/prof_accum.c \
+ $(srcroot)test/unit/prof_active.c \
$(srcroot)test/unit/prof_gdump.c \
$(srcroot)test/unit/prof_idump.c \
+ $(srcroot)test/unit/prof_reset.c \
+ $(srcroot)test/unit/prof_tctx.c \
+ $(srcroot)test/unit/prof_thread_name.c \
$(srcroot)test/unit/ql.c \
$(srcroot)test/unit/qr.c \
- $(srcroot)test/unit/quarantine.c \
$(srcroot)test/unit/rb.c \
+ $(srcroot)test/unit/retained.c \
$(srcroot)test/unit/rtree.c \
$(srcroot)test/unit/SFMT.c \
+ $(srcroot)test/unit/size_classes.c \
+ $(srcroot)test/unit/slab.c \
+ $(srcroot)test/unit/smoothstep.c \
+ $(srcroot)test/unit/spin.c \
$(srcroot)test/unit/stats.c \
+ $(srcroot)test/unit/stats_print.c \
+ $(srcroot)test/unit/ticker.c \
+ $(srcroot)test/unit/nstime.c \
$(srcroot)test/unit/tsd.c \
- $(srcroot)test/unit/util.c \
+ $(srcroot)test/unit/witness.c \
$(srcroot)test/unit/zero.c
-TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \
- $(srcroot)test/unit/prof_accum_b.c
+ifeq (@enable_prof@, 1)
+TESTS_UNIT += \
+ $(srcroot)test/unit/arena_reset_prof.c
+endif
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
+ $(srcroot)test/integration/extent.c \
$(srcroot)test/integration/mallocx.c \
- $(srcroot)test/integration/mremap.c \
+ $(srcroot)test/integration/MALLOCX_ARENA.c \
+ $(srcroot)test/integration/overflow.c \
$(srcroot)test/integration/posix_memalign.c \
$(srcroot)test/integration/rallocx.c \
+ $(srcroot)test/integration/sdallocx.c \
$(srcroot)test/integration/thread_arena.c \
$(srcroot)test/integration/thread_tcache_enabled.c \
$(srcroot)test/integration/xallocx.c
-ifeq ($(enable_experimental), 1)
-TESTS_INTEGRATION += $(srcroot)test/integration/allocm.c \
- $(srcroot)test/integration/MALLOCX_ARENA.c \
- $(srcroot)test/integration/rallocm.c
+ifeq (@enable_cxx@, 1)
+CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp
+TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp
+else
+CPP_SRCS :=
+TESTS_INTEGRATION_CPP :=
endif
-TESTS_STRESS :=
-TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS)
+TESTS_STRESS := $(srcroot)test/stress/microbench.c
+TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS)
+
+PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h
+PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h)
+C_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym.$(O))
+C_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym)
C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O))
+CPP_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.$(O))
C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
+CPP_PIC_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.pic.$(O))
+C_JET_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym.$(O))
+C_JET_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym)
C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O))
C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O))
C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O))
@@ -157,27 +255,28 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O))
C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS)
TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O))
-TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O))
+TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O))
TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O))
-TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
+TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
+TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS)
.PHONY: all dist build_doc_html build_doc_man build_doc
.PHONY: install_bin install_include install_lib
.PHONY: install_doc_html install_doc_man install_doc install
.PHONY: tests check clean distclean relclean
-.SECONDARY : $(TESTS_OBJS)
+.SECONDARY : $(PRIVATE_NAMESPACE_GEN_HDRS) $(TESTS_OBJS) $(TESTS_CPP_OBJS)
# Default target.
all: build_lib
dist: build_doc
-$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
+$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
-$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
+$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
build_doc_html: $(DOCS_HTML)
@@ -188,18 +287,32 @@ build_doc: $(DOCS)
# Include generated dependency files.
#
ifdef CC_MM
+-include $(C_SYM_OBJS:%.$(O)=%.d)
-include $(C_OBJS:%.$(O)=%.d)
+-include $(CPP_OBJS:%.$(O)=%.d)
-include $(C_PIC_OBJS:%.$(O)=%.d)
+-include $(CPP_PIC_OBJS:%.$(O)=%.d)
+-include $(C_JET_SYM_OBJS:%.$(O)=%.d)
-include $(C_JET_OBJS:%.$(O)=%.d)
-include $(C_TESTLIB_OBJS:%.$(O)=%.d)
-include $(TESTS_OBJS:%.$(O)=%.d)
+-include $(TESTS_CPP_OBJS:%.$(O)=%.d)
endif
+$(C_SYM_OBJS): $(objroot)src/%.sym.$(O): $(srcroot)src/%.c
+$(C_SYM_OBJS): CPPFLAGS += -DJEMALLOC_NO_PRIVATE_NAMESPACE
+$(C_SYMS): $(objroot)src/%.sym: $(objroot)src/%.sym.$(O)
$(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c
+$(CPP_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.cpp
$(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
$(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS)
+$(CPP_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.cpp
+$(CPP_PIC_OBJS): CXXFLAGS += $(PIC_CFLAGS)
+$(C_JET_SYM_OBJS): $(objroot)src/%.jet.sym.$(O): $(srcroot)src/%.c
+$(C_JET_SYM_OBJS): CPPFLAGS += -DJEMALLOC_JET -DJEMALLOC_NO_PRIVATE_NAMESPACE
+$(C_JET_SYMS): $(objroot)src/%.jet.sym: $(objroot)src/%.jet.sym.$(O)
$(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c
-$(C_JET_OBJS): CFLAGS += -DJEMALLOC_JET
+$(C_JET_OBJS): CPPFLAGS += -DJEMALLOC_JET
$(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c
@@ -209,112 +322,147 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%
$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB
$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
-$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
-define make-unit-link-dep
-$(1): TESTS_UNIT_LINK_OBJS += $(2)
-$(1): $(2)
-endef
-$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS)))))
$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
+$(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST
$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST
$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
+$(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp
$(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
+$(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
ifneq ($(IMPORTLIB),$(SO))
-$(C_OBJS): CPPFLAGS += -DDLLEXPORT
+$(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT
endif
-ifndef CC_MM
# Dependencies.
+ifndef CC_MM
HEADER_DIRS = $(srcroot)include/jemalloc/internal \
$(objroot)include/jemalloc $(objroot)include/jemalloc/internal
-HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))
-$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS)
-$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h
+HEADERS = $(filter-out $(PRIVATE_NAMESPACE_HDRS),$(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)))
+$(C_SYM_OBJS) $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(HEADERS)
+$(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h
endif
-$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
+$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h
+$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
+
+$(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
@mkdir -p $(@D)
$(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $<
ifdef CC_MM
@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
endif
+$(C_SYMS): %.sym:
+ @mkdir -p $(@D)
+ $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols.awk > $@
+
+$(C_JET_SYMS): %.sym:
+ @mkdir -p $(@D)
+ $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols_jet.awk > $@
+
+$(objroot)include/jemalloc/internal/private_namespace.gen.h: $(C_SYMS)
+ $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@
+
+$(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS)
+ $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@
+
+%.h: %.gen.h
+	@if ! cmp -s $< $@ ; then echo "cp $< $@"; cp $< $@ ; fi
+
+$(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O):
+ @mkdir -p $(@D)
+ $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) $(CTARGET) $<
+ifdef CC_MM
+ @$(CXX) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
+endif
+
ifneq ($(SOREV),$(SO))
%.$(SO) : %.$(SOREV)
@mkdir -p $(@D)
ln -sf $(<F) $@
endif
-$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS))
+$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS)) $(if $(PIC_CFLAGS),$(CPP_PIC_OBJS),$(CPP_OBJS))
@mkdir -p $(@D)
$(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
-$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS)
-$(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS)
-$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(C_OBJS)
+$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS) $(CPP_PIC_OBJS)
+$(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS) $(CPP_OBJS)
+$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(C_OBJS) $(CPP_OBJS)
$(STATIC_LIBS):
@mkdir -p $(@D)
$(AR) $(ARFLAGS)@AROUT@ $+
-$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
+$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
$(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread -lstdc++,$(LIBS))) $(LM) $(EXTRA_LDFLAGS)
+
+$(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
+ @mkdir -p $(@D)
+ $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
$(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
- $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
+ $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
build_lib_shared: $(DSOS)
build_lib_static: $(STATIC_LIBS)
build_lib: build_lib_shared build_lib_static
install_bin:
- install -d $(BINDIR)
+ $(INSTALL) -d $(BINDIR)
@for b in $(BINS); do \
- echo "install -m 755 $$b $(BINDIR)"; \
- install -m 755 $$b $(BINDIR); \
+ echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \
+ $(INSTALL) -m 755 $$b $(BINDIR); \
done
install_include:
- install -d $(INCLUDEDIR)/jemalloc
+ $(INSTALL) -d $(INCLUDEDIR)/jemalloc
@for h in $(C_HDRS); do \
- echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
- install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
+ echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
+ $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \
done
install_lib_shared: $(DSOS)
- install -d $(LIBDIR)
- install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+ $(INSTALL) -d $(LIBDIR)
+ $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
ifneq ($(SOREV),$(SO))
ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
endif
install_lib_static: $(STATIC_LIBS)
- install -d $(LIBDIR)
+ $(INSTALL) -d $(LIBDIR)
@for l in $(STATIC_LIBS); do \
- echo "install -m 755 $$l $(LIBDIR)"; \
- install -m 755 $$l $(LIBDIR); \
+ echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \
+ $(INSTALL) -m 755 $$l $(LIBDIR); \
done
-install_lib: install_lib_shared install_lib_static
+install_lib_pc: $(PC)
+ $(INSTALL) -d $(LIBDIR)/pkgconfig
+ @for l in $(PC); do \
+ echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \
+ $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \
+done
+
+install_lib: install_lib_shared install_lib_static install_lib_pc
install_doc_html:
- install -d $(DATADIR)/doc/jemalloc$(install_suffix)
+ $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix)
@for d in $(DOCS_HTML); do \
- echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
- install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
+ echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
+ $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
done
install_doc_man:
- install -d $(MANDIR)/man3
+ $(INSTALL) -d $(MANDIR)/man3
@for d in $(DOCS_MAN3); do \
- echo "install -m 644 $$d $(MANDIR)/man3"; \
- install -m 644 $$d $(MANDIR)/man3; \
+ echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \
+ $(INSTALL) -m 644 $$d $(MANDIR)/man3; \
done
install_doc: install_doc_html install_doc_man
@@ -322,7 +470,7 @@ install_doc: install_doc_html install_doc_man
install: install_bin install_include install_lib install_doc
tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
-tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE))
+tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE))
tests: tests_unit tests_integration tests_stress
@@ -330,78 +478,61 @@ check_unit_dir:
@mkdir -p $(objroot)test/unit
check_integration_dir:
@mkdir -p $(objroot)test/integration
-check_stress_dir:
+stress_dir:
@mkdir -p $(objroot)test/stress
-check_dir: check_unit_dir check_integration_dir check_stress_dir
+check_dir: check_unit_dir check_integration_dir
check_unit: tests_unit check_unit_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%)
+check_integration_prof: tests_integration check_integration_dir
+ifeq ($(enable_prof), 1)
+ $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
+ $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
+endif
+check_integration_decay: tests_integration check_integration_dir
+ $(MALLOC_CONF)="dirty_decay_ms:-1,muzzy_decay_ms:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
+ $(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
check_integration: tests_integration check_integration_dir
- $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%)
-check_stress: tests_stress check_stress_dir
+ $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
+stress: tests_stress stress_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
-check: tests check_dir
- $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%)
-
-ifeq ($(enable_code_coverage), 1)
-coverage_unit: check_unit
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS)
-
-coverage_integration: check_integration
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS)
-
-coverage_stress: check_stress
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress stress $(TESTS_STRESS_OBJS)
-
-coverage: check
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS)
- $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress integration $(TESTS_STRESS_OBJS)
-endif
+check: check_unit check_integration check_integration_decay check_integration_prof
clean:
+ rm -f $(PRIVATE_NAMESPACE_HDRS)
+ rm -f $(PRIVATE_NAMESPACE_GEN_HDRS)
+ rm -f $(C_SYM_OBJS)
+ rm -f $(C_SYMS)
rm -f $(C_OBJS)
+ rm -f $(CPP_OBJS)
rm -f $(C_PIC_OBJS)
+ rm -f $(CPP_PIC_OBJS)
+ rm -f $(C_JET_SYM_OBJS)
+ rm -f $(C_JET_SYMS)
rm -f $(C_JET_OBJS)
rm -f $(C_TESTLIB_OBJS)
+ rm -f $(C_SYM_OBJS:%.$(O)=%.d)
rm -f $(C_OBJS:%.$(O)=%.d)
- rm -f $(C_OBJS:%.$(O)=%.gcda)
- rm -f $(C_OBJS:%.$(O)=%.gcno)
+ rm -f $(CPP_OBJS:%.$(O)=%.d)
rm -f $(C_PIC_OBJS:%.$(O)=%.d)
- rm -f $(C_PIC_OBJS:%.$(O)=%.gcda)
- rm -f $(C_PIC_OBJS:%.$(O)=%.gcno)
+ rm -f $(CPP_PIC_OBJS:%.$(O)=%.d)
+ rm -f $(C_JET_SYM_OBJS:%.$(O)=%.d)
rm -f $(C_JET_OBJS:%.$(O)=%.d)
- rm -f $(C_JET_OBJS:%.$(O)=%.gcda)
- rm -f $(C_JET_OBJS:%.$(O)=%.gcno)
rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d)
- rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcda)
- rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcno)
rm -f $(TESTS_OBJS:%.$(O)=%$(EXE))
rm -f $(TESTS_OBJS)
rm -f $(TESTS_OBJS:%.$(O)=%.d)
- rm -f $(TESTS_OBJS:%.$(O)=%.gcda)
- rm -f $(TESTS_OBJS:%.$(O)=%.gcno)
rm -f $(TESTS_OBJS:%.$(O)=%.out)
+ rm -f $(TESTS_CPP_OBJS:%.$(O)=%$(EXE))
+ rm -f $(TESTS_CPP_OBJS)
+ rm -f $(TESTS_CPP_OBJS:%.$(O)=%.d)
+ rm -f $(TESTS_CPP_OBJS:%.$(O)=%.out)
rm -f $(DSOS) $(STATIC_LIBS)
- rm -f $(objroot)*.gcov.*
distclean: clean
- rm -rf $(objroot)autom4te.cache
+ rm -f $(objroot)bin/jemalloc-config
rm -f $(objroot)bin/jemalloc.sh
+ rm -f $(objroot)bin/jeprof
rm -f $(objroot)config.log
rm -f $(objroot)config.status
rm -f $(objroot)config.stamp
@@ -410,7 +541,7 @@ distclean: clean
relclean: distclean
rm -f $(objroot)configure
- rm -f $(srcroot)VERSION
+ rm -f $(objroot)VERSION
rm -f $(DOCS_HTML)
rm -f $(DOCS_MAN3)
diff --git a/deps/jemalloc/README b/deps/jemalloc/README
index 9b268f422..3a6e0d272 100644
--- a/deps/jemalloc/README
+++ b/deps/jemalloc/README
@@ -3,12 +3,12 @@ fragmentation avoidance and scalable concurrency support. jemalloc first came
into use as the FreeBSD libc allocator in 2005, and since then it has found its
way into numerous applications that rely on its predictable behavior. In 2010
jemalloc development efforts broadened to include developer support features
-such as heap profiling, Valgrind integration, and extensive monitoring/tuning
-hooks. Modern jemalloc releases continue to be integrated back into FreeBSD,
-and therefore versatility remains critical. Ongoing development efforts trend
-toward making jemalloc among the best allocators for a broad range of demanding
-applications, and eliminating/mitigating weaknesses that have practical
-repercussions for real world applications.
+such as heap profiling and extensive monitoring/tuning hooks. Modern jemalloc
+releases continue to be integrated back into FreeBSD, and therefore versatility
+remains critical. Ongoing development efforts trend toward making jemalloc
+among the best allocators for a broad range of demanding applications, and
+eliminating/mitigating weaknesses that have practical repercussions for real
+world applications.
The COPYING file contains copyright and licensing information.
@@ -17,4 +17,4 @@ jemalloc.
The ChangeLog file contains a brief summary of changes for each release.
-URL: http://www.canonware.com/jemalloc/
+URL: http://jemalloc.net/
diff --git a/deps/jemalloc/TUNING.md b/deps/jemalloc/TUNING.md
new file mode 100644
index 000000000..34fca05b4
--- /dev/null
+++ b/deps/jemalloc/TUNING.md
@@ -0,0 +1,129 @@
+This document summarizes the common approaches for performance fine tuning with
+jemalloc (as of 5.1.0).  The default configuration of jemalloc tends to work
+reasonably well in practice, and most applications should not have to tune any
+options.  However, in order to cover a wide range of applications and avoid
+pathological cases, the default settings are sometimes conservative and
+suboptimal, even for many common workloads.  When jemalloc is properly tuned
+for a specific application / workload, it is common to improve system-level
+metrics by a few percent, or to make favorable trade-offs.
+
+
+## Notable runtime options for performance tuning
+
+Runtime options can be set via
+[malloc_conf](http://jemalloc.net/jemalloc.3.html#tuning).
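+
+For instance, options are often set via the `MALLOC_CONF` environment
+variable, read at process startup (a sketch; `my_app` is a hypothetical
+binary using a default, unprefixed jemalloc build):
+
+    MALLOC_CONF="background_thread:true,metadata_thp:auto" ./my_app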
+
+* [background_thread](http://jemalloc.net/jemalloc.3.html#background_thread)
+
+  Enabling jemalloc background threads generally improves the tail latency of
+  application threads, since unused memory purging is shifted to dedicated
+  background threads.  In addition, background threads avoid the unintended
+  purging delays that application inactivity would otherwise cause.
+
+  Suggested: `background_thread:true` when jemalloc-managed threads are
+  acceptable.
+
+* [metadata_thp](http://jemalloc.net/jemalloc.3.html#opt.metadata_thp)
+
+  Allowing jemalloc to utilize transparent huge pages for its internal
+  metadata usually reduces TLB misses significantly, especially for programs
+  with a large memory footprint and frequent allocation / deallocation
+  activity.  Metadata memory usage may increase due to the use of huge
+  pages.
+
+ Suggested for allocation intensive programs: `metadata_thp:auto` or
+ `metadata_thp:always`, which is expected to improve CPU utilization at a
+ small memory cost.
+
+* [dirty_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and
+ [muzzy_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms)
+
+  Decay time determines how fast jemalloc returns unused pages to the
+  operating system, and therefore provides a fairly straightforward trade-off
+  between CPU and memory usage.  A shorter decay time purges unused pages
+  faster, reducing memory usage (usually at the cost of more CPU cycles spent
+  on purging), and vice versa.
+
+ Suggested: tune the values based on the desired trade-offs.
+
+* [narenas](http://jemalloc.net/jemalloc.3.html#opt.narenas)
+
+  By default jemalloc uses multiple arenas to reduce internal lock contention.
+  However, a high arena count may also increase overall memory fragmentation,
+  since arenas manage memory independently.  When a high degree of parallelism
+  is not expected at the allocator level, a lower arena count often improves
+  memory usage.
+
+  Suggested: if low parallelism is expected, try a lower arena count while
+  monitoring CPU and memory usage.
+
+* [percpu_arena](http://jemalloc.net/jemalloc.3.html#opt.percpu_arena)
+
+  Enable dynamic thread-to-arena association based on the running CPU.  This
+  has the potential to improve locality, e.g. when thread-to-CPU affinity is
+  present.
+
+ Suggested: try `percpu_arena:percpu` or `percpu_arena:phycpu` if
+ thread migration between processors is expected to be infrequent.
+
+Examples:
+
+* High resource consumption application, prioritizing CPU utilization:
+
+ `background_thread:true,metadata_thp:auto` combined with relaxed decay time
+ (increased `dirty_decay_ms` and / or `muzzy_decay_ms`,
+ e.g. `dirty_decay_ms:30000,muzzy_decay_ms:30000`).
+
+* High resource consumption application, prioritizing memory usage:
+
+ `background_thread:true` combined with shorter decay time (decreased
+ `dirty_decay_ms` and / or `muzzy_decay_ms`,
+ e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count
+ (e.g. number of CPUs).
+
+* Low resource consumption application:
+
+ `narenas:1,lg_tcache_max:13` combined with shorter decay time (decreased
+  `dirty_decay_ms` and / or `muzzy_decay_ms`, e.g.
+ `dirty_decay_ms:1000,muzzy_decay_ms:0`).
+
+* Extremely conservative -- minimize memory usage at all costs, only suitable
+  when allocation activity is very rare:
+
+ `narenas:1,tcache:false,dirty_decay_ms:0,muzzy_decay_ms:0`
+
+Note that it is recommended to combine the options with `abort_conf:true`,
+which aborts immediately on illegal options.
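+
+For instance, the conservative profile above could be applied with that safety
+check in one step (again with a hypothetical `my_app`):
+
+    MALLOC_CONF="narenas:1,tcache:false,dirty_decay_ms:0,muzzy_decay_ms:0,abort_conf:true" ./my_app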
+
+## Beyond runtime options
+
+In addition to the runtime options, there are a number of programmatic ways to
+improve application performance with jemalloc.
+
+* [Explicit arenas](http://jemalloc.net/jemalloc.3.html#arenas.create)
+
+ Manually created arenas can help performance in various ways, e.g. by
+ managing locality and contention for specific usages. For example,
+ applications can explicitly allocate frequently accessed objects from a
+ dedicated arena with
+ [mallocx()](http://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve
+ locality. In addition, explicit arenas often benefit from individually
+ tuned options, e.g. relaxed [decay
+ time](http://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if
+ frequent reuse is expected.
+
+* [Extent hooks](http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks)
+
+  Extent hooks allow customization for managing underlying memory.  One
+  performance-oriented use case is to utilize huge pages -- for example,
+  [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp)
+  uses explicit arenas with customized extent hooks to manage 1GB huge pages
+  for frequently accessed data, which reduces TLB misses significantly.
+
+* [Explicit thread-to-arena
+ binding](http://jemalloc.net/jemalloc.3.html#thread.arena)
+
+ It is common for some threads in an application to have different memory
+ access / allocation patterns. Threads with heavy workloads often benefit
+ from explicit binding, e.g. binding very active threads to dedicated arenas
+ may reduce contention at the allocator level.
diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION
index dace31ba7..5c2e26d43 100644
--- a/deps/jemalloc/VERSION
+++ b/deps/jemalloc/VERSION
@@ -1 +1 @@
-3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340
+5.1.0-0-g0
diff --git a/deps/jemalloc/bin/jemalloc-config.in b/deps/jemalloc/bin/jemalloc-config.in
new file mode 100644
index 000000000..80eca2e64
--- /dev/null
+++ b/deps/jemalloc/bin/jemalloc-config.in
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+usage() {
+ cat <<EOF
+Usage:
+ @BINDIR@/jemalloc-config <option>
+Options:
+ --help | -h : Print usage.
+ --version : Print jemalloc version.
+ --revision : Print shared library revision number.
+ --config : Print configure options used to build jemalloc.
+ --prefix : Print installation directory prefix.
+ --bindir : Print binary installation directory.
+ --datadir : Print data installation directory.
+ --includedir : Print include installation directory.
+ --libdir : Print library installation directory.
+ --mandir : Print manual page installation directory.
+ --cc : Print compiler used to build jemalloc.
+ --cflags : Print compiler flags used to build jemalloc.
+ --cppflags : Print preprocessor flags used to build jemalloc.
+ --cxxflags : Print C++ compiler flags used to build jemalloc.
+ --ldflags : Print library flags used to build jemalloc.
+ --libs : Print libraries jemalloc was linked against.
+EOF
+}
+
+prefix="@prefix@"
+exec_prefix="@exec_prefix@"
+
+case "$1" in
+--help | -h)
+ usage
+ exit 0
+ ;;
+--version)
+ echo "@jemalloc_version@"
+ ;;
+--revision)
+ echo "@rev@"
+ ;;
+--config)
+ echo "@CONFIG@"
+ ;;
+--prefix)
+ echo "@PREFIX@"
+ ;;
+--bindir)
+ echo "@BINDIR@"
+ ;;
+--datadir)
+ echo "@DATADIR@"
+ ;;
+--includedir)
+ echo "@INCLUDEDIR@"
+ ;;
+--libdir)
+ echo "@LIBDIR@"
+ ;;
+--mandir)
+ echo "@MANDIR@"
+ ;;
+--cc)
+ echo "@CC@"
+ ;;
+--cflags)
+ echo "@CFLAGS@"
+ ;;
+--cppflags)
+ echo "@CPPFLAGS@"
+ ;;
+--cxxflags)
+ echo "@CXXFLAGS@"
+ ;;
+--ldflags)
+ echo "@LDFLAGS@ @EXTRA_LDFLAGS@"
+ ;;
+--libs)
+ echo "@LIBS@"
+ ;;
+*)
+ usage
+ exit 1
+esac
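+
+# Example usage (a sketch; paths depend on the configured install prefix):
+# link an application against this jemalloc build.
+#   cc app.c -o app -I"$(jemalloc-config --includedir)" \
+#      -L"$(jemalloc-config --libdir)" -Wl,-rpath,"$(jemalloc-config --libdir)" \
+#      -ljemalloc $(jemalloc-config --libs)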
diff --git a/deps/jemalloc/bin/pprof b/deps/jemalloc/bin/jeprof.in
index a309943c1..588c6b438 100755..100644
--- a/deps/jemalloc/bin/pprof
+++ b/deps/jemalloc/bin/jeprof.in
@@ -2,11 +2,11 @@
# Copyright (c) 1998-2007, Google Inc.
# All rights reserved.
-#
+#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
-#
+#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
@@ -16,7 +16,7 @@
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
-#
+#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -40,28 +40,28 @@
#
# Examples:
#
-# % tools/pprof "program" "profile"
+# % tools/jeprof "program" "profile"
# Enters "interactive" mode
#
-# % tools/pprof --text "program" "profile"
+# % tools/jeprof --text "program" "profile"
# Generates one line per procedure
#
-# % tools/pprof --gv "program" "profile"
+# % tools/jeprof --gv "program" "profile"
# Generates annotated call-graph and displays via "gv"
#
-# % tools/pprof --gv --focus=Mutex "program" "profile"
+# % tools/jeprof --gv --focus=Mutex "program" "profile"
# Restrict to code paths that involve an entry that matches "Mutex"
#
-# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile"
+# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile"
# Restrict to code paths that involve an entry that matches "Mutex"
# and does not match "string"
#
-# % tools/pprof --list=IBF_CheckDocid "program" "profile"
+# % tools/jeprof --list=IBF_CheckDocid "program" "profile"
# Generates disassembly listing of all routines with at least one
# sample that match the --list=<regexp> pattern. The listing is
# annotated with the flat and cumulative sample counts at each line.
#
-# % tools/pprof --disasm=IBF_CheckDocid "program" "profile"
+# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile"
# Generates disassembly listing of all routines with at least one
# sample that match the --disasm=<regexp> pattern. The listing is
# annotated with the flat and cumulative sample counts at each PC value.
@@ -71,11 +71,13 @@
use strict;
use warnings;
use Getopt::Long;
+use Cwd;
+my $JEPROF_VERSION = "@jemalloc_version@";
my $PPROF_VERSION = "2.0";
# These are the object tools we use which can come from a
-# user-specified location using --tools, from the PPROF_TOOLS
+# user-specified location using --tools, from the JEPROF_TOOLS
# environment variable, or from the environment.
my %obj_tool_map = (
"objdump" => "objdump",
@@ -94,7 +96,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread
my @KCACHEGRIND = ("kcachegrind");
my @PS2PDF = ("ps2pdf");
# These are used for dynamic profiles
-my @URL_FETCHER = ("curl", "-s");
+my @URL_FETCHER = ("curl", "-s", "--fail");
# These are the web pages that servers need to support for dynamic profiles
my $HEAP_PAGE = "/pprof/heap";
@@ -144,13 +146,13 @@ my $sep_address = undef;
sub usage_string {
return <<EOF;
Usage:
-pprof [options] <program> <profiles>
+jeprof [options] <program> <profiles>
<profiles> is a space separated list of profile names.
-pprof [options] <symbolized-profiles>
+jeprof [options] <symbolized-profiles>
<symbolized-profiles> is a list of profile files where each file contains
the necessary symbol mappings as well as profile data (likely generated
with --raw).
-pprof [options] <profile>
+jeprof [options] <profile>
<profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE
Each name can be:
@@ -161,9 +163,9 @@ pprof [options] <profile>
$GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
$CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
For instance:
- pprof http://myserver.com:80$HEAP_PAGE
+ jeprof http://myserver.com:80$HEAP_PAGE
If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
-pprof --symbols <program>
+jeprof --symbols <program>
Maps addresses to symbol names. In this mode, stdin should be a
list of library mappings, in the same format as is found in the heap-
and cpu-profile files (this loosely matches that of /proc/self/maps
@@ -202,7 +204,7 @@ Output type:
--pdf Generate PDF to stdout
--svg Generate SVG to stdout
--gif Generate GIF to stdout
- --raw Generate symbolized pprof data (useful with remote fetch)
+ --raw Generate symbolized jeprof data (useful with remote fetch)
Heap-Profile Options:
--inuse_space Display in-use (mega)bytes [default]
@@ -222,11 +224,14 @@ Call-graph Options:
--nodefraction=<f> Hide nodes below <f>*total [default=.005]
--edgefraction=<f> Hide edges below <f>*total [default=.001]
--maxdegree=<n> Max incoming/outgoing edges per node [default=8]
- --focus=<regexp> Focus on nodes matching <regexp>
- --ignore=<regexp> Ignore nodes matching <regexp>
+ --focus=<regexp> Focus on backtraces with nodes matching <regexp>
+ --thread=<n> Show profile for thread <n>
+ --ignore=<regexp> Ignore backtraces with nodes matching <regexp>
--scale=<n> Set GV scaling [default=0]
--heapcheck Make nodes with non-0 object counts
(i.e. direct leak generators) more visible
+ --retain=<regexp> Retain only nodes that match <regexp>
+ --exclude=<regexp> Exclude all nodes that match <regexp>
Miscellaneous:
--tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames
@@ -235,34 +240,34 @@ Miscellaneous:
--version Version information
Environment Variables:
- PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof
- PPROF_TOOLS Prefix for object tools pathnames
+ JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof
+ JEPROF_TOOLS Prefix for object tools pathnames
Examples:
-pprof /bin/ls ls.prof
+jeprof /bin/ls ls.prof
Enters "interactive" mode
-pprof --text /bin/ls ls.prof
+jeprof --text /bin/ls ls.prof
Outputs one line per procedure
-pprof --web /bin/ls ls.prof
+jeprof --web /bin/ls ls.prof
Displays annotated call-graph in web browser
-pprof --gv /bin/ls ls.prof
+jeprof --gv /bin/ls ls.prof
Displays annotated call-graph via 'gv'
-pprof --gv --focus=Mutex /bin/ls ls.prof
+jeprof --gv --focus=Mutex /bin/ls ls.prof
Restricts to code paths including a .*Mutex.* entry
-pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
+jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
Code paths including Mutex but not string
-pprof --list=getdir /bin/ls ls.prof
+jeprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
-pprof --disasm=getdir /bin/ls ls.prof
+jeprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
-pprof http://localhost:1234/
+jeprof http://localhost:1234/
Enters "interactive" mode
-pprof --text localhost:1234
+jeprof --text localhost:1234
Outputs one line per procedure for localhost:1234
-pprof --raw localhost:1234 > ./local.raw
-pprof --text ./local.raw
+jeprof --raw localhost:1234 > ./local.raw
+jeprof --text ./local.raw
Fetches a remote profile for later analysis and then
analyzes it in text mode.
EOF
@@ -270,7 +275,8 @@ EOF
sub version_string {
return <<EOF
-pprof (part of gperftools $PPROF_VERSION)
+jeprof (part of jemalloc $JEPROF_VERSION)
+based on pprof (part of gperftools $PPROF_VERSION)
Copyright 1998-2007 Google Inc.
@@ -293,8 +299,8 @@ sub Init() {
# Setup tmp-file name and handler to clean it up.
# We do this in the very beginning so that we can use
# error() and cleanup() function anytime here after.
- $main::tmpfile_sym = "/tmp/pprof$$.sym";
- $main::tmpfile_ps = "/tmp/pprof$$";
+ $main::tmpfile_sym = "/tmp/jeprof$$.sym";
+ $main::tmpfile_ps = "/tmp/jeprof$$";
$main::next_tmpfile = 0;
$SIG{'INT'} = \&sighandler;
@@ -332,9 +338,12 @@ sub Init() {
$main::opt_edgefraction = 0.001;
$main::opt_maxdegree = 8;
$main::opt_focus = '';
+ $main::opt_thread = undef;
$main::opt_ignore = '';
$main::opt_scale = 0;
$main::opt_heapcheck = 0;
+ $main::opt_retain = '';
+ $main::opt_exclude = '';
$main::opt_seconds = 30;
$main::opt_lib = "";
@@ -402,9 +411,12 @@ sub Init() {
"edgefraction=f" => \$main::opt_edgefraction,
"maxdegree=i" => \$main::opt_maxdegree,
"focus=s" => \$main::opt_focus,
+ "thread=s" => \$main::opt_thread,
"ignore=s" => \$main::opt_ignore,
"scale=i" => \$main::opt_scale,
"heapcheck" => \$main::opt_heapcheck,
+ "retain=s" => \$main::opt_retain,
+ "exclude=s" => \$main::opt_exclude,
"inuse_space!" => \$main::opt_inuse_space,
"inuse_objects!" => \$main::opt_inuse_objects,
"alloc_space!" => \$main::opt_alloc_space,
@@ -562,66 +574,12 @@ sub Init() {
}
}
-sub Main() {
- Init();
- $main::collected_profile = undef;
- @main::profile_files = ();
- $main::op_time = time();
-
- # Printing symbols is special and requires a lot less info that most.
- if ($main::opt_symbols) {
- PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin
- return;
- }
-
- # Fetch all profile data
- FetchDynamicProfiles();
-
- # this will hold symbols that we read from the profile files
- my $symbol_map = {};
-
- # Read one profile, pick the last item on the list
- my $data = ReadProfile($main::prog, pop(@main::profile_files));
- my $profile = $data->{profile};
- my $pcs = $data->{pcs};
- my $libs = $data->{libs}; # Info about main program and shared libraries
- $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
-
- # Add additional profiles, if available.
- if (scalar(@main::profile_files) > 0) {
- foreach my $pname (@main::profile_files) {
- my $data2 = ReadProfile($main::prog, $pname);
- $profile = AddProfile($profile, $data2->{profile});
- $pcs = AddPcs($pcs, $data2->{pcs});
- $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
- }
- }
-
- # Subtract base from profile, if specified
- if ($main::opt_base ne '') {
- my $base = ReadProfile($main::prog, $main::opt_base);
- $profile = SubtractProfile($profile, $base->{profile});
- $pcs = AddPcs($pcs, $base->{pcs});
- $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
- }
+sub FilterAndPrint {
+ my ($profile, $symbols, $libs, $thread) = @_;
# Get total data in profile
my $total = TotalProfile($profile);
- # Collect symbols
- my $symbols;
- if ($main::use_symbolized_profile) {
- $symbols = FetchSymbols($pcs, $symbol_map);
- } elsif ($main::use_symbol_page) {
- $symbols = FetchSymbols($pcs);
- } else {
- # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
- # which may differ from the data from subsequent profiles, especially
- # if they were run on different machines. Use appropriate libs for
- # each pc somehow.
- $symbols = ExtractSymbols($libs, $pcs);
- }
-
  # Remove uninteresting stack items
$profile = RemoveUninterestingFrames($symbols, $profile);
@@ -656,7 +614,9 @@ sub Main() {
# (only matters when --heapcheck is given but we must be
# compatible with old branches that did not pass --heapcheck always):
if ($total != 0) {
- printf("Total: %s %s\n", Unparse($total), Units());
+ printf("Total%s: %s %s\n",
+ (defined($thread) ? " (t$thread)" : ""),
+ Unparse($total), Units());
}
PrintText($symbols, $flat, $cumulative, -1);
} elsif ($main::opt_raw) {
@@ -692,6 +652,77 @@ sub Main() {
} else {
InteractiveMode($profile, $symbols, $libs, $total);
}
+}
+
+sub Main() {
+ Init();
+ $main::collected_profile = undef;
+ @main::profile_files = ();
+ $main::op_time = time();
+
+  # Printing symbols is special and requires a lot less info than most.
+ if ($main::opt_symbols) {
+ PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin
+ return;
+ }
+
+ # Fetch all profile data
+ FetchDynamicProfiles();
+
+ # this will hold symbols that we read from the profile files
+ my $symbol_map = {};
+
+ # Read one profile, pick the last item on the list
+ my $data = ReadProfile($main::prog, pop(@main::profile_files));
+ my $profile = $data->{profile};
+ my $pcs = $data->{pcs};
+ my $libs = $data->{libs}; # Info about main program and shared libraries
+ $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
+
+ # Add additional profiles, if available.
+ if (scalar(@main::profile_files) > 0) {
+ foreach my $pname (@main::profile_files) {
+ my $data2 = ReadProfile($main::prog, $pname);
+ $profile = AddProfile($profile, $data2->{profile});
+ $pcs = AddPcs($pcs, $data2->{pcs});
+ $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
+ }
+ }
+
+ # Subtract base from profile, if specified
+ if ($main::opt_base ne '') {
+ my $base = ReadProfile($main::prog, $main::opt_base);
+ $profile = SubtractProfile($profile, $base->{profile});
+ $pcs = AddPcs($pcs, $base->{pcs});
+ $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
+ }
+
+ # Collect symbols
+ my $symbols;
+ if ($main::use_symbolized_profile) {
+ $symbols = FetchSymbols($pcs, $symbol_map);
+ } elsif ($main::use_symbol_page) {
+ $symbols = FetchSymbols($pcs);
+ } else {
+ # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
+ # which may differ from the data from subsequent profiles, especially
+ # if they were run on different machines. Use appropriate libs for
+ # each pc somehow.
+ $symbols = ExtractSymbols($libs, $pcs);
+ }
+
+ if (!defined($main::opt_thread)) {
+ FilterAndPrint($profile, $symbols, $libs);
+ }
+ if (defined($data->{threads})) {
+ foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) {
+ if (defined($main::opt_thread) &&
+ ($main::opt_thread eq '*' || $main::opt_thread == $thread)) {
+ my $thread_profile = $data->{threads}{$thread};
+ FilterAndPrint($thread_profile, $symbols, $libs, $thread);
+ }
+ }
+ }
cleanup();
exit(0);
@@ -780,14 +811,14 @@ sub InteractiveMode {
$| = 1; # Make output unbuffered for interactive mode
my ($orig_profile, $symbols, $libs, $total) = @_;
- print STDERR "Welcome to pprof! For help, type 'help'.\n";
+ print STDERR "Welcome to jeprof! For help, type 'help'.\n";
# Use ReadLine if it's installed and input comes from a console.
if ( -t STDIN &&
!ReadlineMightFail() &&
defined(eval {require Term::ReadLine}) ) {
- my $term = new Term::ReadLine 'pprof';
- while ( defined ($_ = $term->readline('(pprof) '))) {
+ my $term = new Term::ReadLine 'jeprof';
+ while ( defined ($_ = $term->readline('(jeprof) '))) {
$term->addhistory($_) if /\S/;
if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
last; # exit when we get an interactive command to quit
@@ -795,7 +826,7 @@ sub InteractiveMode {
}
} else { # don't have readline
while (1) {
- print STDERR "(pprof) ";
+ print STDERR "(jeprof) ";
$_ = <STDIN>;
last if ! defined $_ ;
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -988,7 +1019,7 @@ sub ProcessProfile {
sub InteractiveHelpMessage {
print STDERR <<ENDOFHELP;
-Interactive pprof mode
+Interactive jeprof mode
Commands:
gv
@@ -1031,7 +1062,7 @@ Commands:
Generates callgrind file. If no filename is given, kcachegrind is called.
help - This listing
- quit or ^D - End pprof
+ quit or ^D - End jeprof
For commands that accept optional -ignore tags, samples where any routine in
the stack trace matches the regular expression in any of the -ignore
@@ -1136,8 +1167,21 @@ sub PrintSymbolizedProfile {
}
print '---', "\n";
- $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash
- my $profile_marker = $&;
+ my $profile_marker;
+ if ($main::profile_type eq 'heap') {
+ $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ $profile_marker = $&;
+ } elsif ($main::profile_type eq 'growth') {
+ $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ $profile_marker = $&;
+ } elsif ($main::profile_type eq 'contention') {
+ $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ $profile_marker = $&;
+ } else { # elsif ($main::profile_type eq 'cpu')
+ $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ $profile_marker = $&;
+ }
+
print '--- ', $profile_marker, "\n";
if (defined($main::collected_profile)) {
# if used with remote fetch, simply dump the collected profile to output.
@@ -1147,6 +1191,12 @@ sub PrintSymbolizedProfile {
}
close(SRC);
} else {
+ # --raw/http: For everything to work correctly for non-remote profiles, we
+ # would need to extend PrintProfileData() to handle all possible profile
+ # types, re-enable the code that is currently disabled in ReadCPUProfile()
+ # and FixCallerAddresses(), and remove the remote profile dumping code in
+ # the block above.
+ die "--raw/http: jeprof can only dump remote profiles for --raw\n";
# dump a cpu-format profile to standard out
PrintProfileData($profile);
}
@@ -1476,7 +1526,7 @@ h1 {
}
</style>
<script type="text/javascript">
-function pprof_toggle_asm(e) {
+function jeprof_toggle_asm(e) {
var target;
if (!e) e = window.event;
if (e.target) target = e.target;
@@ -1683,23 +1733,23 @@ sub PrintSource {
HtmlPrintNumber($c2),
UnparseAddress($offset, $e->[0]),
CleanDisassembly($e->[3]));
-
+
# Append the most specific source line associated with this instruction
if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
$dis = HtmlEscape($dis);
my $f = $e->[5];
my $l = $e->[6];
if ($f ne $last_dis_filename) {
- $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
+ $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
} elsif ($l ne $last_dis_linenum) {
# De-emphasize the unchanged file name portion
$dis .= sprintf("<span class=unimportant>%s</span>" .
- "<span class=disasmloc>:%d</span>",
+ "<span class=disasmloc>:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
} else {
# De-emphasize the entire location
- $dis .= sprintf("<span class=unimportant>%s:%d</span>",
+ $dis .= sprintf("<span class=unimportant>%s:%d</span>",
HtmlEscape(CleanFileName($f)), $l);
}
$last_dis_filename = $f;
@@ -1745,7 +1795,7 @@ sub PrintSource {
if ($html) {
printf $output (
- "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
+ "<h1>%s</h1>%s\n<pre onClick=\"jeprof_toggle_asm()\">\n" .
"Total:%6s %6s (flat / cumulative %s)\n",
HtmlEscape(ShortFunctionName($routine)),
HtmlEscape(CleanFileName($filename)),
@@ -1788,8 +1838,8 @@ sub PrintSource {
if (defined($dis) && $dis ne '') {
$asm = "<span class=\"asm\">" . $dis . "</span>";
}
- my $source_class = (($n1 + $n2 > 0)
- ? "livesrc"
+ my $source_class = (($n1 + $n2 > 0)
+ ? "livesrc"
: (($asm ne "") ? "deadsrc" : "nop"));
printf $output (
"<span class=\"line\">%5d</span> " .
@@ -2797,6 +2847,43 @@ sub ExtractCalls {
return $calls;
}
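+# Filter the stacks of a profile according to --retain / --exclude: a frame
+# is dropped unless its symbol matches $main::opt_retain (when given), and
+# dropped if it matches $main::opt_exclude (when given); stacks left empty
+# are discarded entirely.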
+sub FilterFrames {
+ my $symbols = shift;
+ my $profile = shift;
+
+ if ($main::opt_retain eq '' && $main::opt_exclude eq '') {
+ return $profile;
+ }
+
+ my $result = {};
+ foreach my $k (keys(%{$profile})) {
+ my $count = $profile->{$k};
+ my @addrs = split(/\n/, $k);
+ my @path = ();
+ foreach my $a (@addrs) {
+ my $sym;
+ if (exists($symbols->{$a})) {
+ $sym = $symbols->{$a}->[0];
+ } else {
+ $sym = $a;
+ }
+ if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) {
+ next;
+ }
+ if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) {
+ next;
+ }
+ push(@path, $a);
+ }
+ if (scalar(@path) > 0) {
+ my $reduced_path = join("\n", @path);
+ AddEntry($result, $reduced_path, $count);
+ }
+ }
+
+ return $result;
+}
+
sub RemoveUninterestingFrames {
my $symbols = shift;
my $profile = shift;
@@ -2805,15 +2892,23 @@ sub RemoveUninterestingFrames {
my %skip = ();
my $skip_regexp = 'NOMATCH';
if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
- foreach my $name ('calloc',
+ foreach my $name ('@JEMALLOC_PREFIX@calloc',
'cfree',
- 'malloc',
- 'free',
- 'memalign',
- 'posix_memalign',
+ '@JEMALLOC_PREFIX@malloc',
+ 'newImpl',
+ 'void* newImpl',
+ '@JEMALLOC_PREFIX@free',
+ '@JEMALLOC_PREFIX@memalign',
+ '@JEMALLOC_PREFIX@posix_memalign',
+ '@JEMALLOC_PREFIX@aligned_alloc',
'pvalloc',
- 'valloc',
- 'realloc',
+ '@JEMALLOC_PREFIX@valloc',
+ '@JEMALLOC_PREFIX@realloc',
+ '@JEMALLOC_PREFIX@mallocx',
+ '@JEMALLOC_PREFIX@rallocx',
+ '@JEMALLOC_PREFIX@xallocx',
+ '@JEMALLOC_PREFIX@dallocx',
+ '@JEMALLOC_PREFIX@sdallocx',
'tc_calloc',
'tc_cfree',
'tc_malloc',
@@ -2923,6 +3018,10 @@ sub RemoveUninterestingFrames {
if (exists($symbols->{$a})) {
my $func = $symbols->{$a}->[0];
if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
+ # Throw away the portion of the backtrace seen so far, under the
+ # assumption that previous frames were for functions internal to the
+ # allocator.
+ @path = ();
next;
}
}
@@ -2931,6 +3030,9 @@ sub RemoveUninterestingFrames {
my $reduced_path = join("\n", @path);
AddEntry($result, $reduced_path, $count);
}
+
+ $result = FilterFrames($symbols, $result);
+
return $result;
}
@@ -3240,7 +3342,7 @@ sub ResolveRedirectionForCurl {
# Add a timeout flag to URL_FETCHER. Returns a new list.
sub AddFetchTimeout {
my $timeout = shift;
- my @fetcher = shift;
+ my @fetcher = @_;
if (defined($timeout)) {
if (join(" ", @fetcher) =~ m/\bcurl -s/) {
push(@fetcher, "--max-time", sprintf("%d", $timeout));
@@ -3286,6 +3388,27 @@ sub ReadSymbols {
return $map;
}
+sub URLEncode {
+ my $str = shift;
+ $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg;
+ return $str;
+}
+
+sub AppendSymbolFilterParams {
+ my $url = shift;
+ my @params = ();
+ if ($main::opt_retain ne '') {
+ push(@params, sprintf("retain=%s", URLEncode($main::opt_retain)));
+ }
+ if ($main::opt_exclude ne '') {
+ push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude)));
+ }
+ if (scalar @params > 0) {
+ $url = sprintf("%s?%s", $url, join("&", @params));
+ }
+ return $url;
+}
+
# Fetches and processes symbols to prepare them for use in the profile output
# code. If the optional 'symbol_map' arg is not given, fetches symbols from
# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols
@@ -3310,9 +3433,11 @@ sub FetchSymbols {
my $command_line;
if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
$url = ResolveRedirectionForCurl($url);
+ $url = AppendSymbolFilterParams($url);
$command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
$url);
} else {
+ $url = AppendSymbolFilterParams($url);
$command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
. " < " . ShellEscape($main::tmpfile_sym));
}
@@ -3393,15 +3518,25 @@ sub FetchDynamicProfile {
}
$url .= sprintf("seconds=%d", $main::opt_seconds);
$fetch_timeout = $main::opt_seconds * 1.01 + 60;
+ # Set $profile_type for consumption by PrintSymbolizedProfile.
+ $main::profile_type = 'cpu';
} else {
# For non-CPU profiles, we add a type-extension to
# the target profile file name.
my $suffix = $path;
$suffix =~ s,/,.,g;
$profile_file .= $suffix;
+ # Set $profile_type for consumption by PrintSymbolizedProfile.
+ if ($path =~ m/$HEAP_PAGE/) {
+ $main::profile_type = 'heap';
+ } elsif ($path =~ m/$GROWTH_PAGE/) {
+ $main::profile_type = 'growth';
+ } elsif ($path =~ m/$CONTENTION_PAGE/) {
+ $main::profile_type = 'contention';
+ }
}
- my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
+ my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof");
if (! -d $profile_dir) {
mkdir($profile_dir)
|| die("Unable to create profile directory $profile_dir: $!\n");
@@ -3617,7 +3752,7 @@ BEGIN {
# Reads the top, 'header' section of a profile, and returns the last
# line of the header, commonly called a 'header line'. The header
# section of a profile consists of zero or more 'command' lines that
-# are instructions to pprof, which pprof executes when reading the
+# are instructions to jeprof, which jeprof executes when reading the
# header. All 'command' lines start with a %. After the command
# lines is the 'header line', which is a profile-specific line that
# indicates what type of profile it is, and perhaps other global
@@ -3680,6 +3815,7 @@ sub IsSymbolizedProfileFile {
# $result->{version} Version number of profile file
# $result->{period} Sampling period (in microseconds)
# $result->{profile} Profile object
+# $result->{threads} Map of thread IDs to profile objects
# $result->{map} Memory map info from profile
# $result->{pcs} Hash of all PC values seen, key is hex address
sub ReadProfile {
@@ -3695,6 +3831,8 @@ sub ReadProfile {
my $symbol_marker = $&;
$PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash
my $profile_marker = $&;
+ $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash
+ my $heap_marker = $&;
# Look at first line to see if it is a heap or a CPU profile.
# CPU profile may start with no header at all, and just binary data
@@ -3721,13 +3859,22 @@ sub ReadProfile {
$header = ReadProfileHeader(*PROFILE) || "";
}
+ if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) {
+ # Skip "--- ..." line for profile types that have their own headers.
+ $header = ReadProfileHeader(*PROFILE) || "";
+ }
+
$main::profile_type = '';
+
if ($header =~ m/^heap profile:.*$growth_marker/o) {
$main::profile_type = 'growth';
$result = ReadHeapProfile($prog, *PROFILE, $header);
} elsif ($header =~ m/^heap profile:/) {
$main::profile_type = 'heap';
$result = ReadHeapProfile($prog, *PROFILE, $header);
+ } elsif ($header =~ m/^heap/) {
+ $main::profile_type = 'heap';
+ $result = ReadThreadedHeapProfile($prog, $fname, $header);
} elsif ($header =~ m/^--- *$contention_marker/o) {
$main::profile_type = 'contention';
$result = ReadSynchProfile($prog, *PROFILE);
@@ -3770,9 +3917,9 @@ sub ReadProfile {
# independent implementation.
sub FixCallerAddresses {
my $stack = shift;
- if ($main::use_symbolized_profile) {
- return $stack;
- } else {
+ # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile()
+ # dumps unadjusted profiles.
+ {
$stack =~ /(\s)/;
my $delimiter = $1;
my @addrs = split(' ', $stack);
@@ -3840,12 +3987,7 @@ sub ReadCPUProfile {
for (my $j = 0; $j < $d; $j++) {
my $pc = $slots->get($i+$j);
# Subtract one from caller pc so we map back to call instr.
- # However, don't do this if we're reading a symbolized profile
- # file, in which case the subtract-one was done when the file
- # was written.
- if ($j > 0 && !$main::use_symbolized_profile) {
- $pc--;
- }
+ $pc--;
$pc = sprintf("%0*x", $address_length, $pc);
$pcs->{$pc} = 1;
push @k, $pc;
@@ -3870,11 +4012,7 @@ sub ReadCPUProfile {
return $r;
}
-sub ReadHeapProfile {
- my $prog = shift;
- local *PROFILE = shift;
- my $header = shift;
-
+sub HeapProfileIndex {
my $index = 1;
if ($main::opt_inuse_space) {
$index = 1;
@@ -3885,6 +4023,84 @@ sub ReadHeapProfile {
} elsif ($main::opt_alloc_objects) {
$index = 2;
}
+ return $index;
+}
+
+sub ReadMappedLibraries {
+ my $fh = shift;
+ my $map = "";
+ # Read the /proc/self/maps data
+ while (<$fh>) {
+ s/\r//g; # turn windows-looking lines into unix-looking lines
+ $map .= $_;
+ }
+ return $map;
+}
+
+sub ReadMemoryMap {
+ my $fh = shift;
+ my $map = "";
+ # Read /proc/self/maps data as formatted by DumpAddressMap()
+ my $buildvar = "";
+ while (<PROFILE>) {
+ s/\r//g; # turn windows-looking lines into unix-looking lines
+ # Parse "build=<dir>" specification if supplied
+ if (m/^\s*build=(.*)\n/) {
+ $buildvar = $1;
+ }
+
+ # Expand "$build" variable if available
+ $_ =~ s/\$build\b/$buildvar/g;
+
+ $map .= $_;
+ }
+ return $map;
+}
+
+sub AdjustSamples {
+ my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_;
+ if ($sample_adjustment) {
+ if ($sampling_algorithm == 2) {
+ # Remote-heap version 2
+ # The sampling frequency is the rate of a Poisson process.
+ # This means that the probability of sampling an allocation of
+ # size X with sampling rate Y is 1 - exp(-X/Y)
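+      # Inverting that: each sampled allocation therefore represents an
+      # expected 1 / (1 - exp(-X/Y)) actual allocations, so the counts and
+      # byte totals are scaled up by that factor below, using the mean
+      # sampled size s/n as X.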
+ if ($n1 != 0) {
+ my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n1 *= $scale_factor;
+ $s1 *= $scale_factor;
+ }
+ if ($n2 != 0) {
+ my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ my $scale_factor = 1/(1 - exp(-$ratio));
+ $n2 *= $scale_factor;
+ $s2 *= $scale_factor;
+ }
+ } else {
+ # Remote-heap version 1
+ my $ratio;
+ $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n1 /= $ratio;
+ $s1 /= $ratio;
+ }
+ $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+ if ($ratio < 1) {
+ $n2 /= $ratio;
+ $s2 /= $ratio;
+ }
+ }
+ }
+ return ($n1, $s1, $n2, $s2);
+}
+
+sub ReadHeapProfile {
+ my $prog = shift;
+ local *PROFILE = shift;
+ my $header = shift;
+
+ my $index = HeapProfileIndex();
# Find the type of this profile. The header line looks like:
# heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053
@@ -3974,29 +4190,12 @@ sub ReadHeapProfile {
while (<PROFILE>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
if (/^MAPPED_LIBRARIES:/) {
- # Read the /proc/self/maps data
- while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
- $map .= $_;
- }
+ $map .= ReadMappedLibraries(*PROFILE);
last;
}
if (/^--- Memory map:/) {
- # Read /proc/self/maps data as formatted by DumpAddressMap()
- my $buildvar = "";
- while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
- # Parse "build=<dir>" specification if supplied
- if (m/^\s*build=(.*)\n/) {
- $buildvar = $1;
- }
-
- # Expand "$build" variable if available
- $_ =~ s/\$build\b/$buildvar/g;
-
- $map .= $_;
- }
+ $map .= ReadMemoryMap(*PROFILE);
last;
}
@@ -4007,43 +4206,85 @@ sub ReadHeapProfile {
if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
my $stack = $5;
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+ my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
+ $n1, $s1, $n2, $s2);
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
+ }
+ }
- if ($sample_adjustment) {
- if ($sampling_algorithm == 2) {
- # Remote-heap version 2
- # The sampling frequency is the rate of a Poisson process.
- # This means that the probability of sampling an allocation of
- # size X with sampling rate Y is 1 - exp(-X/Y)
- if ($n1 != 0) {
- my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- my $scale_factor = 1/(1 - exp(-$ratio));
- $n1 *= $scale_factor;
- $s1 *= $scale_factor;
- }
- if ($n2 != 0) {
- my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- my $scale_factor = 1/(1 - exp(-$ratio));
- $n2 *= $scale_factor;
- $s2 *= $scale_factor;
- }
- } else {
- # Remote-heap version 1
- my $ratio;
- $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
- if ($ratio < 1) {
- $n1 /= $ratio;
- $s1 /= $ratio;
- }
- $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
- if ($ratio < 1) {
- $n2 /= $ratio;
- $s2 /= $ratio;
- }
+ my $r = {};
+ $r->{version} = "heap";
+ $r->{period} = 1;
+ $r->{profile} = $profile;
+ $r->{libs} = ParseLibraries($prog, $map, $pcs);
+ $r->{pcs} = $pcs;
+ return $r;
+}
+
+sub ReadThreadedHeapProfile {
+ my ($prog, $fname, $header) = @_;
+
+ my $index = HeapProfileIndex();
+ my $sampling_algorithm = 0;
+ my $sample_adjustment = 0;
+ chomp($header);
+ my $type = "unknown";
+ # Assuming a very specific type of header for now.
+ if ($header =~ m"^heap_v2/(\d+)") {
+ $type = "_v2";
+ $sampling_algorithm = 2;
+ $sample_adjustment = int($1);
+ }
+ if ($type ne "_v2" || !defined($sample_adjustment)) {
+ die "Threaded heap profiles require v2 sampling with a sample rate\n";
+ }
+
+ my $profile = {};
+ my $thread_profiles = {};
+ my $pcs = {};
+ my $map = "";
+ my $stack = "";
+
+ while (<PROFILE>) {
+ s/\r//g;
+ if (/^MAPPED_LIBRARIES:/) {
+ $map .= ReadMappedLibraries(*PROFILE);
+ last;
+ }
+
+ if (/^--- Memory map:/) {
+ $map .= ReadMemoryMap(*PROFILE);
+ last;
+ }
+
+ # Read entry of the form:
+ # @ a1 a2 ... an
+ # t*: <count1>: <bytes1> [<count2>: <bytes2>]
+ # t1: <count1>: <bytes1> [<count2>: <bytes2>]
+ # ...
+ # tn: <count1>: <bytes1> [<count2>: <bytes2>]
+ s/^\s*//;
+ s/\s*$//;
+ if (m/^@\s+(.*)$/) {
+ $stack = $1;
+ } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) {
+ if ($stack eq "") {
+ # Still in the header, so this is just a per-thread summary.
+ next;
+ }
+ my $thread = $2;
+ my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6);
+ my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
+ $n1, $s1, $n2, $s2);
+ if ($thread eq "*") {
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
+ } else {
+ if (!exists($thread_profiles->{$thread})) {
+ $thread_profiles->{$thread} = {};
}
+ AddEntries($thread_profiles->{$thread}, $pcs,
+ FixCallerAddresses($stack), $counts[$index]);
}
-
- my @counts = ($n1, $s1, $n2, $s2);
- AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
}
}
@@ -4051,6 +4292,7 @@ sub ReadHeapProfile {
$r->{version} = "heap";
$r->{period} = 1;
$r->{profile} = $profile;
+ $r->{threads} = $thread_profiles;
$r->{libs} = ParseLibraries($prog, $map, $pcs);
$r->{pcs} = $pcs;
return $r;
@@ -4120,10 +4362,10 @@ sub ReadSynchProfile {
} elsif ($variable eq "sampling period") {
$sampling_period = $value;
} elsif ($variable eq "ms since reset") {
- # Currently nothing is done with this value in pprof
+ # Currently nothing is done with this value in jeprof
# So we just silently ignore it for now
} elsif ($variable eq "discarded samples") {
- # Currently nothing is done with this value in pprof
+ # Currently nothing is done with this value in jeprof
# So we just silently ignore it for now
} else {
printf STDERR ("Ignoring unnknown variable in /contention output: " .
@@ -4331,7 +4573,7 @@ sub ParseTextSectionHeader {
# Split /proc/pid/maps dump into a list of libraries
sub ParseLibraries {
return if $main::use_symbol_page; # We don't need libraries info.
- my $prog = shift;
+ my $prog = Cwd::abs_path(shift);
my $map = shift;
my $pcs = shift;
@@ -4364,6 +4606,16 @@ sub ParseLibraries {
$finish = HexExtend($2);
$offset = $zero_offset;
$lib = $3;
+ } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) {
+ # PIEs and address space randomization do not play well with our
+ # default assumption that main executable is at lowest
+ # addresses. So we're detecting main executable in
+ # /proc/self/maps as well.
+ $start = HexExtend($1);
+ $finish = HexExtend($2);
+ $offset = HexExtend($3);
+ $lib = $4;
+ $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths
}
# FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in
# function procfs_doprocmap (sys/fs/procfs/procfs_map.c)
@@ -4429,7 +4681,7 @@ sub ParseLibraries {
}
# Add two hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressAdd {
my $addr1 = shift;
my $addr2 = shift;
@@ -4483,7 +4735,7 @@ sub AddressAdd {
# Subtract two hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressSub {
my $addr1 = shift;
my $addr2 = shift;
@@ -4535,7 +4787,7 @@ sub AddressSub {
}
# Increment a hex addresses of length $address_length.
-# Run pprof --test for unit test if this is changed.
+# Run jeprof --test for unit test if this is changed.
sub AddressInc {
my $addr = shift;
my $sum;
@@ -4747,7 +4999,7 @@ sub MapToSymbols {
}
}
}
-
+
# Prepend to accumulated symbols for pcstr
# (so that caller comes before callee)
my $sym = $symbols->{$pcstr};
@@ -4853,7 +5105,7 @@ sub UnparseAddress {
# 32-bit or ELF 64-bit executable file. The location of the tools
# is determined by considering the following options in this order:
# 1) --tools option, if set
-# 2) PPROF_TOOLS environment variable, if set
+# 2) JEPROF_TOOLS environment variable, if set
# 3) the environment
sub ConfigureObjTools {
my $prog_file = shift;
@@ -4886,7 +5138,7 @@ sub ConfigureObjTools {
# For windows, we provide a version of nm and addr2line as part of
# the opensource release, which is capable of parsing
# Windows-style PDB executables. It should live in the path, or
- # in the same directory as pprof.
+ # in the same directory as jeprof.
$obj_tool_map{"nm_pdb"} = "nm-pdb";
$obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
}
@@ -4905,20 +5157,20 @@ sub ConfigureObjTools {
}
# Returns the path of a caller-specified object tool. If --tools or
-# PPROF_TOOLS are specified, then returns the full path to the tool
+# JEPROF_TOOLS are specified, then returns the full path to the tool
# with that prefix. Otherwise, returns the path unmodified (which
# means we will look for it on PATH).
sub ConfigureTool {
my $tool = shift;
my $path;
- # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+ # --tools (or $JEPROF_TOOLS) is a comma separated list, where each
# item is either a) a pathname prefix, or b) a map of the form
# <tool>:<path>. First we look for an entry of type (b) for our
# tool. If one is found, we use it. Otherwise, we consider all the
# pathname prefixes in turn, until one yields an existing file. If
# none does, we use a default path.
- my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+ my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || "";
if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
$path = $2;
# TODO(csilvers): sanity-check that $path exists? Hard if it's relative.
@@ -4932,16 +5184,16 @@ sub ConfigureTool {
}
if (!$path) {
error("No '$tool' found with prefix specified by " .
- "--tools (or \$PPROF_TOOLS) '$tools'\n");
+ "--tools (or \$JEPROF_TOOLS) '$tools'\n");
}
} else {
# ... otherwise use the version that exists in the same directory as
- # pprof. If there's nothing there, use $PATH.
+ # jeprof. If there's nothing there, use $PATH.
$0 =~ m,[^/]*$,; # this is everything after the last slash
my $dirname = $`; # this is everything up to and including the last slash
if (-x "$dirname$tool") {
$path = "$dirname$tool";
- } else {
+ } else {
$path = $tool;
}
}
@@ -4966,7 +5218,7 @@ sub cleanup {
unlink($main::tmpfile_sym);
unlink(keys %main::tempnames);
- # We leave any collected profiles in $HOME/pprof in case the user wants
+ # We leave any collected profiles in $HOME/jeprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
defined($main::collected_profile)) {
@@ -4975,7 +5227,7 @@ sub cleanup {
}
print STDERR "If you want to investigate this profile further, you can do:\n";
print STDERR "\n";
- print STDERR " pprof \\\n";
+ print STDERR " jeprof \\\n";
print STDERR " $main::prog \\\n";
print STDERR " $main::collected_profile\n";
print STDERR "\n";
@@ -5160,7 +5412,7 @@ sub GetProcedureBoundaries {
# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
# To make them more readable, we add underscores at interesting places.
# This routine removes the underscores, producing the canonical representation
-# used by pprof to represent addresses, particularly in the tested routines.
+# used by jeprof to represent addresses, particularly in the tested routines.
sub CanonicalHex {
my $arg = shift;
return join '', (split '_',$arg);
diff --git a/deps/jemalloc/config.guess b/deps/jemalloc/build-aux/config.guess
index b79252d6b..2e9ad7fe8 100755
--- a/deps/jemalloc/config.guess
+++ b/deps/jemalloc/build-aux/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2013 Free Software Foundation, Inc.
+# Copyright 1992-2016 Free Software Foundation, Inc.
-timestamp='2013-06-10'
+timestamp='2016-10-02'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -24,12 +24,12 @@ timestamp='2013-06-10'
# program. This Exception is an additional permission under section 7
# of the GNU General Public License, version 3 ("GPLv3").
#
-# Originally written by Per Bothner.
+# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
#
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
#
-# Please send patches with a ChangeLog entry to config-patches@gnu.org.
+# Please send patches to <config-patches@gnu.org>.
me=`echo "$0" | sed -e 's,.*/,,'`
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2013 Free Software Foundation, Inc.
+Copyright 1992-2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -149,7 +149,7 @@ Linux|GNU|GNU/*)
LIBC=gnu
#endif
EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+ eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
;;
esac
@@ -168,19 +168,29 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# Note: NetBSD doesn't particularly care about the vendor
# portion of the name. We always set it to "unknown".
sysctl="sysctl -n hw.machine_arch"
- UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
- /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+ UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
+ /sbin/$sysctl 2>/dev/null || \
+ /usr/sbin/$sysctl 2>/dev/null || \
+ echo unknown)`
case "${UNAME_MACHINE_ARCH}" in
armeb) machine=armeb-unknown ;;
arm*) machine=arm-unknown ;;
sh3el) machine=shl-unknown ;;
sh3eb) machine=sh-unknown ;;
sh5el) machine=sh5le-unknown ;;
+ earmv*)
+ arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
+ endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'`
+ machine=${arch}${endian}-unknown
+ ;;
*) machine=${UNAME_MACHINE_ARCH}-unknown ;;
esac
# The Operating System including object format, if it has switched
- # to ELF recently, or will in the future.
+ # to ELF recently (or will in the future) and ABI.
case "${UNAME_MACHINE_ARCH}" in
+ earm*)
+ os=netbsdelf
+ ;;
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
@@ -197,6 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
os=netbsd
;;
esac
+ # Determine ABI tags.
+ case "${UNAME_MACHINE_ARCH}" in
+ earm*)
+ expr='s/^earmv[0-9]/-eabi/;s/eb$//'
+ abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"`
+ ;;
+ esac
# The OS release
# Debian GNU/NetBSD machines have a different userland, and
# thus, need a distinct triplet. However, they do not need
@@ -207,13 +224,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
release='-gnu'
;;
*)
- release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+ release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2`
;;
esac
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "${machine}-${os}${release}"
+ echo "${machine}-${os}${release}${abi}"
exit ;;
*:Bitrig:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
@@ -223,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
exit ;;
+ *:LibertyBSD:*:*)
+ UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
+ echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE}
+ exit ;;
*:ekkoBSD:*:*)
echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
exit ;;
@@ -235,6 +256,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:MirBSD:*:*)
echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
exit ;;
+ *:Sortix:*:*)
+ echo ${UNAME_MACHINE}-unknown-sortix
+ exit ;;
alpha:OSF1:*:*)
case $UNAME_RELEASE in
*4.0)
@@ -251,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
case "$ALPHA_CPU_TYPE" in
"EV4 (21064)")
- UNAME_MACHINE="alpha" ;;
+ UNAME_MACHINE=alpha ;;
"EV4.5 (21064)")
- UNAME_MACHINE="alpha" ;;
+ UNAME_MACHINE=alpha ;;
"LCA4 (21066/21068)")
- UNAME_MACHINE="alpha" ;;
+ UNAME_MACHINE=alpha ;;
"EV5 (21164)")
- UNAME_MACHINE="alphaev5" ;;
+ UNAME_MACHINE=alphaev5 ;;
"EV5.6 (21164A)")
- UNAME_MACHINE="alphaev56" ;;
+ UNAME_MACHINE=alphaev56 ;;
"EV5.6 (21164PC)")
- UNAME_MACHINE="alphapca56" ;;
+ UNAME_MACHINE=alphapca56 ;;
"EV5.7 (21164PC)")
- UNAME_MACHINE="alphapca57" ;;
+ UNAME_MACHINE=alphapca57 ;;
"EV6 (21264)")
- UNAME_MACHINE="alphaev6" ;;
+ UNAME_MACHINE=alphaev6 ;;
"EV6.7 (21264A)")
- UNAME_MACHINE="alphaev67" ;;
+ UNAME_MACHINE=alphaev67 ;;
"EV6.8CB (21264C)")
- UNAME_MACHINE="alphaev68" ;;
+ UNAME_MACHINE=alphaev68 ;;
"EV6.8AL (21264B)")
- UNAME_MACHINE="alphaev68" ;;
+ UNAME_MACHINE=alphaev68 ;;
"EV6.8CX (21264D)")
- UNAME_MACHINE="alphaev68" ;;
+ UNAME_MACHINE=alphaev68 ;;
"EV6.9A (21264/EV69A)")
- UNAME_MACHINE="alphaev69" ;;
+ UNAME_MACHINE=alphaev69 ;;
"EV7 (21364)")
- UNAME_MACHINE="alphaev7" ;;
+ UNAME_MACHINE=alphaev7 ;;
"EV7.9 (21364A)")
- UNAME_MACHINE="alphaev79" ;;
+ UNAME_MACHINE=alphaev79 ;;
esac
# A Pn.n version is a patched version.
# A Vn.n version is a released version.
# A Tn.n version is a released field test version.
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
@@ -359,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
eval $set_cc_for_build
- SUN_ARCH="i386"
+ SUN_ARCH=i386
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
# This test works for both compilers.
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
- SUN_ARCH="x86_64"
+ SUN_ARCH=x86_64
fi
fi
echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
@@ -393,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit ;;
sun*:*:4.2BSD:*)
UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+ test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3
case "`/bin/arch`" in
sun3)
echo m68k-sun-sunos${UNAME_RELEASE}
@@ -579,8 +603,9 @@ EOF
else
IBM_ARCH=powerpc
fi
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
+ if [ -x /usr/bin/lslpp ] ; then
+ IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
+ awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
else
IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
fi
@@ -617,13 +642,13 @@ EOF
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
case "${sc_cpu_version}" in
- 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
- 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+ 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
+ 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
532) # CPU_PA_RISC2_0
case "${sc_kernel_bits}" in
- 32) HP_ARCH="hppa2.0n" ;;
- 64) HP_ARCH="hppa2.0w" ;;
- '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
+ 32) HP_ARCH=hppa2.0n ;;
+ 64) HP_ARCH=hppa2.0w ;;
+ '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20
esac ;;
esac
fi
@@ -662,11 +687,11 @@ EOF
exit (0);
}
EOF
- (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+ (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa
fi ;;
esac
- if [ ${HP_ARCH} = "hppa2.0w" ]
+ if [ ${HP_ARCH} = hppa2.0w ]
then
eval $set_cc_for_build
@@ -679,12 +704,12 @@ EOF
# $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
# => hppa64-hp-hpux11.23
- if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+ if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
grep -q __LP64__
then
- HP_ARCH="hppa2.0w"
+ HP_ARCH=hppa2.0w
else
- HP_ARCH="hppa64"
+ HP_ARCH=hppa64
fi
fi
echo ${HP_ARCH}-hp-hpux${HPUX_REV}
@@ -789,14 +814,14 @@ EOF
echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
- FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+ FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+ FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+ FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
+ FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
@@ -826,7 +851,7 @@ EOF
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
- i*:MSYS*:*)
+ *:MSYS*:*)
echo ${UNAME_MACHINE}-pc-msys
exit ;;
i*:windows32*:*)
@@ -878,7 +903,7 @@ EOF
exit ;;
*:GNU/*:*:*)
# other systems with GNU libc and userland
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+ echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
exit ;;
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
@@ -901,7 +926,7 @@ EOF
EV68*) UNAME_MACHINE=alphaev68 ;;
esac
objdump --private-headers /bin/sh | grep -q ld.so.1
- if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
+ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
arc:Linux:*:* | arceb:Linux:*:*)
@@ -932,6 +957,9 @@ EOF
crisv32:Linux:*:*)
echo ${UNAME_MACHINE}-axis-linux-${LIBC}
exit ;;
+ e2k:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ exit ;;
frv:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
@@ -944,6 +972,9 @@ EOF
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
+ k1om:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
@@ -969,10 +1000,13 @@ EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
- or1k:Linux:*:*)
+ mips64el:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
- or32:Linux:*:*)
+ openrisc*:Linux:*:*)
+ echo or1k-unknown-linux-${LIBC}
+ exit ;;
+ or32:Linux:*:* | or1k*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
padre:Linux:*:*)
@@ -1001,6 +1035,9 @@ EOF
ppcle:Linux:*:*)
echo powerpcle-unknown-linux-${LIBC}
exit ;;
+ riscv32:Linux:*:* | riscv64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
exit ;;
@@ -1020,7 +1057,7 @@ EOF
echo ${UNAME_MACHINE}-dec-linux-${LIBC}
exit ;;
x86_64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo ${UNAME_MACHINE}-pc-linux-${LIBC}
exit ;;
xtensa*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
@@ -1099,7 +1136,7 @@ EOF
# uname -m prints for DJGPP always 'pc', but it prints nothing about
# the processor, so we play safe by assuming i586.
# Note: whatever this is, it MUST be the same as what config.sub
- # prints for the "djgpp" host, or else GDB configury will decide that
+ # prints for the "djgpp" host, or else GDB configure will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
exit ;;
@@ -1248,6 +1285,9 @@ EOF
SX-8R:SUPER-UX:*:*)
echo sx8r-nec-superux${UNAME_RELEASE}
exit ;;
+ SX-ACE:SUPER-UX:*:*)
+ echo sxace-nec-superux${UNAME_RELEASE}
+ exit ;;
Power*:Rhapsody:*:*)
echo powerpc-apple-rhapsody${UNAME_RELEASE}
exit ;;
@@ -1260,22 +1300,32 @@ EOF
if test "$UNAME_PROCESSOR" = unknown ; then
UNAME_PROCESSOR=powerpc
fi
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- case $UNAME_PROCESSOR in
- i386) UNAME_PROCESSOR=x86_64 ;;
- powerpc) UNAME_PROCESSOR=powerpc64 ;;
- esac
+ if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
+ if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ case $UNAME_PROCESSOR in
+ i386) UNAME_PROCESSOR=x86_64 ;;
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
+ esac
+ fi
fi
+ elif test "$UNAME_PROCESSOR" = i386 ; then
+ # Avoid executing cc on OS X 10.9, as it ships with a stub
+ # that puts up a graphical alert prompting to install
+ # developer tools. Any system running Mac OS X 10.7 or
+ # later (Darwin 11 and later) is required to have a 64-bit
+ # processor. This is not true of the ARM version of Darwin
+ # that Apple uses in portable devices.
+ UNAME_PROCESSOR=x86_64
fi
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
exit ;;
*:procnto*:*:* | *:QNX:[0123456789]*:*)
UNAME_PROCESSOR=`uname -p`
- if test "$UNAME_PROCESSOR" = "x86"; then
+ if test "$UNAME_PROCESSOR" = x86; then
UNAME_PROCESSOR=i386
UNAME_MACHINE=pc
fi
@@ -1306,7 +1356,7 @@ EOF
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
# operating systems.
- if test "$cputype" = "386"; then
+ if test "$cputype" = 386; then
UNAME_MACHINE=i386
else
UNAME_MACHINE="$cputype"
@@ -1348,7 +1398,7 @@ EOF
echo i386-pc-xenix
exit ;;
i*86:skyos:*:*)
- echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+ echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'`
exit ;;
i*86:rdos:*:*)
echo ${UNAME_MACHINE}-pc-rdos
@@ -1359,171 +1409,25 @@ EOF
x86_64:VMkernel:*:*)
echo ${UNAME_MACHINE}-unknown-esx
exit ;;
-esac
-
-eval $set_cc_for_build
-cat >$dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
- /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
- I don't know.... */
- printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
- printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
- "4"
-#else
- ""
-#endif
- ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
- printf ("arm-acorn-riscix\n"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
- printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
- int version;
- version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
- if (version < 4)
- printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
- else
- printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
- exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
- printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
- printf ("ns32k-encore-mach\n"); exit (0);
-#else
- printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
- printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
- printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
- printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
- struct utsname un;
-
- uname(&un);
-
- if (strncmp(un.version, "V2", 2) == 0) {
- printf ("i386-sequent-ptx2\n"); exit (0);
- }
- if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
- printf ("i386-sequent-ptx1\n"); exit (0);
- }
- printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-# if !defined (ultrix)
-# include <sys/param.h>
-# if defined (BSD)
-# if BSD == 43
- printf ("vax-dec-bsd4.3\n"); exit (0);
-# else
-# if BSD == 199006
- printf ("vax-dec-bsd4.3reno\n"); exit (0);
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# endif
-# else
- printf ("vax-dec-bsd\n"); exit (0);
-# endif
-# else
- printf ("vax-dec-ultrix\n"); exit (0);
-# endif
-#endif
-
-#if defined (alliant) && defined (i860)
- printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
- exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
- { echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
- case `getsysinfo -f cpu_type` in
- c1*)
- echo c1-convex-bsd
- exit ;;
- c2*)
- if getsysinfo -f scalar_acc
- then echo c32-convex-bsd
- else echo c2-convex-bsd
- fi
- exit ;;
- c34*)
- echo c34-convex-bsd
- exit ;;
- c38*)
- echo c38-convex-bsd
- exit ;;
- c4*)
- echo c4-convex-bsd
+ amd64:Isilon\ OneFS:*:*)
+ echo x86_64-unknown-onefs
exit ;;
- esac
-fi
+esac
cat >&2 <<EOF
$0: unable to guess system type
-This script, last modified $timestamp, has failed to recognize
-the operating system you are using. It is advised that you
-download the most up to date version of the config scripts from
+This script (version $timestamp), has failed to recognize the
+operating system you are using. If your script is old, overwrite
+config.guess and config.sub with the latest versions from:
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
and
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+ http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
-If the version you run ($0) is already up to date, please
-send the following data and any information you think might be
-pertinent to <config-patches@gnu.org> in order to provide the needed
-information to handle your system.
+If $0 has already been updated, send the following data and any
+information you think might be pertinent to config-patches@gnu.org to
+provide the necessary information to handle your system.
config.guess timestamp = $timestamp
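
(The config.guess hunks end here. As a minimal sketch, assuming a POSIX shell and the build-aux/ location introduced by this rename, this is roughly how an autoconf-generated configure consumes the script to canonicalize the build triplet; the variable names and error text are illustrative, not part of the patch:)

    # Hypothetical illustration -- not part of the patch above.
    build_alias=`$SHELL build-aux/config.guess` ||
      { echo "cannot guess build type; you must specify one" >&2; exit 1; }
    build=`$SHELL build-aux/config.sub "$build_alias"`   # canonicalize the guess
    echo "build triplet: $build"   # e.g. x86_64-apple-darwin13.4.0
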
diff --git a/deps/jemalloc/config.sub b/deps/jemalloc/build-aux/config.sub
index 61cb4bc22..dd2ca93c6 100755
--- a/deps/jemalloc/config.sub
+++ b/deps/jemalloc/build-aux/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2013 Free Software Foundation, Inc.
+# Copyright 1992-2016 Free Software Foundation, Inc.
-timestamp='2013-10-01'
+timestamp='2016-11-04'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -25,7 +25,7 @@ timestamp='2013-10-01'
# of the GNU General Public License, version 3 ("GPLv3").
-# Please send patches with a ChangeLog entry to config-patches@gnu.org.
+# Please send patches to <config-patches@gnu.org>.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
@@ -33,7 +33,7 @@ timestamp='2013-10-01'
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
@@ -53,8 +53,7 @@ timestamp='2013-10-01'
me=`echo "$0" | sed -e 's,.*/,,'`
usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS
- $0 [OPTION] ALIAS
+Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
Canonicalize a configuration name.
@@ -68,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2013 Free Software Foundation, Inc.
+Copyright 1992-2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -117,8 +116,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
- knetbsd*-gnu* | netbsd*-gnu* | \
- kopensolaris*-gnu* | \
+ knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
+ kopensolaris*-gnu* | cloudabi*-eabi* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
@@ -255,12 +254,13 @@ case $basic_machine in
| arc | arceb \
| arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
| avr | avr32 \
+ | ba \
| be32 | be64 \
| bfin \
| c4x | c8051 | clipper \
| d10v | d30v | dlx | dsp16xx \
- | epiphany \
- | fido | fr30 | frv \
+ | e2k | epiphany \
+ | fido | fr30 | frv | ft32 \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| hexagon \
| i370 | i860 | i960 | ia64 \
@@ -283,8 +283,10 @@ case $basic_machine in
| mips64vr5900 | mips64vr5900el \
| mipsisa32 | mipsisa32el \
| mipsisa32r2 | mipsisa32r2el \
+ | mipsisa32r6 | mipsisa32r6el \
| mipsisa64 | mipsisa64el \
| mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64r6 | mipsisa64r6el \
| mipsisa64sb1 | mipsisa64sb1el \
| mipsisa64sr71k | mipsisa64sr71kel \
| mipsr5900 | mipsr5900el \
@@ -296,14 +298,15 @@ case $basic_machine in
| nds32 | nds32le | nds32be \
| nios | nios2 | nios2eb | nios2el \
| ns16k | ns32k \
- | open8 \
- | or1k | or32 \
+ | open8 | or1k | or1knd | or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
+ | pru \
| pyramid \
+ | riscv32 | riscv64 \
| rl78 | rx \
| score \
- | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
+ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
@@ -311,6 +314,7 @@ case $basic_machine in
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \
| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
+ | visium \
| we32k \
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
@@ -325,6 +329,9 @@ case $basic_machine in
c6x)
basic_machine=tic6x-unknown
;;
+ leon|leon[3-9])
+ basic_machine=sparc-$basic_machine
+ ;;
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
basic_machine=$basic_machine-unknown
os=-none
@@ -370,12 +377,13 @@ case $basic_machine in
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \
+ | ba-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| c8051-* | clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \
- | elxsi-* \
+ | e2k-* | elxsi-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
@@ -402,8 +410,10 @@ case $basic_machine in
| mips64vr5900-* | mips64vr5900el-* \
| mipsisa32-* | mipsisa32el-* \
| mipsisa32r2-* | mipsisa32r2el-* \
+ | mipsisa32r6-* | mipsisa32r6el-* \
| mipsisa64-* | mipsisa64el-* \
| mipsisa64r2-* | mipsisa64r2el-* \
+ | mipsisa64r6-* | mipsisa64r6el-* \
| mipsisa64sb1-* | mipsisa64sb1el-* \
| mipsisa64sr71k-* | mipsisa64sr71kel-* \
| mipsr5900-* | mipsr5900el-* \
@@ -415,16 +425,19 @@ case $basic_machine in
| nios-* | nios2-* | nios2eb-* | nios2el-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
+ | or1k*-* \
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+ | pru-* \
| pyramid-* \
+ | riscv32-* | riscv64-* \
| rl78-* | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
| tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tile*-* \
@@ -432,6 +445,7 @@ case $basic_machine in
| ubicom32-* \
| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
| vax-* \
+ | visium-* \
| we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \
@@ -508,6 +522,9 @@ case $basic_machine in
basic_machine=i386-pc
os=-aros
;;
+ asmjs)
+ basic_machine=asmjs-unknown
+ ;;
aux)
basic_machine=m68k-apple
os=-aux
@@ -628,6 +645,14 @@ case $basic_machine in
basic_machine=m68k-bull
os=-sysv3
;;
+ e500v[12])
+ basic_machine=powerpc-unknown
+ os=$os"spe"
+ ;;
+ e500v[12]-*)
+ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=$os"spe"
+ ;;
ebmon29k)
basic_machine=a29k-amd
os=-ebmon
@@ -769,6 +794,9 @@ case $basic_machine in
basic_machine=m68k-isi
os=-sysv
;;
+ leon-*|leon[3-9]-*)
+ basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`
+ ;;
m68knommu)
basic_machine=m68k-unknown
os=-linux
@@ -824,6 +852,10 @@ case $basic_machine in
basic_machine=powerpc-unknown
os=-morphos
;;
+ moxiebox)
+ basic_machine=moxie-unknown
+ os=-moxiebox
+ ;;
msdos)
basic_machine=i386-pc
os=-msdos
@@ -1000,7 +1032,7 @@ case $basic_machine in
ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppcle | powerpclittle | ppc-le | powerpc-little)
+ ppcle | powerpclittle)
basic_machine=powerpcle-unknown
;;
ppcle-* | powerpclittle-*)
@@ -1010,7 +1042,7 @@ case $basic_machine in
;;
ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+ ppc64le | powerpc64little)
basic_machine=powerpc64le-unknown
;;
ppc64le-* | powerpc64little-*)
@@ -1356,27 +1388,28 @@ case $os in
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* | -plan9* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* | -aros* \
+ | -aos* | -aros* | -cloudabi* | -sortix* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -bitrig* | -openbsd* | -solidbsd* \
+ | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+ | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
- | -uxpv* | -beos* | -mpeix* | -udk* \
+ | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
+ | -onefs* | -tirtos* | -phoenix* | -fuchsia*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
@@ -1508,6 +1541,8 @@ case $os in
;;
-nacl*)
;;
+ -ios)
+ ;;
-none)
;;
*)
@@ -1594,9 +1629,6 @@ case $basic_machine in
mips*-*)
os=-elf
;;
- or1k-*)
- os=-elf
- ;;
or32-*)
os=-coff
;;
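
(End of the config.sub hunks. config.sub expands aliases into canonical cpu-vendor-os triplets; a couple of illustrative invocations follow, with outputs that are indicative rather than guaranteed:)

    # Hypothetical usage of the updated script:
    sh build-aux/config.sub sun4            # sparc-sun-sunos4.1.1
    sh build-aux/config.sub riscv64-linux   # riscv64-unknown-linux-gnu
                                            # (riscv32/riscv64 are newly accepted above)
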
diff --git a/deps/jemalloc/install-sh b/deps/jemalloc/build-aux/install-sh
index ebc66913e..ebc66913e 100755
--- a/deps/jemalloc/install-sh
+++ b/deps/jemalloc/build-aux/install-sh
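
(The configure diff that follows is machine-generated from configure.ac; its long repeated blocks probe one compiler flag at a time. As a minimal sketch of that probe pattern, with every name below illustrative: append the candidate flag, try to compile an empty program, and roll the flag back on failure:)

    # Hypothetical distillation of the repeated -std=gnu11/-Wall/... probes below;
    # assumes $CC is already set, as configure has established by this point.
    try_cflag() {
      saved="$CONFIGURE_CFLAGS"
      CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS:+$CONFIGURE_CFLAGS }$1"
      echo 'int main(void) { return 0; }' > conftest.c
      if $CC $CONFIGURE_CFLAGS -c conftest.c -o conftest.o 2>/dev/null; then
        echo "yes: $1"
      else
        echo "no: $1"
        CONFIGURE_CFLAGS="$saved"   # roll back the rejected flag
      fi
      rm -f conftest.c conftest.o
    }
    try_cflag -std=gnu11
    try_cflag -Wall
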
diff --git a/deps/jemalloc/configure b/deps/jemalloc/configure
index 2e5496bfb..6aebfad0d 100755
--- a/deps/jemalloc/configure
+++ b/deps/jemalloc/configure
@@ -625,6 +625,7 @@ cfgoutputs_out
cfgoutputs_in
cfghdrs_out
cfghdrs_in
+enable_initial_exec_tls
enable_zone_allocator
enable_tls
enable_lazy_lock
@@ -634,22 +635,19 @@ jemalloc_version_bugfix
jemalloc_version_minor
jemalloc_version_major
jemalloc_version
+enable_log
+enable_cache_oblivious
enable_xmalloc
-enable_valgrind
enable_utrace
enable_fill
-enable_dss
-enable_munmap
-enable_mremap
-enable_tcache
enable_prof
enable_stats
enable_debug
je_
install_suffix
private_namespace
-enable_code_coverage
-enable_experimental
+JEMALLOC_CPREFIX
+JEMALLOC_PREFIX
AUTOCONF
LD
RANLIB
@@ -658,16 +656,20 @@ INSTALL_SCRIPT
INSTALL_PROGRAM
enable_autogen
RPATH_EXTRA
+LM
CC_MM
+DUMP_SYMS
AROUT
ARFLAGS
MKLIB
+TEST_LD_MODE
LDTARGET
CTARGET
PIC_CFLAGS
SOREV
EXTRA_LDFLAGS
DSO_LDFLAGS
+link_whole_archive
libprefix
exe
a
@@ -677,6 +679,8 @@ so
LD_PRELOAD_VAR
RPATH
abi
+AWK
+NM
AR
host_os
host_vendor
@@ -688,7 +692,18 @@ build_cpu
build
EGREP
GREP
+EXTRA_CXXFLAGS
+SPECIFIED_CXXFLAGS
+CONFIGURE_CXXFLAGS
+enable_cxx
+HAVE_CXX14
+ac_ct_CXX
+CXXFLAGS
+CXX
CPP
+EXTRA_CFLAGS
+SPECIFIED_CFLAGS
+CONFIGURE_CFLAGS
OBJEXT
EXEEXT
ac_ct_CC
@@ -709,6 +724,7 @@ objroot
abs_srcroot
srcroot
rev
+CONFIG
target_alias
host_alias
build_alias
@@ -751,35 +767,37 @@ ac_subst_files=''
ac_user_opts='
enable_option_checking
with_xslroot
+enable_cxx
+with_lg_vaddr
with_rpath
enable_autogen
-enable_experimental
-enable_code_coverage
with_mangling
with_jemalloc_prefix
with_export
with_private_namespace
with_install_suffix
-enable_cc_silence
+with_malloc_conf
enable_debug
-enable_ivsalloc
enable_stats
enable_prof
enable_prof_libunwind
with_static_libunwind
enable_prof_libgcc
enable_prof_gcc
-enable_tcache
-enable_mremap
-enable_munmap
-enable_dss
enable_fill
enable_utrace
-enable_valgrind
enable_xmalloc
+enable_cache_oblivious
+enable_log
+with_lg_quantum
+with_lg_page
+with_lg_hugepage
+with_lg_page_sizes
+with_version
+enable_syscall
enable_lazy_lock
-enable_tls
enable_zone_allocator
+enable_initial_exec_tls
'
ac_precious_vars='build_alias
host_alias
@@ -789,7 +807,10 @@ CFLAGS
LDFLAGS
LIBS
CPPFLAGS
-CPP'
+CPP
+CXX
+CXXFLAGS
+CCC'
# Initialize some variables set by options.
@@ -1401,35 +1422,34 @@ Optional Features:
--disable-option-checking ignore unrecognized --enable/--with options
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --disable-cxx Disable C++ integration
--enable-autogen Automatically regenerate configure output
- --disable-experimental Disable support for the experimental API
- --enable-code-coverage Enable code coverage
- --enable-cc-silence Silence irrelevant compiler warnings
- --enable-debug Build debugging code (implies --enable-ivsalloc)
- --enable-ivsalloc Validate pointers passed through the public API
+ --enable-debug Build debugging code
--disable-stats Disable statistics calculation/reporting
--enable-prof Enable allocation profiling
--enable-prof-libunwind Use libunwind for backtracing
--disable-prof-libgcc Do not use libgcc for backtracing
--disable-prof-gcc Do not use gcc intrinsics for backtracing
- --disable-tcache Disable per thread caches
- --enable-mremap Enable mremap(2) for huge realloc()
- --disable-munmap Disable VM deallocation via munmap(2)
- --enable-dss Enable allocation from DSS
- --disable-fill Disable support for junk/zero filling, quarantine,
- and redzones
+ --disable-fill Disable support for junk/zero filling
--enable-utrace Enable utrace(2)-based tracing
- --disable-valgrind Disable support for Valgrind
--enable-xmalloc Support xmalloc option
+ --disable-cache-oblivious
+ Disable support for cache-oblivious allocation
+ alignment
+ --enable-log Support debug logging
+ --disable-syscall Disable use of syscall(2)
--enable-lazy-lock Enable lazy locking (only lock when multi-threaded)
- --disable-tls Disable thread-local storage (__thread keyword)
--disable-zone-allocator
Disable zone allocator for Darwin
+ --disable-initial-exec-tls
+ Disable the initial-exec tls model
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-xslroot=<path> XSL stylesheet root path
+ --with-lg-vaddr=<lg-vaddr>
+ Number of significant virtual address bits
--with-rpath=<rpath> Colon-separated rpath (ELF systems only)
--with-mangling=<map> Mangle symbols in <map>
--with-jemalloc-prefix=<prefix>
@@ -1439,9 +1459,21 @@ Optional Packages:
Prefix to prepend to all library-private APIs
--with-install-suffix=<suffix>
Suffix to append to all installed files
+ --with-malloc-conf=<malloc_conf>
+ config.malloc_conf options string
--with-static-libunwind=<libunwind.a>
Path to static libunwind library; use rather than
dynamically linking
+ --with-lg-quantum=<lg-quantum>
+ Base 2 log of minimum allocation alignment
+ --with-lg-page=<lg-page>
+ Base 2 log of system page size
+ --with-lg-hugepage=<lg-hugepage>
+ Base 2 log of system huge page size
+ --with-lg-page-sizes=<lg-page-sizes>
+ Base 2 logs of system page sizes to support
+ --with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>
+ Version string
Some influential environment variables:
CC C compiler command
@@ -1452,6 +1484,8 @@ Some influential environment variables:
CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
CPP C preprocessor
+ CXX C++ compiler command
+ CXXFLAGS C++ compiler flags
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
@@ -1608,6 +1642,90 @@ fi
} # ac_fn_c_try_cpp
+# ac_fn_cxx_try_compile LINENO
+# ----------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_cxx_try_compile ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext
+ if { { ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compile") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_cxx_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest.$ac_objext; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_cxx_try_compile
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext conftest$ac_exeext
+ if { { ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_link") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ test -x conftest$ac_exeext
+ }; then :
+ ac_retval=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_retval=1
+fi
+ # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+ # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+ # interfere with the next link command; also delete a directory that is
+ # left behind by Apple's compiler. We do this before executing the actions.
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+ as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
# ac_fn_c_try_run LINENO
# ----------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
@@ -1864,119 +1982,6 @@ rm -f conftest.val
} # ac_fn_c_compute_int
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
- if { { ac_try="$ac_link"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
- ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- test -x conftest$ac_exeext
- }; then :
- ac_retval=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_retval=1
-fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
- rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
- as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_link
-
-# ac_fn_c_check_func LINENO FUNC VAR
-# ----------------------------------
-# Tests whether FUNC exists, setting the cache variable VAR accordingly
-ac_fn_c_check_func ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
- For example, HP-UX 11i <limits.h> declares gettimeofday. */
-#define $2 innocuous_$2
-
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $2 (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $2
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char $2 ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined __stub_$2 || defined __stub___$2
-choke me
-#endif
-
-int
-main ()
-{
-return $2 ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- eval "$3=yes"
-else
- eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_func
-
# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
# Tests whether HEADER exists, giving a warning if it cannot be compiled using
@@ -2064,6 +2069,73 @@ fi
} # ac_fn_c_check_header_mongrel
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $2 (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ eval "$3=yes"
+else
+ eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
# -------------------------------------------
# Tests whether TYPE exists after having included INCLUDES, setting cache
@@ -2470,13 +2542,59 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
+ac_aux_dir=
+for ac_dir in build-aux "$srcdir"/build-aux; do
+ if test -f "$ac_dir/install-sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f "$ac_dir/install.sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f "$ac_dir/shtool"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ as_fn_error $? "cannot find install-sh, install.sh, or shtool in build-aux \"$srcdir\"/build-aux" "$LINENO" 5
+fi
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var.
+ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var.
+ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
+
+
+
+
+
+
+
+
+CONFIGURE_CFLAGS=
+SPECIFIED_CFLAGS="${CFLAGS}"
+CONFIGURE_CXXFLAGS=
+SPECIFIED_CXXFLAGS="${CXXFLAGS}"
-rev=1
+
+
+
+
+CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'`
+
+
+rev=2
srcroot=$srcdir
@@ -3373,6 +3491,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
if test "x$GCC" != "xyes" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5
@@ -3406,18 +3525,155 @@ fi
$as_echo "$je_cv_msvc" >&6; }
fi
-if test "x$CFLAGS" = "x" ; then
- no_CFLAGS="yes"
- if test "x$GCC" = "xyes" ; then
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+ case "${CC}" in
+ CC|cc)
+ je_cv_cray_prgenv_wrapper="yes"
+ ;;
+ *)
+ ;;
+ esac
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is cray" >&5
+$as_echo_n "checking whether compiler is cray... " >&6; }
+if ${je_cv_cray+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+#ifndef _CRAYC
+ int fail-1;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cray=yes
+else
+ je_cv_cray=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray" >&5
+$as_echo "$je_cv_cray" >&6; }
+
+if test "x${je_cv_cray}" = "xyes" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cray compiler version is 8.4" >&5
+$as_echo_n "checking whether cray compiler version is 8.4... " >&6; }
+if ${je_cv_cray_84+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+ int fail-1;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cray_84=yes
+else
+ je_cv_cray_84=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_cray_84" >&5
+$as_echo "$je_cv_cray_84" >&6; }
+fi
+
+if test "x$GCC" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu11" >&5
+$as_echo_n "checking whether compiler supports -std=gnu11... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-std=gnu11
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-std=gnu11
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+ if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then
+ cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_HAS_RESTRICT 1
+_ACEOF
+
+ else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5
$as_echo_n "checking whether compiler supports -std=gnu99... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-std=gnu99"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-std=gnu99
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -std=gnu99"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3433,33 +3689,49 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-std=gnu99
+ je_cv_cflags_added=-std=gnu99
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
- if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+ if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then
cat >>confdefs.h <<_ACEOF
#define JEMALLOC_HAS_RESTRICT 1
_ACEOF
fi
+ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5
$as_echo_n "checking whether compiler supports -Wall... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-Wall"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Wall
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
else
- CFLAGS="${CFLAGS} -Wall"
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3475,27 +3747,246 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-Wall
+ je_cv_cflags_added=-Wall
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wshorten-64-to-32" >&5
+$as_echo_n "checking whether compiler supports -Wshorten-64-to-32... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Wshorten-64-to-32
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Wshorten-64-to-32
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wsign-compare" >&5
+$as_echo_n "checking whether compiler supports -Wsign-compare... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Wsign-compare
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Wsign-compare
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wundef" >&5
+$as_echo_n "checking whether compiler supports -Wundef... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Wundef
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Wundef
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wno-format-zero-length" >&5
+$as_echo_n "checking whether compiler supports -Wno-format-zero-length... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Wno-format-zero-length
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Wno-format-zero-length
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
$as_echo_n "checking whether compiler supports -pipe... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-pipe"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-pipe
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
else
- CFLAGS="${CFLAGS} -pipe"
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3511,27 +4002,42 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-pipe
+ je_cv_cflags_added=-pipe
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -g3" >&5
$as_echo_n "checking whether compiler supports -g3... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-g3"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-g3
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
else
- CFLAGS="${CFLAGS} -g3"
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3547,29 +4053,44 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-g3
+ je_cv_cflags_added=-g3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
- elif test "x$je_cv_msvc" = "xyes" ; then
- CC="$CC -nologo"
+
+elif test "x$je_cv_msvc" = "xyes" ; then
+ CC="$CC -nologo"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Zi" >&5
$as_echo_n "checking whether compiler supports -Zi... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-Zi"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Zi
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -Zi"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3585,27 +4106,42 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-Zi
+ je_cv_cflags_added=-Zi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -MT" >&5
$as_echo_n "checking whether compiler supports -MT... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-MT"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-MT
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -MT"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3621,27 +4157,93 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-MT
+ je_cv_cflags_added=-MT
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -W3" >&5
$as_echo_n "checking whether compiler supports -W3... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-W3"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-W3
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-W3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -FS" >&5
+$as_echo_n "checking whether compiler supports -FS... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-FS
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -W3"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3657,31 +4259,207 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-W3
+ je_cv_cflags_added=-FS
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+ T_APPEND_V=-I${srcdir}/include/msvc_compat
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
+fi
+if test "x$je_cv_cray" = "xyes" ; then
+ if test "x$je_cv_cray_84" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hipa2" >&5
+$as_echo_n "checking whether compiler supports -hipa2... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-hipa2
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-hipa2
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnognu" >&5
+$as_echo_n "checking whether compiler supports -hnognu... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-hnognu
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-hnognu
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
- CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=128" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=128... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-hnomessage=128
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-hnomessage=128
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
fi
-if test "x$EXTRA_CFLAGS" != "x" ; then
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports $EXTRA_CFLAGS" >&5
-$as_echo_n "checking whether compiler supports $EXTRA_CFLAGS... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="$EXTRA_CFLAGS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -hnomessage=1357" >&5
+$as_echo_n "checking whether compiler supports -hnomessage=1357... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-hnomessage=1357
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} $EXTRA_CFLAGS"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -3697,19 +4475,28 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=$EXTRA_CFLAGS
+ je_cv_cflags_added=-hnomessage=1357
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
fi
+
+
+
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -3848,6 +4635,1422 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
ac_compiler_gnu=$ac_cv_c_compiler_gnu
+# Check whether --enable-cxx was given.
+if test "${enable_cxx+set}" = set; then :
+ enableval=$enable_cxx; if test "x$enable_cxx" = "xno" ; then
+ enable_cxx="0"
+else
+ enable_cxx="1"
+fi
+
+else
+ enable_cxx="1"
+
+fi
+
+if test "x$enable_cxx" = "x1" ; then
+ # ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional])
+#
+# DESCRIPTION
+#
+# Check for baseline language coverage in the compiler for the specified
+# version of the C++ standard. If necessary, add switches to CXX and
+# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard)
+# or '14' (for the C++14 standard).
+#
+# The second argument, if specified, indicates whether you insist on an
+# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
+# -std=c++11). If neither is specified, you get whatever works, with
+# preference for an extended mode.
+#
+# The third argument, if specified 'mandatory' or if left unspecified,
+# indicates that baseline support for the specified C++ standard is
+# required and that the macro should error out if no mode with that
+# support is found. If specified 'optional', then configuration proceeds
+# regardless, after defining HAVE_CXX${VERSION} if and only if a
+# supporting mode is found.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
+# Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
+# Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
+# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
+# Copyright (c) 2015 Paul Norman <penorman@mac.com>
+# Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 4
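+#
+# A minimal sketch of how this macro is typically invoked from configure.ac;
+# the [14]/[noext]/[optional] arguments are inferred from the generated checks
+# below (C++14 tests, no gnu++ switches, non-fatal fallback), not quoted from
+# the source:
+#
+#   AX_CXX_COMPILE_STDCXX([14], [noext], [optional])
+#
+# On success it defines HAVE_CXX14 and appends the chosen -std switch to CXX.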
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+ if test -n "$CCC"; then
+ CXX=$CCC
+ else
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$CXX"; then
+ ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5
+$as_echo "$CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$CXX" && break
+ done
+fi
+if test -z "$CXX"; then
+ ac_ct_CXX=$CXX
+ for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_CXX"; then
+ ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_CXX="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5
+$as_echo "$ac_ct_CXX" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CXX" && break
+done
+
+ if test "x$ac_ct_CXX" = x; then
+ CXX="g++"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ CXX=$ac_ct_CXX
+ fi
+fi
+
+ fi
+fi
+# Provide some information about the compiler.
+$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion; do
+ { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+ ac_status=$?
+ if test -s conftest.err; then
+ sed '10a\
+... rest of stderr output deleted ...
+ 10q' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ fi
+ rm -f conftest.er1 conftest.err
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }
+done
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
+$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_compiler_gnu=yes
+else
+ ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5
+$as_echo "$ac_cv_cxx_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+ GXX=yes
+else
+ GXX=
+fi
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
+$as_echo_n "checking whether $CXX accepts -g... " >&6; }
+if ${ac_cv_prog_cxx_g+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ ac_cxx_werror_flag=yes
+ ac_cv_prog_cxx_g=no
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+else
+ CXXFLAGS=""
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+
+else
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+ CXXFLAGS="-g"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ac_cv_prog_cxx_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5
+$as_echo "$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+ CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+ if test "$GXX" = yes; then
+ CXXFLAGS="-g -O2"
+ else
+ CXXFLAGS="-g"
+ fi
+else
+ if test "$GXX" = yes; then
+ CXXFLAGS="-O2"
+ else
+ CXXFLAGS=
+ fi
+fi
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+ ax_cxx_compile_cxx14_required=false
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+ ac_success=no
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++14 features by default" >&5
+$as_echo_n "checking whether $CXX supports C++14 features by default... " >&6; }
+if ${ax_cv_cxx_compile_cxx14+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+// If the compiler admits that it is not ready for C++11, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201103L
+
+#error "This is not a C++11 compiler"
+
+#else
+
+namespace cxx11
+{
+
+ namespace test_static_assert
+ {
+
+ template <typename T>
+ struct check
+ {
+ static_assert(sizeof(int) <= sizeof(T), "not big enough");
+ };
+
+ }
+
+ namespace test_final_override
+ {
+
+ struct Base
+ {
+ virtual void f() {}
+ };
+
+ struct Derived : public Base
+ {
+ virtual void f() override {}
+ };
+
+ }
+
+ namespace test_double_right_angle_brackets
+ {
+
+ template < typename T >
+ struct check {};
+
+ typedef check<void> single_type;
+ typedef check<check<void>> double_type;
+ typedef check<check<check<void>>> triple_type;
+ typedef check<check<check<check<void>>>> quadruple_type;
+
+ }
+
+ namespace test_decltype
+ {
+
+ int
+ f()
+ {
+ int a = 1;
+ decltype(a) b = 2;
+ return a + b;
+ }
+
+ }
+
+ namespace test_type_deduction
+ {
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static const bool value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static const bool value = true;
+ };
+
+ template < typename T1, typename T2 >
+ auto
+ add(T1 a1, T2 a2) -> decltype(a1 + a2)
+ {
+ return a1 + a2;
+ }
+
+ int
+ test(const int c, volatile int v)
+ {
+ static_assert(is_same<int, decltype(0)>::value == true, "");
+ static_assert(is_same<int, decltype(c)>::value == false, "");
+ static_assert(is_same<int, decltype(v)>::value == false, "");
+ auto ac = c;
+ auto av = v;
+ auto sumi = ac + av + 'x';
+ auto sumf = ac + av + 1.0;
+ static_assert(is_same<int, decltype(ac)>::value == true, "");
+ static_assert(is_same<int, decltype(av)>::value == true, "");
+ static_assert(is_same<int, decltype(sumi)>::value == true, "");
+ static_assert(is_same<int, decltype(sumf)>::value == false, "");
+ static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
+ return (sumf > 0.0) ? sumi : add(c, v);
+ }
+
+ }
+
+ namespace test_noexcept
+ {
+
+ int f() { return 0; }
+ int g() noexcept { return 0; }
+
+ static_assert(noexcept(f()) == false, "");
+ static_assert(noexcept(g()) == true, "");
+
+ }
+
+ namespace test_constexpr
+ {
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
+ {
+ return *s ? strlen_c_r(s + 1, acc + 1) : acc;
+ }
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c(const CharT *const s) noexcept
+ {
+ return strlen_c_r(s, 0UL);
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("1") == 1UL, "");
+ static_assert(strlen_c("example") == 7UL, "");
+ static_assert(strlen_c("another\0example") == 7UL, "");
+
+ }
+
+ namespace test_rvalue_references
+ {
+
+ template < int N >
+ struct answer
+ {
+ static constexpr int value = N;
+ };
+
+ answer<1> f(int&) { return answer<1>(); }
+ answer<2> f(const int&) { return answer<2>(); }
+ answer<3> f(int&&) { return answer<3>(); }
+
+ void
+ test()
+ {
+ int i = 0;
+ const int c = 0;
+ static_assert(decltype(f(i))::value == 1, "");
+ static_assert(decltype(f(c))::value == 2, "");
+ static_assert(decltype(f(0))::value == 3, "");
+ }
+
+ }
+
+ namespace test_uniform_initialization
+ {
+
+ struct test
+ {
+ static const int zero {};
+ static const int one {1};
+ };
+
+ static_assert(test::zero == 0, "");
+ static_assert(test::one == 1, "");
+
+ }
+
+ namespace test_lambdas
+ {
+
+ void
+ test1()
+ {
+ auto lambda1 = [](){};
+ auto lambda2 = lambda1;
+ lambda1();
+ lambda2();
+ }
+
+ int
+ test2()
+ {
+ auto a = [](int i, int j){ return i + j; }(1, 2);
+ auto b = []() -> int { return '0'; }();
+ auto c = [=](){ return a + b; }();
+ auto d = [&](){ return c; }();
+ auto e = [a, &b](int x) mutable {
+ const auto identity = [](int y){ return y; };
+ for (auto i = 0; i < a; ++i)
+ a += b--;
+ return x + identity(a + b);
+ }(0);
+ return a + b + c + d + e;
+ }
+
+ int
+ test3()
+ {
+ const auto nullary = [](){ return 0; };
+ const auto unary = [](int x){ return x; };
+ using nullary_t = decltype(nullary);
+ using unary_t = decltype(unary);
+ const auto higher1st = [](nullary_t f){ return f(); };
+ const auto higher2nd = [unary](nullary_t f1){
+ return [unary, f1](unary_t f2){ return f2(unary(f1())); };
+ };
+ return higher1st(nullary) + higher2nd(nullary)(unary);
+ }
+
+ }
+
+ namespace test_variadic_templates
+ {
+
+ template <int...>
+ struct sum;
+
+ template <int N0, int... N1toN>
+ struct sum<N0, N1toN...>
+ {
+ static constexpr auto value = N0 + sum<N1toN...>::value;
+ };
+
+ template <>
+ struct sum<>
+ {
+ static constexpr auto value = 0;
+ };
+
+ static_assert(sum<>::value == 0, "");
+ static_assert(sum<1>::value == 1, "");
+ static_assert(sum<23>::value == 23, "");
+ static_assert(sum<1, 2>::value == 3, "");
+ static_assert(sum<5, 5, 11>::value == 21, "");
+ static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
+
+ }
+
+ // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
+ // Clang 3.1 fails with headers of libstdc++ 4.8.3 when using std::function
+ // because of this.
+ namespace test_template_alias_sfinae
+ {
+
+ struct foo {};
+
+ template<typename T>
+ using member = typename T::member_type;
+
+ template<typename T>
+ void func(...) {}
+
+ template<typename T>
+ void func(member<T>*) {}
+
+ void test();
+
+ void test() { func<foo>(0); }
+
+ }
+
+} // namespace cxx11
+
+#endif // __cplusplus >= 201103L
+
+
+
+
+// If the compiler admits that it is not ready for C++14, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201402L
+
+#error "This is not a C++14 compiler"
+
+#else
+
+namespace cxx14
+{
+
+ namespace test_polymorphic_lambdas
+ {
+
+ int
+ test()
+ {
+ const auto lambda = [](auto&&... args){
+ const auto istiny = [](auto x){
+ return (sizeof(x) == 1UL) ? 1 : 0;
+ };
+ const int aretiny[] = { istiny(args)... };
+ return aretiny[0];
+ };
+ return lambda(1, 1L, 1.0f, '1');
+ }
+
+ }
+
+ namespace test_binary_literals
+ {
+
+ constexpr auto ivii = 0b0000000000101010;
+ static_assert(ivii == 42, "wrong value");
+
+ }
+
+ namespace test_generalized_constexpr
+ {
+
+ template < typename CharT >
+ constexpr unsigned long
+ strlen_c(const CharT *const s) noexcept
+ {
+ auto length = 0UL;
+ for (auto p = s; *p; ++p)
+ ++length;
+ return length;
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("x") == 1UL, "");
+ static_assert(strlen_c("test") == 4UL, "");
+ static_assert(strlen_c("another\0test") == 7UL, "");
+
+ }
+
+ namespace test_lambda_init_capture
+ {
+
+ int
+ test()
+ {
+ auto x = 0;
+ const auto lambda1 = [a = x](int b){ return a + b; };
+ const auto lambda2 = [a = lambda1(x)](){ return a; };
+ return lambda2();
+ }
+
+ }
+
+ namespace test_digit_separators
+ {
+
+ constexpr auto hundred_million = 100'000'000;
+ static_assert(hundred_million == 100000000, "");
+
+ }
+
+ namespace test_return_type_deduction
+ {
+
+ auto f(int& x) { return x; }
+ decltype(auto) g(int& x) { return x; }
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static constexpr auto value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static constexpr auto value = true;
+ };
+
+ int
+ test()
+ {
+ auto x = 0;
+ static_assert(is_same<int, decltype(f(x))>::value, "");
+ static_assert(is_same<int&, decltype(g(x))>::value, "");
+ return x;
+ }
+
+ }
+
+} // namespace cxx14
+
+#endif // __cplusplus >= 201402L
+
+
+
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ ax_cv_cxx_compile_cxx14=yes
+else
+ ax_cv_cxx_compile_cxx14=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_cxx_compile_cxx14" >&5
+$as_echo "$ax_cv_cxx_compile_cxx14" >&6; }
+ if test x$ax_cv_cxx_compile_cxx14 = xyes; then
+ ac_success=yes
+ fi
+
+
+
+ if test x$ac_success = xno; then
+ for switch in -std=c++14 -std=c++0x +std=c++14 "-h std=c++14"; do
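+    # Spellings tried in turn: -std=c++14 for GCC/Clang, +std=c++14 for
+    # HP aCC, and "-h std=c++14" for Cray CC, with -std=c++0x kept as an
+    # older fallback spelling.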
+ cachevar=`$as_echo "ax_cv_cxx_compile_cxx14_$switch" | $as_tr_sh`
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX supports C++14 features with $switch" >&5
+$as_echo_n "checking whether $CXX supports C++14 features with $switch... " >&6; }
+if eval \${$cachevar+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_save_CXX="$CXX"
+ CXX="$CXX $switch"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+// If the compiler admits that it is not ready for C++11, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201103L
+
+#error "This is not a C++11 compiler"
+
+#else
+
+namespace cxx11
+{
+
+ namespace test_static_assert
+ {
+
+ template <typename T>
+ struct check
+ {
+ static_assert(sizeof(int) <= sizeof(T), "not big enough");
+ };
+
+ }
+
+ namespace test_final_override
+ {
+
+ struct Base
+ {
+ virtual void f() {}
+ };
+
+ struct Derived : public Base
+ {
+ virtual void f() override {}
+ };
+
+ }
+
+ namespace test_double_right_angle_brackets
+ {
+
+ template < typename T >
+ struct check {};
+
+ typedef check<void> single_type;
+ typedef check<check<void>> double_type;
+ typedef check<check<check<void>>> triple_type;
+ typedef check<check<check<check<void>>>> quadruple_type;
+
+ }
+
+ namespace test_decltype
+ {
+
+ int
+ f()
+ {
+ int a = 1;
+ decltype(a) b = 2;
+ return a + b;
+ }
+
+ }
+
+ namespace test_type_deduction
+ {
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static const bool value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static const bool value = true;
+ };
+
+ template < typename T1, typename T2 >
+ auto
+ add(T1 a1, T2 a2) -> decltype(a1 + a2)
+ {
+ return a1 + a2;
+ }
+
+ int
+ test(const int c, volatile int v)
+ {
+ static_assert(is_same<int, decltype(0)>::value == true, "");
+ static_assert(is_same<int, decltype(c)>::value == false, "");
+ static_assert(is_same<int, decltype(v)>::value == false, "");
+ auto ac = c;
+ auto av = v;
+ auto sumi = ac + av + 'x';
+ auto sumf = ac + av + 1.0;
+ static_assert(is_same<int, decltype(ac)>::value == true, "");
+ static_assert(is_same<int, decltype(av)>::value == true, "");
+ static_assert(is_same<int, decltype(sumi)>::value == true, "");
+ static_assert(is_same<int, decltype(sumf)>::value == false, "");
+ static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
+ return (sumf > 0.0) ? sumi : add(c, v);
+ }
+
+ }
+
+ namespace test_noexcept
+ {
+
+ int f() { return 0; }
+ int g() noexcept { return 0; }
+
+ static_assert(noexcept(f()) == false, "");
+ static_assert(noexcept(g()) == true, "");
+
+ }
+
+ namespace test_constexpr
+ {
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
+ {
+ return *s ? strlen_c_r(s + 1, acc + 1) : acc;
+ }
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c(const CharT *const s) noexcept
+ {
+ return strlen_c_r(s, 0UL);
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("1") == 1UL, "");
+ static_assert(strlen_c("example") == 7UL, "");
+ static_assert(strlen_c("another\0example") == 7UL, "");
+
+ }
+
+ namespace test_rvalue_references
+ {
+
+ template < int N >
+ struct answer
+ {
+ static constexpr int value = N;
+ };
+
+ answer<1> f(int&) { return answer<1>(); }
+ answer<2> f(const int&) { return answer<2>(); }
+ answer<3> f(int&&) { return answer<3>(); }
+
+ void
+ test()
+ {
+ int i = 0;
+ const int c = 0;
+ static_assert(decltype(f(i))::value == 1, "");
+ static_assert(decltype(f(c))::value == 2, "");
+ static_assert(decltype(f(0))::value == 3, "");
+ }
+
+ }
+
+ namespace test_uniform_initialization
+ {
+
+ struct test
+ {
+ static const int zero {};
+ static const int one {1};
+ };
+
+ static_assert(test::zero == 0, "");
+ static_assert(test::one == 1, "");
+
+ }
+
+ namespace test_lambdas
+ {
+
+ void
+ test1()
+ {
+ auto lambda1 = [](){};
+ auto lambda2 = lambda1;
+ lambda1();
+ lambda2();
+ }
+
+ int
+ test2()
+ {
+ auto a = [](int i, int j){ return i + j; }(1, 2);
+ auto b = []() -> int { return '0'; }();
+ auto c = [=](){ return a + b; }();
+ auto d = [&](){ return c; }();
+ auto e = [a, &b](int x) mutable {
+ const auto identity = [](int y){ return y; };
+ for (auto i = 0; i < a; ++i)
+ a += b--;
+ return x + identity(a + b);
+ }(0);
+ return a + b + c + d + e;
+ }
+
+ int
+ test3()
+ {
+ const auto nullary = [](){ return 0; };
+ const auto unary = [](int x){ return x; };
+ using nullary_t = decltype(nullary);
+ using unary_t = decltype(unary);
+ const auto higher1st = [](nullary_t f){ return f(); };
+ const auto higher2nd = [unary](nullary_t f1){
+ return [unary, f1](unary_t f2){ return f2(unary(f1())); };
+ };
+ return higher1st(nullary) + higher2nd(nullary)(unary);
+ }
+
+ }
+
+ namespace test_variadic_templates
+ {
+
+ template <int...>
+ struct sum;
+
+ template <int N0, int... N1toN>
+ struct sum<N0, N1toN...>
+ {
+ static constexpr auto value = N0 + sum<N1toN...>::value;
+ };
+
+ template <>
+ struct sum<>
+ {
+ static constexpr auto value = 0;
+ };
+
+ static_assert(sum<>::value == 0, "");
+ static_assert(sum<1>::value == 1, "");
+ static_assert(sum<23>::value == 23, "");
+ static_assert(sum<1, 2>::value == 3, "");
+ static_assert(sum<5, 5, 11>::value == 21, "");
+ static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
+
+ }
+
+ // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
+ // Clang 3.1 fails with headers of libstdc++ 4.8.3 when using std::function
+ // because of this.
+ namespace test_template_alias_sfinae
+ {
+
+ struct foo {};
+
+ template<typename T>
+ using member = typename T::member_type;
+
+ template<typename T>
+ void func(...) {}
+
+ template<typename T>
+ void func(member<T>*) {}
+
+ void test();
+
+ void test() { func<foo>(0); }
+
+ }
+
+} // namespace cxx11
+
+#endif // __cplusplus >= 201103L
+
+
+
+
+// If the compiler admits that it is not ready for C++14, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201402L
+
+#error "This is not a C++14 compiler"
+
+#else
+
+namespace cxx14
+{
+
+ namespace test_polymorphic_lambdas
+ {
+
+ int
+ test()
+ {
+ const auto lambda = [](auto&&... args){
+ const auto istiny = [](auto x){
+ return (sizeof(x) == 1UL) ? 1 : 0;
+ };
+ const int aretiny[] = { istiny(args)... };
+ return aretiny[0];
+ };
+ return lambda(1, 1L, 1.0f, '1');
+ }
+
+ }
+
+ namespace test_binary_literals
+ {
+
+ constexpr auto ivii = 0b0000000000101010;
+ static_assert(ivii == 42, "wrong value");
+
+ }
+
+ namespace test_generalized_constexpr
+ {
+
+ template < typename CharT >
+ constexpr unsigned long
+ strlen_c(const CharT *const s) noexcept
+ {
+ auto length = 0UL;
+ for (auto p = s; *p; ++p)
+ ++length;
+ return length;
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("x") == 1UL, "");
+ static_assert(strlen_c("test") == 4UL, "");
+ static_assert(strlen_c("another\0test") == 7UL, "");
+
+ }
+
+ namespace test_lambda_init_capture
+ {
+
+ int
+ test()
+ {
+ auto x = 0;
+ const auto lambda1 = [a = x](int b){ return a + b; };
+ const auto lambda2 = [a = lambda1(x)](){ return a; };
+ return lambda2();
+ }
+
+ }
+
+ namespace test_digit_separators
+ {
+
+ constexpr auto hundred_million = 100'000'000;
+ static_assert(hundred_million == 100000000, "");
+
+ }
+
+ namespace test_return_type_deduction
+ {
+
+ auto f(int& x) { return x; }
+ decltype(auto) g(int& x) { return x; }
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static constexpr auto value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static constexpr auto value = true;
+ };
+
+ int
+ test()
+ {
+ auto x = 0;
+ static_assert(is_same<int, decltype(f(x))>::value, "");
+ static_assert(is_same<int&, decltype(g(x))>::value, "");
+ return x;
+ }
+
+ }
+
+} // namespace cxx14
+
+#endif // __cplusplus >= 201402L
+
+
+
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ eval $cachevar=yes
+else
+ eval $cachevar=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXX="$ac_save_CXX"
+fi
+eval ac_res=\$$cachevar
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+ if eval test x\$$cachevar = xyes; then
+ CXX="$CXX $switch"
+ if test -n "$CXXCPP" ; then
+ CXXCPP="$CXXCPP $switch"
+ fi
+ ac_success=yes
+ break
+ fi
+ done
+ fi
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ if test x$ax_cxx_compile_cxx14_required = xtrue; then
+ if test x$ac_success = xno; then
+ as_fn_error $? "*** A compiler with support for C++14 language features is required." "$LINENO" 5
+ fi
+ fi
+ if test x$ac_success = xno; then
+ HAVE_CXX14=0
+ { $as_echo "$as_me:${as_lineno-$LINENO}: No compiler with C++14 support was found" >&5
+$as_echo "$as_me: No compiler with C++14 support was found" >&6;}
+ else
+ HAVE_CXX14=1
+
+$as_echo "#define HAVE_CXX14 1" >>confdefs.h
+
+ fi
+
+
+ if test "x${HAVE_CXX14}" = "x1" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5
+$as_echo_n "checking whether compiler supports -Wall... " >&6; }
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-Wall
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-Wall
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cxxflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -g3" >&5
+$as_echo_n "checking whether compiler supports -g3... " >&6; }
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-g3
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-g3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cxxflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+
+
+ SAVED_LIBS="${LIBS}"
+ T_APPEND_V=-lstdc++
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether libstdc++ linkage is compilable" >&5
+$as_echo_n "checking whether libstdc++ linkage is compilable... " >&6; }
+if ${je_cv_libstdcxx+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <stdlib.h>
+
+int
+main ()
+{
+
+ int *arr = (int *)malloc(sizeof(int) * 42);
+ if (arr == NULL)
+ return 1;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_libstdcxx=yes
+else
+ je_cv_libstdcxx=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_libstdcxx" >&5
+$as_echo "$je_cv_libstdcxx" >&6; }
+
+ if test "x${je_cv_libstdcxx}" = "xno" ; then
+ LIBS="${SAVED_LIBS}"
+ fi
+ else
+ enable_cxx="0"
+ fi
+fi
+
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
@@ -4338,7 +6541,23 @@ _ACEOF
fi
-# The cast to long int works around a bug in the HP C Compiler
+if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
+ T_APPEND_V=-I${srcdir}/include/msvc_compat/C99
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
+fi
+
+if test "x${je_cv_msvc}" = "xyes" ; then
+ LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&5
+$as_echo "Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit" >&6; }
+else
+ # The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
# This bug is HP SR number 8606223364.
@@ -4371,12 +6590,13 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
-if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
- LG_SIZEOF_PTR=3
-elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
- LG_SIZEOF_PTR=2
-else
- as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
+ if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+ LG_SIZEOF_PTR=3
+ elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+ LG_SIZEOF_PTR=2
+ else
+ as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
+ fi
fi
cat >>confdefs.h <<_ACEOF
#define LG_SIZEOF_PTR $LG_SIZEOF_PTR
@@ -4477,6 +6697,51 @@ _ACEOF
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
# This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long long" >&5
+$as_echo_n "checking size of long long... " >&6; }
+if ${ac_cv_sizeof_long_long+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long long))" "ac_cv_sizeof_long_long" "$ac_includes_default"; then :
+
+else
+ if test "$ac_cv_type_long_long" = yes; then
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (long long)
+See \`config.log' for more details" "$LINENO" 5; }
+ else
+ ac_cv_sizeof_long_long=0
+ fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_long_long" >&5
+$as_echo "$ac_cv_sizeof_long_long" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_LONG_LONG $ac_cv_sizeof_long_long
+_ACEOF
+
+
+if test "x${ac_cv_sizeof_long_long}" = "x8" ; then
+ LG_SIZEOF_LONG_LONG=3
+elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then
+ LG_SIZEOF_LONG_LONG=2
+else
+ as_fn_error $? "Unsupported long long size: ${ac_cv_sizeof_long_long}" "$LINENO" 5
+fi
+cat >>confdefs.h <<_ACEOF
+#define LG_SIZEOF_LONG_LONG $LG_SIZEOF_LONG_LONG
+_ACEOF
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of intmax_t" >&5
$as_echo_n "checking size of intmax_t... " >&6; }
if ${ac_cv_sizeof_intmax_t+:} false; then :
@@ -4520,35 +6785,6 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
-ac_aux_dir=
-for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
- if test -f "$ac_dir/install-sh"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install-sh -c"
- break
- elif test -f "$ac_dir/install.sh"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/install.sh -c"
- break
- elif test -f "$ac_dir/shtool"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/shtool install -c"
- break
- fi
-done
-if test -z "$ac_aux_dir"; then
- as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
-fi
-
-# These three variables are undocumented and unsupported,
-# and are intended to be withdrawn in a future Autoconf release.
-# They can cause serious problems if a builder's source tree is in a directory
-# whose full name contains unusual characters.
-ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var.
-ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var.
-ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
-
-
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
@@ -4622,9 +6858,49 @@ case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
CPU_SPINWAIT=""
case "${host_cpu}" in
- i[345]86)
- ;;
i686|x86_64)
+ HAVE_CPU_SPINWAIT=1
+ if test "x${je_cv_msvc}" = "xyes" ; then
+ if ${je_cv_pause_msvc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction MSVC is compilable" >&5
+$as_echo_n "checking whether pause instruction MSVC is compilable... " >&6; }
+if ${je_cv_pause_msvc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+_mm_pause(); return 0;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_pause_msvc=yes
+else
+ je_cv_pause_msvc=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause_msvc" >&5
+$as_echo "$je_cv_pause_msvc" >&6; }
+
+fi
+
+ if test "x${je_cv_pause_msvc}" = "xyes" ; then
+ CPU_SPINWAIT='_mm_pause()'
+ fi
+ else
+ if ${je_cv_pause+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pause instruction is compilable" >&5
$as_echo_n "checking whether pause instruction is compilable... " >&6; }
@@ -4653,57 +6929,149 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pause" >&5
$as_echo "$je_cv_pause" >&6; }
- if test "x${je_cv_pause}" = "xyes" ; then
- CPU_SPINWAIT='__asm__ volatile("pause")'
+fi
+
+ if test "x${je_cv_pause}" = "xyes" ; then
+ CPU_SPINWAIT='__asm__ volatile("pause")'
+ fi
fi
+ ;;
+ *)
+ HAVE_CPU_SPINWAIT=0
+ ;;
+esac
+cat >>confdefs.h <<_ACEOF
+#define HAVE_CPU_SPINWAIT $HAVE_CPU_SPINWAIT
+_ACEOF
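+# CPU_SPINWAIT expands to a spin-loop pause hint on x86 ('_mm_pause()' under
+# MSVC, inline "pause" assembly otherwise) and stays empty on other CPUs;
+# HAVE_CPU_SPINWAIT records whether such a hint exists.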
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether SSE2 intrinsics is compilable" >&5
-$as_echo_n "checking whether SSE2 intrinsics is compilable... " >&6; }
-if ${je_cv_sse2+:} false; then :
+cat >>confdefs.h <<_ACEOF
+#define CPU_SPINWAIT $CPU_SPINWAIT
+_ACEOF
+
+
+
+# Check whether --with-lg_vaddr was given.
+if test "${with_lg_vaddr+set}" = set; then :
+ withval=$with_lg_vaddr; LG_VADDR="$with_lg_vaddr"
+else
+ LG_VADDR="detect"
+fi
+
+
+case "${host_cpu}" in
+ aarch64)
+ if test "x$LG_VADDR" = "xdetect"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking number of significant virtual address bits" >&5
+$as_echo_n "checking number of significant virtual address bits... " >&6; }
+ if test "x${LG_SIZEOF_PTR}" = "x2" ; then
+ #aarch64 ILP32
+ LG_VADDR=32
+ else
+ #aarch64 LP64
+ LG_VADDR=48
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LG_VADDR" >&5
+$as_echo "$LG_VADDR" >&6; }
+ fi
+ ;;
+ x86_64)
+ if test "x$LG_VADDR" = "xdetect"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking number of significant virtual address bits" >&5
+$as_echo_n "checking number of significant virtual address bits... " >&6; }
+if ${je_cv_lg_vaddr+:} false; then :
$as_echo_n "(cached) " >&6
else
+ if test "$cross_compiling" = yes; then :
+ je_cv_lg_vaddr=57
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <emmintrin.h>
+#include <stdio.h>
+#ifdef _WIN32
+#include <limits.h>
+#include <intrin.h>
+typedef unsigned __int32 uint32_t;
+#else
+#include <stdint.h>
+#endif
int
main ()
{
+ uint32_t r[4];
+ uint32_t eax_in = 0x80000008U;
+#ifdef _WIN32
+ __cpuid((int *)r, (int)eax_in);
+#else
+ asm volatile ("cpuid"
+ : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
+ : "a" (eax_in), "c" (0)
+ );
+#endif
+ uint32_t eax_out = r[0];
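+ /* CPUID leaf 0x80000008: EAX bits 7:0 give physical address bits and
+    bits 15:8 give the number of significant virtual (linear) address bits. */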
+ uint32_t vaddr = ((eax_out & 0x0000ff00U) >> 8);
+ FILE *f = fopen("conftest.out", "w");
+ if (f == NULL) {
+ return 1;
+ }
+ if (vaddr > (sizeof(void *) << 3)) {
+ vaddr = sizeof(void *) << 3;
+ }
+ fprintf(f, "%u", vaddr);
+ fclose(f);
+ return 0;
+
;
return 0;
}
_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- je_cv_sse2=yes
+if ac_fn_c_try_run "$LINENO"; then :
+ je_cv_lg_vaddr=`cat conftest.out`
else
- je_cv_sse2=no
+ je_cv_lg_vaddr=error
fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sse2" >&5
-$as_echo "$je_cv_sse2" >&6; }
- if test "x${je_cv_sse2}" = "xyes" ; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_SSE2
-_ACEOF
-
- fi
- ;;
- powerpc)
- cat >>confdefs.h <<_ACEOF
-#define HAVE_ALTIVEC
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_lg_vaddr" >&5
+$as_echo "$je_cv_lg_vaddr" >&6; }
+ if test "x${je_cv_lg_vaddr}" != "x" ; then
+ LG_VADDR="${je_cv_lg_vaddr}"
+ fi
+ if test "x${LG_VADDR}" != "xerror" ; then
+ cat >>confdefs.h <<_ACEOF
+#define LG_VADDR $LG_VADDR
_ACEOF
- ;;
+ else
+ as_fn_error $? "cannot determine number of significant virtual address bits" "$LINENO" 5
+ fi
+ fi
+ ;;
*)
- ;;
+ if test "x$LG_VADDR" = "xdetect"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking number of significant virtual address bits" >&5
+$as_echo_n "checking number of significant virtual address bits... " >&6; }
+ if test "x${LG_SIZEOF_PTR}" = "x3" ; then
+ LG_VADDR=64
+ elif test "x${LG_SIZEOF_PTR}" = "x2" ; then
+ LG_VADDR=32
+ elif test "x${LG_SIZEOF_PTR}" = "xLG_SIZEOF_PTR_WIN" ; then
+ LG_VADDR="(1U << (LG_SIZEOF_PTR_WIN+3))"
+ else
+ as_fn_error $? "Unsupported lg(pointer size): ${LG_SIZEOF_PTR}" "$LINENO" 5
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LG_VADDR" >&5
+$as_echo "$LG_VADDR" >&6; }
+ fi
+ ;;
esac
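+# LG_VADDR now holds the number of significant virtual-address bits: probed
+# via cpuid on x86_64, fixed per ABI on aarch64, and derived from the pointer
+# size on other architectures (or taken from --with-lg_vaddr when given).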
cat >>confdefs.h <<_ACEOF
-#define CPU_SPINWAIT $CPU_SPINWAIT
+#define LG_VADDR $LG_VADDR
_ACEOF
@@ -4714,17 +7082,27 @@ o="$ac_objext"
a="a"
exe="$ac_exeext"
libprefix="lib"
+link_whole_archive="0"
DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
RPATH='-Wl,-rpath,$(1)'
SOREV="${so}.${rev}"
PIC_CFLAGS='-fPIC -DPIC'
CTARGET='-o $@'
LDTARGET='-o $@'
+TEST_LD_MODE=
EXTRA_LDFLAGS=
ARFLAGS='crus'
AROUT=' $@'
CC_MM=1
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+ TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+ CC_MM=
+fi
+
@@ -4821,42 +7199,245 @@ else
fi
-default_munmap="1"
-JEMALLOC_USABLE_SIZE_CONST="const"
+
+
+
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}nm", so it can be a program name with args.
+set dummy ${ac_tool_prefix}nm; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_NM+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$NM"; then
+ ac_cv_prog_NM="$NM" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_NM="${ac_tool_prefix}nm"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+NM=$ac_cv_prog_NM
+if test -n "$NM"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NM" >&5
+$as_echo "$NM" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_NM"; then
+ ac_ct_NM=$NM
+ # Extract the first word of "nm", so it can be a program name with args.
+set dummy nm; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_ac_ct_NM+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$ac_ct_NM"; then
+ ac_cv_prog_ac_ct_NM="$ac_ct_NM" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_ac_ct_NM="nm"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_NM=$ac_cv_prog_ac_ct_NM
+if test -n "$ac_ct_NM"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NM" >&5
+$as_echo "$ac_ct_NM" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+ if test "x$ac_ct_NM" = x; then
+ NM=":"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+ NM=$ac_ct_NM
+ fi
+else
+ NM="$ac_cv_prog_NM"
+fi
+
+
+for ac_prog in gawk mawk nawk awk
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_prog_AWK+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ if test -n "$AWK"; then
+ ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+ ac_cv_prog_AWK="$ac_prog"
+ $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+ done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+$as_echo "$AWK" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+ test -n "$AWK" && break
+done
+
+
+default_retain="0"
+maps_coalesce="1"
+DUMP_SYMS="${NM} -a"
+SYM_PREFIX=""
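+# DUMP_SYMS ("${NM} -a" by default, dumpbin /SYMBOLS on Cygwin below) is
+# presumably what later symbol-listing steps use to enumerate symbols, and
+# SYM_PREFIX is the platform's leading-underscore decoration, if any.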
case "${host}" in
- *-*-darwin*)
- CFLAGS="$CFLAGS"
+ *-*-darwin* | *-*-ios*)
abi="macho"
- $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
-
RPATH=""
LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
so="dylib"
importlib="${so}"
force_tls="0"
- DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+ DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
SOREV="${rev}.${so}"
sbrk_deprecated="1"
+ SYM_PREFIX="_"
;;
*-*-freebsd*)
- CFLAGS="$CFLAGS"
abi="elf"
- $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+ $as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT " >>confdefs.h
force_lazy_lock="1"
;;
+ *-*-dragonfly*)
+ abi="elf"
+ ;;
+ *-*-openbsd*)
+ abi="elf"
+ force_tls="0"
+ ;;
+ *-*-bitrig*)
+ abi="elf"
+ ;;
+ *-*-linux-android)
+ T_APPEND_V=-D_GNU_SOURCE
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
+ abi="elf"
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h
+
+ $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_C11_ATOMICS 1" >>confdefs.h
+
+ force_tls="0"
+ if test "${LG_SIZEOF_PTR}" = "3"; then
+ default_retain="1"
+ fi
+ ;;
*-*-linux*)
- CFLAGS="$CFLAGS"
- CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+ T_APPEND_V=-D_GNU_SOURCE
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
abi="elf"
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS " >>confdefs.h
+
$as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h
- $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h
+ $as_echo "#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY " >>confdefs.h
$as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h
- JEMALLOC_USABLE_SIZE_CONST=""
- default_munmap="0"
+ $as_echo "#define JEMALLOC_USE_CXX_THROW " >>confdefs.h
+
+ if test "${LG_SIZEOF_PTR}" = "3"; then
+ default_retain="1"
+ fi
+ ;;
+ *-*-kfreebsd*)
+ T_APPEND_V=-D_GNU_SOURCE
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
+ abi="elf"
+ $as_echo "#define JEMALLOC_HAS_ALLOCA_H 1" >>confdefs.h
+
+ $as_echo "#define JEMALLOC_SYSCTL_VM_OVERCOMMIT " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_THREADED_INIT " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_USE_CXX_THROW " >>confdefs.h
+
;;
*-*-netbsd*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking ABI" >&5
@@ -4878,36 +7459,46 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- CFLAGS="$CFLAGS"; abi="elf"
+ abi="elf"
else
abi="aout"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5
$as_echo "$abi" >&6; }
- $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
-
;;
*-*-solaris2*)
- CFLAGS="$CFLAGS"
abi="elf"
- $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
-
RPATH='-Wl,-R,$(1)'
- CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
- LIBS="$LIBS -lposix4 -lsocket -lnsl"
+ T_APPEND_V=-D_POSIX_PTHREAD_SEMANTICS
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
+else
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
+fi
+
+
+ T_APPEND_V="-lposix4 -lsocket -lnsl"
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
;;
*-ibm-aix*)
- if "$LG_SIZEOF_PTR" = "8"; then
+ if test "${LG_SIZEOF_PTR}" = "3"; then
LD_PRELOAD_VAR="LDR_PRELOAD64"
else
LD_PRELOAD_VAR="LDR_PRELOAD"
fi
abi="xcoff"
;;
- *-*-mingw*)
+ *-*-mingw* | *-*-cygwin*)
abi="pecoff"
force_tls="0"
+ maps_coalesce="0"
RPATH=""
so="dll"
if test "x$je_cv_msvc" = "xyes" ; then
@@ -4923,7 +7514,15 @@ $as_echo "$abi" >&6; }
else
importlib="${so}"
DSO_LDFLAGS="-shared"
+ link_whole_archive="1"
fi
+ case "${host}" in
+ *-*-cygwin*)
+ DUMP_SYMS="dumpbin /SYMBOLS"
+ ;;
+ *)
+ ;;
+ esac
a="lib"
libprefix=""
SOREV="${so}"
@@ -4935,6 +7534,50 @@ $as_echo "Unsupported operating system: ${host}" >&6; }
abi="elf"
;;
esac
+
+JEMALLOC_USABLE_SIZE_CONST=const
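+# Probe whether malloc.h declares malloc_usable_size() with a const-qualified
+# argument.  glibc, for one, declares size_t malloc_usable_size(void *), so
+# the const must be dropped there for the prototypes to match.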
+for ac_header in malloc.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "malloc.h" "ac_cv_header_malloc_h" "$ac_includes_default"
+if test "x$ac_cv_header_malloc_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_MALLOC_H 1
+_ACEOF
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether malloc_usable_size definition can use const argument" >&5
+$as_echo_n "checking whether malloc_usable_size definition can use const argument... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <malloc.h>
+ #include <stddef.h>
+ size_t malloc_usable_size(const void *ptr);
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ JEMALLOC_USABLE_SIZE_CONST=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+
+done
+
cat >>confdefs.h <<_ACEOF
#define JEMALLOC_USABLE_SIZE_CONST $JEMALLOC_USABLE_SIZE_CONST
_ACEOF
@@ -4960,6 +7603,74 @@ _ACEOF
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing log" >&5
+$as_echo_n "checking for library containing log... " >&6; }
+if ${ac_cv_search_log+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char log ();
+int
+main ()
+{
+return log ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' m; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_log=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_log+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_log+:} false; then :
+
+else
+ ac_cv_search_log=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_log" >&5
+$as_echo "$ac_cv_search_log" >&6; }
+ac_res=$ac_cv_search_log
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ as_fn_error $? "Missing math functions" "$LINENO" 5
+fi
+
+if test "x$ac_cv_search_log" != "xnone required" ; then
+ LM="$ac_cv_search_log"
+else
+ LM=
+fi
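+# LM is now "-lm" on platforms where log() needs libm and empty where libc
+# already provides it, so math linkage can be appended only when required.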
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5
$as_echo_n "checking whether __attribute__ syntax is compilable... " >&6; }
if ${je_cv_attribute+:} false; then :
@@ -4994,12 +7705,21 @@ if test "x${je_cv_attribute}" = "xyes" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -fvisibility=hidden" >&5
$as_echo_n "checking whether compiler supports -fvisibility=hidden... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-fvisibility=hidden"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-fvisibility=hidden
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -fvisibility=hidden"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5015,30 +7735,109 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-fvisibility=hidden
+ je_cv_cflags_added=-fvisibility=hidden
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -fvisibility=hidden" >&5
+$as_echo_n "checking whether compiler supports -fvisibility=hidden... " >&6; }
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-fvisibility=hidden
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-fvisibility=hidden
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cxxflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
fi
fi
-SAVED_CFLAGS="${CFLAGS}"
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
$as_echo_n "checking whether compiler supports -Werror... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-Werror"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
else
- CFLAGS="${CFLAGS} -Werror"
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5054,17 +7853,74 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-Werror
+ je_cv_cflags_added=-Werror
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-herror_on_warning
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-herror_on_warning
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
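
Note: each attribute probe is bracketed by -Werror and -herror_on_warning checks so that an unsupported construct fails the test compile outright; -herror_on_warning appears to be the Cray compiler's spelling of warnings-as-errors (an assumption — GCC and Clang simply reject the flag, which is why both are probed). Hand-run equivalent:

    cc -Werror -c conftest.c              # GCC/Clang: promote warnings to errors
    cc -herror_on_warning -c conftest.c   # Cray cc equivalent (assumed)
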
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5
@@ -5079,7 +7935,7 @@ int
main ()
{
static __thread int
- __attribute__((tls_model("initial-exec"))) foo;
+ __attribute__((tls_model("initial-exec"), unused)) foo;
foo = 0;
;
return 0;
@@ -5096,12 +7952,445 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_tls_model" >&5
$as_echo "$je_cv_tls_model" >&6; }
-CFLAGS="${SAVED_CFLAGS}"
-if test "x${je_cv_tls_model}" = "xyes" ; then
- $as_echo "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))" >>confdefs.h
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- $as_echo "#define JEMALLOC_TLS_MODEL " >>confdefs.h
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-herror_on_warning
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-herror_on_warning
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether alloc_size attribute is compilable" >&5
+$as_echo_n "checking whether alloc_size attribute is compilable... " >&6; }
+if ${je_cv_alloc_size+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(size_t size) __attribute__((alloc_size(1)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_alloc_size=yes
+else
+ je_cv_alloc_size=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_alloc_size" >&5
+$as_echo "$je_cv_alloc_size" >&6; }
+
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+if test "x${je_cv_alloc_size}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE " >>confdefs.h
+
+fi
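
Note: with warnings-as-errors in effect, the alloc_size probe above just declares a function carrying the attribute and links the result; on compilers that support it, alloc_size(1) tells the optimizer that argument 1 is the allocation size. A hand-run version of the same check, assuming a cc on PATH:

    cat > conftest.c <<'EOF'
    #include <stdlib.h>
    void *foo(size_t size) __attribute__((alloc_size(1)));
    int main(void) { return 0; }
    EOF
    cc -Werror -o conftest conftest.c && echo "je_cv_alloc_size=yes"
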
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-herror_on_warning
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-herror_on_warning
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(gnu_printf, ...) attribute is compilable" >&5
+$as_echo_n "checking whether format(gnu_printf, ...) attribute is compilable... " >&6; }
+if ${je_cv_format_gnu_printf+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_format_gnu_printf=yes
+else
+ je_cv_format_gnu_printf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_format_gnu_printf" >&5
+$as_echo "$je_cv_format_gnu_printf" >&6; }
+
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF " >>confdefs.h
+
+fi
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-herror_on_warning
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-herror_on_warning
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether format(printf, ...) attribute is compilable" >&5
+$as_echo_n "checking whether format(printf, ...) attribute is compilable... " >&6; }
+if ${je_cv_format_printf+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_format_printf=yes
+else
+ je_cv_format_printf=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_format_printf" >&5
+$as_echo "$je_cv_format_printf" >&6; }
+
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+if test "x${je_cv_format_printf}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF " >>confdefs.h
fi
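
Note: two format archetypes are probed — gnu_printf pins checking to glibc's printf semantics (useful where plain printf means a different runtime's conversions; an assumption about the motivation, not stated in the script), while format(printf, ...) is the portable fallback. Whichever links defines the matching JEMALLOC_HAVE_ATTR_FORMAT_* macro. Hand-run equivalent of the gnu_printf probe:

    cat > conftest.c <<'EOF'
    #include <stdlib.h>
    void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));
    int main(void) { return 0; }
    EOF
    cc -Werror -o conftest conftest.c && echo "je_cv_format_gnu_printf=yes"
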
@@ -5403,184 +8692,132 @@ fi
-public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
-ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign"
-if test "x$ac_cv_func_memalign" = xyes; then :
- $as_echo "#define JEMALLOC_OVERRIDE_MEMALIGN " >>confdefs.h
-
- public_syms="${public_syms} memalign"
+# Check whether --with-mangling was given.
+if test "${with_mangling+set}" = set; then :
+ withval=$with_mangling; mangling_map="$with_mangling"
+else
+ mangling_map=""
fi
-ac_fn_c_check_func "$LINENO" "valloc" "ac_cv_func_valloc"
-if test "x$ac_cv_func_valloc" = xyes; then :
- $as_echo "#define JEMALLOC_OVERRIDE_VALLOC " >>confdefs.h
-
- public_syms="${public_syms} valloc"
-fi
-# Check whether --enable-experimental was given.
-if test "${enable_experimental+set}" = set; then :
- enableval=$enable_experimental; if test "x$enable_experimental" = "xno" ; then
- enable_experimental="0"
+# Check whether --with-jemalloc_prefix was given.
+if test "${with_jemalloc_prefix+set}" = set; then :
+ withval=$with_jemalloc_prefix; JEMALLOC_PREFIX="$with_jemalloc_prefix"
else
- enable_experimental="1"
-fi
-
+ if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then
+ JEMALLOC_PREFIX=""
else
- enable_experimental="1"
-
+ JEMALLOC_PREFIX="je_"
fi
-if test "x$enable_experimental" = "x1" ; then
- $as_echo "#define JEMALLOC_EXPERIMENTAL " >>confdefs.h
-
- public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
fi
+if test "x$JEMALLOC_PREFIX" = "x" ; then
+ $as_echo "#define JEMALLOC_IS_MALLOC 1" >>confdefs.h
-GCOV_FLAGS=
-# Check whether --enable-code-coverage was given.
-if test "${enable_code_coverage+set}" = set; then :
- enableval=$enable_code_coverage; if test "x$enable_code_coverage" = "xno" ; then
- enable_code_coverage="0"
else
- enable_code_coverage="1"
-fi
+ JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
+ cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_PREFIX "$JEMALLOC_PREFIX"
+_ACEOF
-else
- enable_code_coverage="0"
+ cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_CPREFIX "$JEMALLOC_CPREFIX"
+_ACEOF
fi
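
Note: the prefix default depends on the ABI — on Mach-O and PE-COFF the library cannot safely replace the system malloc, so the default becomes je_; an explicitly empty prefix now also defines JEMALLOC_IS_MALLOC, marking jemalloc's symbols as the malloc family itself. Typical invocation (the flag spelling uses a hyphen even though the shell variable uses an underscore):

    ./configure --with-jemalloc-prefix=je_   # public API becomes je_malloc, je_free, ...
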
-if test "x$enable_code_coverage" = "x1" ; then
- deoptimize="no"
- echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes"
- if test "x${deoptimize}" = "xyes" ; then
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O0" >&5
-$as_echo_n "checking whether compiler supports -O0... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-O0"
-else
- CFLAGS="${CFLAGS} -O0"
-fi
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-int
-main ()
-{
-
- return 0;
+# Check whether --with-export was given.
+if test "${with_export+set}" = set; then :
+ withval=$with_export; if test "x$with_export" = "xno"; then
+ $as_echo "#define JEMALLOC_EXPORT /**/" >>confdefs.h
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-O0
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
- je_cv_cflags_appended=
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+fi
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -fprofile-arcs -ftest-coverage" >&5
-$as_echo_n "checking whether compiler supports -fprofile-arcs -ftest-coverage... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-fprofile-arcs -ftest-coverage"
-else
- CFLAGS="${CFLAGS} -fprofile-arcs -ftest-coverage"
+public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
+ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign"
+if test "x$ac_cv_func_memalign" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE_MEMALIGN " >>confdefs.h
+
+ public_syms="${public_syms} memalign"
fi
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ac_fn_c_check_func "$LINENO" "valloc" "ac_cv_func_valloc"
+if test "x$ac_cv_func_valloc" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE_VALLOC " >>confdefs.h
-int
-main ()
-{
+ public_syms="${public_syms} valloc"
+fi
- return 0;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-fprofile-arcs -ftest-coverage
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
- je_cv_cflags_appended=
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+wrap_syms=
+if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ ac_fn_c_check_func "$LINENO" "__libc_calloc" "ac_cv_func___libc_calloc"
+if test "x$ac_cv_func___libc_calloc" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_CALLOC " >>confdefs.h
+ wrap_syms="${wrap_syms} __libc_calloc"
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage"
- $as_echo "#define JEMALLOC_CODE_COVERAGE " >>confdefs.h
+ ac_fn_c_check_func "$LINENO" "__libc_free" "ac_cv_func___libc_free"
+if test "x$ac_cv_func___libc_free" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_FREE " >>confdefs.h
+ wrap_syms="${wrap_syms} __libc_free"
fi
+ ac_fn_c_check_func "$LINENO" "__libc_malloc" "ac_cv_func___libc_malloc"
+if test "x$ac_cv_func___libc_malloc" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_MALLOC " >>confdefs.h
-
-# Check whether --with-mangling was given.
-if test "${with_mangling+set}" = set; then :
- withval=$with_mangling; mangling_map="$with_mangling"
-else
- mangling_map=""
+ wrap_syms="${wrap_syms} __libc_malloc"
fi
+ ac_fn_c_check_func "$LINENO" "__libc_memalign" "ac_cv_func___libc_memalign"
+if test "x$ac_cv_func___libc_memalign" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN " >>confdefs.h
-
-# Check whether --with-jemalloc_prefix was given.
-if test "${with_jemalloc_prefix+set}" = set; then :
- withval=$with_jemalloc_prefix; JEMALLOC_PREFIX="$with_jemalloc_prefix"
-else
- if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then
- JEMALLOC_PREFIX=""
-else
- JEMALLOC_PREFIX="je_"
+ wrap_syms="${wrap_syms} __libc_memalign"
fi
-fi
+ ac_fn_c_check_func "$LINENO" "__libc_realloc" "ac_cv_func___libc_realloc"
+if test "x$ac_cv_func___libc_realloc" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_REALLOC " >>confdefs.h
-if test "x$JEMALLOC_PREFIX" != "x" ; then
- JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
- cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_PREFIX "$JEMALLOC_PREFIX"
-_ACEOF
+ wrap_syms="${wrap_syms} __libc_realloc"
+fi
- cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_CPREFIX "$JEMALLOC_CPREFIX"
-_ACEOF
+ ac_fn_c_check_func "$LINENO" "__libc_valloc" "ac_cv_func___libc_valloc"
+if test "x$ac_cv_func___libc_valloc" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___LIBC_VALLOC " >>confdefs.h
+ wrap_syms="${wrap_syms} __libc_valloc"
fi
+ ac_fn_c_check_func "$LINENO" "__posix_memalign" "ac_cv_func___posix_memalign"
+if test "x$ac_cv_func___posix_memalign" = xyes; then :
+ $as_echo "#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN " >>confdefs.h
-# Check whether --with-export was given.
-if test "${with_export+set}" = set; then :
- withval=$with_export; if test "x$with_export" = "xno"; then
- $as_echo "#define JEMALLOC_EXPORT /**/" >>confdefs.h
-
+ wrap_syms="${wrap_syms} __posix_memalign"
fi
fi
+case "${host}" in
+ *-*-mingw* | *-*-cygwin*)
+ wrap_syms="${wrap_syms} tls_callback"
+ ;;
+ *)
+ ;;
+esac
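
Note: when jemalloc is built unprefixed on glibc, the checks above additionally take over the internal __libc_malloc-style aliases, and every overridden name is collected in wrap_syms (presumably consumed later by the symbol-namespacing scripts — an inference, the consumer is outside this hunk). Each ac_fn_c_check_func probe boils down to a link test like:

    cat > conftest.c <<'EOF'
    char __libc_malloc ();
    int main(void) { return __libc_malloc (); }
    EOF
    cc -o conftest conftest.c && echo "__libc_malloc is linkable"
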
# Check whether --with-private_namespace was given.
@@ -5610,56 +8847,76 @@ fi
install_suffix="$INSTALL_SUFFIX"
+
+# Check whether --with-malloc_conf was given.
+if test "${with_malloc_conf+set}" = set; then :
+ withval=$with_malloc_conf; JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"
+else
+ JEMALLOC_CONFIG_MALLOC_CONF=""
+
+fi
+
+config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
+cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_CONFIG_MALLOC_CONF "$config_malloc_conf"
+_ACEOF
+
+
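
Note: the new --with-malloc-conf option bakes a default options string into the library as JEMALLOC_CONFIG_MALLOC_CONF; runtime sources such as the MALLOC_CONF environment variable can still override it (precedence per the jemalloc manual, not restated in this script). Example:

    ./configure --with-malloc-conf="narenas:1,tcache:false"
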
je_="je_"
-cfgoutputs_in="${srcroot}Makefile.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in"
+cfgoutputs_in="Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
+cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_preamble.h.in"
+cfgoutputs_in="${cfgoutputs_in} test/test.sh.in"
+cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in"
cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} jemalloc.pc"
cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h"
-cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_preamble.h"
cfgoutputs_out="${cfgoutputs_out} test/test.sh"
cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h"
cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} jemalloc.pc:jemalloc.pc.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_preamble.h"
cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in"
cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in"
-cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_in="include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh"
+cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in"
cfghdrs_out="include/jemalloc/jemalloc_defs.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h"
-cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h"
-cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_unnamespace.h"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols.awk"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols_jet.awk"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h"
@@ -5672,26 +8929,8 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h"
cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h"
cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in"
-
-# Check whether --enable-cc-silence was given.
-if test "${enable_cc_silence+set}" = set; then :
- enableval=$enable_cc_silence; if test "x$enable_cc_silence" = "xno" ; then
- enable_cc_silence="0"
-else
- enable_cc_silence="1"
-fi
-
-else
- enable_cc_silence="0"
-
-fi
-
-if test "x$enable_cc_silence" = "x1" ; then
- $as_echo "#define JEMALLOC_CC_SILENCE " >>confdefs.h
-
-fi
+cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in"
# Check whether --enable-debug was given.
if test "${enable_debug+set}" = set; then :
@@ -5709,42 +8948,90 @@ fi
if test "x$enable_debug" = "x1" ; then
$as_echo "#define JEMALLOC_DEBUG " >>confdefs.h
- enable_ivsalloc="1"
fi
+if test "x$enable_debug" = "x1" ; then
+ $as_echo "#define JEMALLOC_DEBUG " >>confdefs.h
+
+fi
+
+if test "x$enable_debug" = "x0" ; then
+ if test "x$GCC" = "xyes" ; then
-# Check whether --enable-ivsalloc was given.
-if test "${enable_ivsalloc+set}" = set; then :
- enableval=$enable_ivsalloc; if test "x$enable_ivsalloc" = "xno" ; then
- enable_ivsalloc="0"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O3" >&5
+$as_echo_n "checking whether compiler supports -O3... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-O3
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- enable_ivsalloc="1"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
-else
- enable_ivsalloc="0"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
-if test "x$enable_ivsalloc" = "x1" ; then
- $as_echo "#define JEMALLOC_IVSALLOC " >>confdefs.h
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-O3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
-if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
- optimize="no"
- echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || optimize="yes"
- if test "x${optimize}" = "xyes" ; then
- if test "x$GCC" = "xyes" ; then
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O3" >&5
$as_echo_n "checking whether compiler supports -O3... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-O3"
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-O3
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -O3"
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5759,28 +9046,49 @@ main ()
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-O3
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-O3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cxxflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -funroll-loops" >&5
$as_echo_n "checking whether compiler supports -funroll-loops... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-funroll-loops"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-funroll-loops
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
else
- CFLAGS="${CFLAGS} -funroll-loops"
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5796,28 +9104,43 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-funroll-loops
+ je_cv_cflags_added=-funroll-loops
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
- elif test "x$je_cv_msvc" = "xyes" ; then
+ elif test "x$je_cv_msvc" = "xyes" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O2" >&5
$as_echo_n "checking whether compiler supports -O2... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-O2"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-O2
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -O2"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5833,28 +9156,106 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-O2
+ je_cv_cflags_added=-O2
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
- else
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O2" >&5
+$as_echo_n "checking whether compiler supports -O2... " >&6; }
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-O2
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-O2
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cxxflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+
+ else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O" >&5
$as_echo_n "checking whether compiler supports -O... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-O"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-O
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -O"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -5870,19 +9271,87 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-O
+ je_cv_cflags_added=-O
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O" >&5
+$as_echo_n "checking whether compiler supports -O... " >&6; }
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+T_APPEND_V=-O
+ if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+ je_cv_cxxflags_added=-O
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cxxflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test "x${CONFIGURE_CXXFLAGS}" = "x" -o "x${SPECIFIED_CXXFLAGS}" = "x" ; then
+ CXXFLAGS="${CONFIGURE_CXXFLAGS}${SPECIFIED_CXXFLAGS}"
+else
+ CXXFLAGS="${CONFIGURE_CXXFLAGS} ${SPECIFIED_CXXFLAGS}"
+fi
+
- fi
fi
fi
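
Note: with debugging off, the optimization cascade above now also covers C++ — GCC-like compilers get -O3 for both C and C++ plus -funroll-loops for C, MSVC gets -O2, and anything else falls back to -O; each flag is still compile-tested before being kept. Condensed sketch, where je_try_flag is a hypothetical stand-in for the inlined probe-and-append logic:

    # je_try_flag (hypothetical): compile-test the flag, keep it only on success.
    if test "x$GCC" = "xyes" ; then
      je_try_flag CFLAGS -O3 ; je_try_flag CXXFLAGS -O3
      je_try_flag CFLAGS -funroll-loops
    elif test "x$je_cv_msvc" = "xyes" ; then
      je_try_flag CFLAGS -O2 ; je_try_flag CXXFLAGS -O2
    else
      je_try_flag CFLAGS -O ; je_try_flag CXXFLAGS -O
    fi
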
@@ -5969,9 +9438,9 @@ fi
done
if test "x$LUNWIND" = "x-lunwind" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lunwind" >&5
-$as_echo_n "checking for backtrace in -lunwind... " >&6; }
-if ${ac_cv_lib_unwind_backtrace+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for unw_backtrace in -lunwind" >&5
+$as_echo_n "checking for unw_backtrace in -lunwind... " >&6; }
+if ${ac_cv_lib_unwind_unw_backtrace+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
@@ -5985,34 +9454,48 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
#ifdef __cplusplus
extern "C"
#endif
-char backtrace ();
+char unw_backtrace ();
int
main ()
{
-return backtrace ();
+return unw_backtrace ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_unwind_backtrace=yes
+ ac_cv_lib_unwind_unw_backtrace=yes
else
- ac_cv_lib_unwind_backtrace=no
+ ac_cv_lib_unwind_unw_backtrace=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_backtrace" >&5
-$as_echo "$ac_cv_lib_unwind_backtrace" >&6; }
-if test "x$ac_cv_lib_unwind_backtrace" = xyes; then :
- LIBS="$LIBS $LUNWIND"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_unw_backtrace" >&5
+$as_echo "$ac_cv_lib_unwind_unw_backtrace" >&6; }
+if test "x$ac_cv_lib_unwind_unw_backtrace" = xyes; then :
+ T_APPEND_V=$LUNWIND
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
else
enable_prof_libunwind="0"
fi
else
- LIBS="$LIBS $LUNWIND"
+ T_APPEND_V=$LUNWIND
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
fi
if test "x${enable_prof_libunwind}" = "x1" ; then
backtrace_method="libunwind"
@@ -6050,7 +9533,8 @@ fi
done
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _Unwind_Backtrace in -lgcc" >&5
+ if test "x${enable_prof_libgcc}" = "x1" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _Unwind_Backtrace in -lgcc" >&5
$as_echo_n "checking for _Unwind_Backtrace in -lgcc... " >&6; }
if ${ac_cv_lib_gcc__Unwind_Backtrace+:} false; then :
$as_echo_n "(cached) " >&6
@@ -6087,11 +9571,19 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gcc__Unwind_Backtrace" >&5
$as_echo "$ac_cv_lib_gcc__Unwind_Backtrace" >&6; }
if test "x$ac_cv_lib_gcc__Unwind_Backtrace" = xyes; then :
- LIBS="$LIBS -lgcc"
+ T_APPEND_V=-lgcc
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
else
enable_prof_libgcc="0"
fi
+ fi
if test "x${enable_prof_libgcc}" = "x1" ; then
backtrace_method="libgcc"
$as_echo "#define JEMALLOC_PROF_LIBGCC " >>confdefs.h
@@ -6119,12 +9611,21 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -fno-omit-frame-pointer" >&5
$as_echo_n "checking whether compiler supports -fno-omit-frame-pointer... " >&6; }
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="-fno-omit-frame-pointer"
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-fno-omit-frame-pointer
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- CFLAGS="${CFLAGS} -fno-omit-frame-pointer"
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -6140,17 +9641,23 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- je_cv_cflags_appended=-fno-omit-frame-pointer
+ je_cv_cflags_added=-fno-omit-frame-pointer
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
- je_cv_cflags_appended=
+ je_cv_cflags_added=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- CFLAGS="${TCFLAGS}"
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
backtrace_method="gcc intrinsics"
$as_echo "#define JEMALLOC_PROF_GCC " >>confdefs.h
@@ -6168,319 +9675,357 @@ $as_echo_n "checking configured backtracing method... " >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5
$as_echo "$backtrace_method" >&6; }
if test "x$enable_prof" = "x1" ; then
- if test "x${force_tls}" = "x0" ; then
- as_fn_error $? "Heap profiling requires TLS" "$LINENO" 5;
- fi
- force_tls="1"
+ T_APPEND_V=$LM
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
- if test "x$abi" != "xpecoff"; then
- LIBS="$LIBS -lm"
- fi
$as_echo "#define JEMALLOC_PROF " >>confdefs.h
fi
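
Note: enabling profiling no longer hard-requires TLS or the pecoff-guarded -lm; the hunk above simply appends $LM to LIBS before defining JEMALLOC_PROF. Net effect when both values are non-empty:

    LIBS="$LIBS $LM"   # $LM presumably holds -lm where a math library is needed (assumption)
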
-# Check whether --enable-tcache was given.
-if test "${enable_tcache+set}" = set; then :
- enableval=$enable_tcache; if test "x$enable_tcache" = "xno" ; then
- enable_tcache="0"
+if test "x${maps_coalesce}" = "x1" ; then
+ $as_echo "#define JEMALLOC_MAPS_COALESCE " >>confdefs.h
+
+fi
+
+if test "x$default_retain" = "x1" ; then
+ $as_echo "#define JEMALLOC_RETAIN " >>confdefs.h
+
+fi
+
+have_dss="1"
+ac_fn_c_check_func "$LINENO" "sbrk" "ac_cv_func_sbrk"
+if test "x$ac_cv_func_sbrk" = xyes; then :
+ have_sbrk="1"
else
- enable_tcache="1"
+ have_sbrk="0"
fi
+if test "x$have_sbrk" = "x1" ; then
+ if test "x$sbrk_deprecated" = "x1" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Disabling dss allocation because sbrk is deprecated" >&5
+$as_echo "Disabling dss allocation because sbrk is deprecated" >&6; }
+ have_dss="0"
+ fi
else
- enable_tcache="1"
+ have_dss="0"
+fi
+
+if test "x$have_dss" = "x1" ; then
+ $as_echo "#define JEMALLOC_DSS " >>confdefs.h
fi
-if test "x$enable_tcache" = "x1" ; then
- $as_echo "#define JEMALLOC_TCACHE " >>confdefs.h
+# Check whether --enable-fill was given.
+if test "${enable_fill+set}" = set; then :
+ enableval=$enable_fill; if test "x$enable_fill" = "xno" ; then
+ enable_fill="0"
+else
+ enable_fill="1"
+fi
+
+else
+ enable_fill="1"
fi
+if test "x$enable_fill" = "x1" ; then
+ $as_echo "#define JEMALLOC_FILL " >>confdefs.h
+
+fi
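
Note: fill support (junk/zero byte patterns) stays compiled in by default, but only takes effect when switched on at run time, e.g.:

    MALLOC_CONF="junk:true" ./myprog   # needs JEMALLOC_FILL at build time; myprog is a placeholder
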
-# Check whether --enable-mremap was given.
-if test "${enable_mremap+set}" = set; then :
- enableval=$enable_mremap; if test "x$enable_mremap" = "xno" ; then
- enable_mremap="0"
+
+# Check whether --enable-utrace was given.
+if test "${enable_utrace+set}" = set; then :
+ enableval=$enable_utrace; if test "x$enable_utrace" = "xno" ; then
+ enable_utrace="0"
else
- enable_mremap="1"
+ enable_utrace="1"
fi
else
- enable_mremap="0"
+ enable_utrace="0"
fi
-if test "x$enable_mremap" = "x1" ; then
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mremap(...MREMAP_FIXED...) is compilable" >&5
-$as_echo_n "checking whether mremap(...MREMAP_FIXED...) is compilable... " >&6; }
-if ${je_cv_mremap_fixed+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether utrace(2) is compilable" >&5
+$as_echo_n "checking whether utrace(2) is compilable... " >&6; }
+if ${je_cv_utrace+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#define _GNU_SOURCE
-#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/ktrace.h>
int
main ()
{
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+ utrace((void *)0, 0);
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_mremap_fixed=yes
+ je_cv_utrace=yes
else
- je_cv_mremap_fixed=no
+ je_cv_utrace=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mremap_fixed" >&5
-$as_echo "$je_cv_mremap_fixed" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_utrace" >&5
+$as_echo "$je_cv_utrace" >&6; }
- if test "x${je_cv_mremap_fixed}" = "xno" ; then
- enable_mremap="0"
- fi
+if test "x${je_cv_utrace}" = "xno" ; then
+ enable_utrace="0"
fi
-if test "x$enable_mremap" = "x1" ; then
- $as_echo "#define JEMALLOC_MREMAP " >>confdefs.h
+if test "x$enable_utrace" = "x1" ; then
+ $as_echo "#define JEMALLOC_UTRACE " >>confdefs.h
fi
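
Note: the utrace(2) probe is effectively BSD-specific — it links a call against the ktrace headers, and where that fails --enable-utrace is silently forced off:

    ./configure --enable-utrace   # records allocation events via ktrace where utrace(2) exists
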
-# Check whether --enable-munmap was given.
-if test "${enable_munmap+set}" = set; then :
- enableval=$enable_munmap; if test "x$enable_munmap" = "xno" ; then
- enable_munmap="0"
+# Check whether --enable-xmalloc was given.
+if test "${enable_xmalloc+set}" = set; then :
+ enableval=$enable_xmalloc; if test "x$enable_xmalloc" = "xno" ; then
+ enable_xmalloc="0"
else
- enable_munmap="1"
+ enable_xmalloc="1"
fi
else
- enable_munmap="${default_munmap}"
+ enable_xmalloc="0"
fi
-if test "x$enable_munmap" = "x1" ; then
- $as_echo "#define JEMALLOC_MUNMAP " >>confdefs.h
+if test "x$enable_xmalloc" = "x1" ; then
+ $as_echo "#define JEMALLOC_XMALLOC " >>confdefs.h
fi
-# Check whether --enable-dss was given.
-if test "${enable_dss+set}" = set; then :
- enableval=$enable_dss; if test "x$enable_dss" = "xno" ; then
- enable_dss="0"
+# Check whether --enable-cache-oblivious was given.
+if test "${enable_cache_oblivious+set}" = set; then :
+ enableval=$enable_cache_oblivious; if test "x$enable_cache_oblivious" = "xno" ; then
+ enable_cache_oblivious="0"
else
- enable_dss="1"
+ enable_cache_oblivious="1"
fi
else
- enable_dss="0"
+ enable_cache_oblivious="1"
fi
-ac_fn_c_check_func "$LINENO" "sbrk" "ac_cv_func_sbrk"
-if test "x$ac_cv_func_sbrk" = xyes; then :
- have_sbrk="1"
-else
- have_sbrk="0"
+if test "x$enable_cache_oblivious" = "x1" ; then
+ $as_echo "#define JEMALLOC_CACHE_OBLIVIOUS " >>confdefs.h
+
fi
-if test "x$have_sbrk" = "x1" ; then
- if test "x$sbrk_deprecated" == "x1" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Disabling dss allocation because sbrk is deprecated" >&5
-$as_echo "Disabling dss allocation because sbrk is deprecated" >&6; }
- enable_dss="0"
- else
- $as_echo "#define JEMALLOC_HAVE_SBRK " >>confdefs.h
- fi
+# Check whether --enable-log was given.
+if test "${enable_log+set}" = set; then :
+ enableval=$enable_log; if test "x$enable_log" = "xno" ; then
+ enable_log="0"
else
- enable_dss="0"
+ enable_log="1"
fi
-if test "x$enable_dss" = "x1" ; then
- $as_echo "#define JEMALLOC_DSS " >>confdefs.h
+else
+ enable_log="0"
fi
+if test "x$enable_log" = "x1" ; then
+ $as_echo "#define JEMALLOC_LOG " >>confdefs.h
-# Check whether --enable-fill was given.
-if test "${enable_fill+set}" = set; then :
- enableval=$enable_fill; if test "x$enable_fill" = "xno" ; then
- enable_fill="0"
-else
- enable_fill="1"
fi
-else
- enable_fill="1"
-fi
-if test "x$enable_fill" = "x1" ; then
- $as_echo "#define JEMALLOC_FILL " >>confdefs.h
-fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_unreachable is compilable" >&5
+$as_echo_n "checking whether a program using __builtin_unreachable is compilable... " >&6; }
+if ${je_cv_gcc_builtin_unreachable+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+void foo (void) {
+ __builtin_unreachable();
+}
-# Check whether --enable-utrace was given.
-if test "${enable_utrace+set}" = set; then :
- enableval=$enable_utrace; if test "x$enable_utrace" = "xno" ; then
- enable_utrace="0"
+int
+main ()
+{
+
+ {
+ foo();
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_gcc_builtin_unreachable=yes
else
- enable_utrace="1"
+ je_cv_gcc_builtin_unreachable=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_unreachable" >&5
+$as_echo "$je_cv_gcc_builtin_unreachable" >&6; }
+
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable" >>confdefs.h
else
- enable_utrace="0"
+ $as_echo "#define JEMALLOC_INTERNAL_UNREACHABLE abort" >>confdefs.h
fi
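
Note: JEMALLOC_INTERNAL_UNREACHABLE maps to __builtin_unreachable when the probe links, otherwise to abort — trading an optimizer hint for a hard stop. Hand-run equivalent (compile and link only; the marked path is never executed here):

    cat > conftest.c <<'EOF'
    void foo (void) { __builtin_unreachable(); }
    int main(void) { foo(); return 0; }
    EOF
    cc -o conftest conftest.c && echo "JEMALLOC_INTERNAL_UNREACHABLE=__builtin_unreachable"
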
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether utrace(2) is compilable" >&5
-$as_echo_n "checking whether utrace(2) is compilable... " >&6; }
-if ${je_cv_utrace+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using __builtin_ffsl is compilable" >&5
+$as_echo_n "checking whether a program using __builtin_ffsl is compilable... " >&6; }
+if ${je_cv_gcc_builtin_ffsl+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/time.h>
-#include <sys/uio.h>
-#include <sys/ktrace.h>
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
int
main ()
{
- utrace((void *)0, 0);
+ {
+ int rv = __builtin_ffsl(0x08);
+ printf("%d\n", rv);
+ }
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_utrace=yes
+ je_cv_gcc_builtin_ffsl=yes
else
- je_cv_utrace=no
+ je_cv_gcc_builtin_ffsl=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_utrace" >&5
-$as_echo "$je_cv_utrace" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_builtin_ffsl" >&5
+$as_echo "$je_cv_gcc_builtin_ffsl" >&6; }
-if test "x${je_cv_utrace}" = "xno" ; then
- enable_utrace="0"
-fi
-if test "x$enable_utrace" = "x1" ; then
- $as_echo "#define JEMALLOC_UTRACE " >>confdefs.h
-
-fi
+if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll" >>confdefs.h
+ $as_echo "#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl" >>confdefs.h
-# Check whether --enable-valgrind was given.
-if test "${enable_valgrind+set}" = set; then :
- enableval=$enable_valgrind; if test "x$enable_valgrind" = "xno" ; then
- enable_valgrind="0"
-else
- enable_valgrind="1"
-fi
+ $as_echo "#define JEMALLOC_INTERNAL_FFS __builtin_ffs" >>confdefs.h
else
- enable_valgrind="1"
-
-fi
-
-if test "x$enable_valgrind" = "x1" ; then
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether valgrind is compilable" >&5
-$as_echo_n "checking whether valgrind is compilable... " >&6; }
-if ${je_cv_valgrind+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
+$as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
+if ${je_cv_function_ffsl+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-
-#if !defined(VALGRIND_RESIZEINPLACE_BLOCK)
-# error "Incompatible Valgrind version"
-#endif
+ #include <stdio.h>
+ #include <strings.h>
+ #include <string.h>
int
main ()
{
+ {
+ int rv = ffsl(0x08);
+ printf("%d\n", rv);
+ }
+
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_valgrind=yes
+ je_cv_function_ffsl=yes
else
- je_cv_valgrind=no
+ je_cv_function_ffsl=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_valgrind" >&5
-$as_echo "$je_cv_valgrind" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
+$as_echo "$je_cv_function_ffsl" >&6; }
- if test "x${je_cv_valgrind}" = "xno" ; then
- enable_valgrind="0"
- fi
- if test "x$enable_valgrind" = "x1" ; then
- $as_echo "#define JEMALLOC_VALGRIND " >>confdefs.h
+ if test "x${je_cv_function_ffsl}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_INTERNAL_FFSLL ffsll" >>confdefs.h
+
+ $as_echo "#define JEMALLOC_INTERNAL_FFSL ffsl" >>confdefs.h
+ $as_echo "#define JEMALLOC_INTERNAL_FFS ffs" >>confdefs.h
+
+ else
+ as_fn_error $? "Cannot build without ffsl(3) or __builtin_ffsl()" "$LINENO" 5
fi
fi
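
Note: a find-first-set primitive is mandatory — the __builtin_ffs* family is preferred, ffsl(3) from <strings.h> is the fallback, and configure aborts if neither links. Quick sanity check of the fallback (prints 4, the 1-based index of the set bit in 0x08):

    cat > conftest.c <<'EOF'
    #include <stdio.h>
    #include <strings.h>
    int main(void) { printf("%d\n", ffsl(0x08)); return 0; }
    EOF
    cc -o conftest conftest.c && ./conftest
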
-# Check whether --enable-xmalloc was given.
-if test "${enable_xmalloc+set}" = set; then :
- enableval=$enable_xmalloc; if test "x$enable_xmalloc" = "xno" ; then
- enable_xmalloc="0"
+# Check whether --with-lg_quantum was given.
+if test "${with_lg_quantum+set}" = set; then :
+ withval=$with_lg_quantum; LG_QUANTA="$with_lg_quantum"
else
- enable_xmalloc="1"
+ LG_QUANTA="3 4"
fi
-else
- enable_xmalloc="0"
+if test "x$with_lg_quantum" != "x" ; then
+ cat >>confdefs.h <<_ACEOF
+#define LG_QUANTUM $with_lg_quantum
+_ACEOF
fi
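
--with-lg_quantum fixes the base-2 log of the minimum allocation alignment; left unset, both candidate values 3 and 4 are handed to size-class generation. A small sketch of the relationship, with a hypothetical LG_QUANTUM value for illustration:

    #include <stdio.h>
    #include <stddef.h>

    #ifndef LG_QUANTUM
    #define LG_QUANTUM 4  /* hypothetical value; 16-byte minimum alignment */
    #endif

    int
    main(void) {
        size_t quantum = (size_t)1 << LG_QUANTUM;
        printf("quantum = %zu bytes\n", quantum);
        return 0;
    }
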
-if test "x$enable_xmalloc" = "x1" ; then
- $as_echo "#define JEMALLOC_XMALLOC " >>confdefs.h
+# Check whether --with-lg_page was given.
+if test "${with_lg_page+set}" = set; then :
+ withval=$with_lg_page; LG_PAGE="$with_lg_page"
+else
+ LG_PAGE="detect"
fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking STATIC_PAGE_SHIFT" >&5
-$as_echo_n "checking STATIC_PAGE_SHIFT... " >&6; }
-if ${je_cv_static_page_shift+:} false; then :
+if test "x$LG_PAGE" = "xdetect"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking LG_PAGE" >&5
+$as_echo_n "checking LG_PAGE... " >&6; }
+if ${je_cv_lg_page+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
+ je_cv_lg_page=12
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -6510,13 +10055,13 @@ main ()
if (result == -1) {
return 1;
}
- result = ffsl(result) - 1;
+ result = JEMALLOC_INTERNAL_FFSL(result) - 1;
f = fopen("conftest.out", "w");
if (f == NULL) {
return 1;
}
- fprintf(f, "%d\n", result);
+ fprintf(f, "%d", result);
fclose(f);
return 0;
@@ -6526,32 +10071,119 @@ main ()
}
_ACEOF
if ac_fn_c_try_run "$LINENO"; then :
- je_cv_static_page_shift=`cat conftest.out`
+ je_cv_lg_page=`cat conftest.out`
else
- je_cv_static_page_shift=undefined
+ je_cv_lg_page=undefined
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
conftest.$ac_objext conftest.beam conftest.$ac_ext
fi
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_static_page_shift" >&5
-$as_echo "$je_cv_static_page_shift" >&6; }
-
-if test "x$je_cv_static_page_shift" != "xundefined"; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_lg_page" >&5
+$as_echo "$je_cv_lg_page" >&6; }
+fi
+if test "x${je_cv_lg_page}" != "x" ; then
+ LG_PAGE="${je_cv_lg_page}"
+fi
+if test "x${LG_PAGE}" != "xundefined" ; then
cat >>confdefs.h <<_ACEOF
-#define STATIC_PAGE_SHIFT $je_cv_static_page_shift
+#define LG_PAGE $LG_PAGE
_ACEOF
else
- as_fn_error $? "cannot determine value for STATIC_PAGE_SHIFT" "$LINENO" 5
+ as_fn_error $? "cannot determine value for LG_PAGE" "$LINENO" 5
fi
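
The run test above computes LG_PAGE as the base-2 log of the system page size, now falling back to 12 (4 KiB) instead of erroring out when cross-compiling. A standalone sketch of the same computation:

    #define _GNU_SOURCE  /* for ffsl() */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int
    main(void) {
        long result = sysconf(_SC_PAGESIZE);
        if (result == -1)
            return 1;
        printf("LG_PAGE = %d\n", ffsl(result) - 1);  /* 12 for 4 KiB pages */
        return 0;
    }
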
-if test -d "${srcroot}.git" ; then
- git describe --long --abbrev=40 > ${srcroot}VERSION
+# Check whether --with-lg_hugepage was given.
+if test "${with_lg_hugepage+set}" = set; then :
+ withval=$with_lg_hugepage; je_cv_lg_hugepage="${with_lg_hugepage}"
+else
+ je_cv_lg_hugepage=""
+fi
+
+if test "x${je_cv_lg_hugepage}" = "x" ; then
+ if test -e "/proc/meminfo" ; then
+ hpsk=`cat /proc/meminfo 2>/dev/null | \
+ grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \
+ awk '{print $2}'`
+ if test "x${hpsk}" != "x" ; then
+ je_cv_lg_hugepage=10
+ while test "${hpsk}" -gt 1 ; do
+ hpsk="$((hpsk / 2))"
+ je_cv_lg_hugepage="$((je_cv_lg_hugepage + 1))"
+ done
+ fi
+ fi
+
+ if test "x${je_cv_lg_hugepage}" = "x" ; then
+ je_cv_lg_hugepage=21
+ fi
fi
-jemalloc_version=`cat ${srcroot}VERSION`
+if test "x${LG_PAGE}" != "xundefined" -a \
+ "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then
+ as_fn_error $? "Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})" "$LINENO" 5
+fi
+cat >>confdefs.h <<_ACEOF
+#define LG_HUGEPAGE ${je_cv_lg_hugepage}
+_ACEOF
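
The shell loop above converts the kB figure from /proc/meminfo into a base-2 log: it starts at 10 (lg of the kB unit) and adds one per halving, so a 2048 kB huge page yields LG_HUGEPAGE=21, and 21 (2 MiB) is also the default when /proc/meminfo is unavailable. The same arithmetic as a C sketch:

    #include <stdio.h>

    int
    main(void) {
        unsigned long hpsk = 2048;  /* hypothetical Hugepagesize value, in kB */
        int lg_hugepage = 10;       /* lg(1024): the kB unit itself */
        while (hpsk > 1) {
            hpsk /= 2;
            lg_hugepage++;
        }
        printf("LG_HUGEPAGE = %d\n", lg_hugepage);  /* 21, i.e. 2 MiB */
        return 0;
    }
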
+
+
+
+# Check whether --with-lg_page_sizes was given.
+if test "${with_lg_page_sizes+set}" = set; then :
+ withval=$with_lg_page_sizes; LG_PAGE_SIZES="$with_lg_page_sizes"
+else
+ LG_PAGE_SIZES="$LG_PAGE"
+fi
+
+
+
+
+# Check whether --with-version was given.
+if test "${with_version+set}" = set; then :
+ withval=$with_version;
+ echo "${with_version}" | grep '^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$' 2>&1 1>/dev/null
+ if test $? -eq 0 ; then
+ echo "$with_version" > "${objroot}VERSION"
+ else
+ echo "${with_version}" | grep '^VERSION$' 2>&1 1>/dev/null
+ if test $? -ne 0 ; then
+ as_fn_error $? "${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid> or VERSION" "$LINENO" 5
+ fi
+ fi
+
+else
+
+ if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+ for pattern in '[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+ '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9][0-9]'; do
+ (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+ if test $? -eq 0 ; then
+ mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+ break
+ fi
+ done
+ fi
+ rm -f "${objroot}VERSION.tmp"
+
+fi
+
+
+if test ! -e "${objroot}VERSION" ; then
+ if test ! -e "${srcroot}VERSION" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Missing VERSION file, and unable to generate it; creating bogus VERSION" >&5
+$as_echo "Missing VERSION file, and unable to generate it; creating bogus VERSION" >&6; }
+ echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
+ else
+ cp ${srcroot}VERSION ${objroot}VERSION
+ fi
+fi
+jemalloc_version=`cat "${objroot}VERSION"`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $1}'`
jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $2}'`
jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $3}'`
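
The VERSION file holds a string of the form <major>.<minor>.<bugfix>-<nrev>-g<gid>, which the tr/awk pipeline splits by turning '.', '-', and 'g' into spaces. An equivalent sketch, using the bogus fallback version string from this diff:

    #include <stdio.h>

    int
    main(void) {
        const char *ver = "0.0.0-0-g0000000000000000000000000000000000000000";
        int major, minor, bugfix, nrev;
        char gid[41];  /* 40 hex digits plus NUL */
        if (sscanf(ver, "%d.%d.%d-%d-g%40s",
            &major, &minor, &bugfix, &nrev, gid) == 5) {
            printf("%d %d %d %d %s\n", major, minor, bugfix, nrev, gid);
        }
        return 0;
    }
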
@@ -6566,6 +10198,8 @@ jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $5}'
if test "x$abi" != "xpecoff" ; then
+ $as_echo "#define JEMALLOC_HAVE_PTHREAD " >>confdefs.h
+
for ac_header in pthread.h
do :
ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default"
@@ -6617,7 +10251,14 @@ fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5
$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; }
if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then :
- LIBS="$LIBS -lpthread"
+ T_APPEND_V=-lpthread
+ if test "x${LIBS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ LIBS="${LIBS}${T_APPEND_V}"
+else
+ LIBS="${LIBS} ${T_APPEND_V}"
+fi
+
+
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_create" >&5
$as_echo_n "checking for library containing pthread_create... " >&6; }
@@ -6679,82 +10320,277 @@ fi
fi
-fi
+ wrap_syms="${wrap_syms} pthread_create"
+ have_pthread="1"
+ have_dlsym="1"
+ for ac_header in dlfcn.h
+do :
+ ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default"
+if test "x$ac_cv_header_dlfcn_h" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_DLFCN_H 1
+_ACEOF
+ ac_fn_c_check_func "$LINENO" "dlsym" "ac_cv_func_dlsym"
+if test "x$ac_cv_func_dlsym" = xyes; then :
-CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
+$as_echo_n "checking for dlsym in -ldl... " >&6; }
+if ${ac_cv_lib_dl_dlsym+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
-ac_fn_c_check_func "$LINENO" "_malloc_thread_cleanup" "ac_cv_func__malloc_thread_cleanup"
-if test "x$ac_cv_func__malloc_thread_cleanup" = xyes; then :
- have__malloc_thread_cleanup="1"
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlsym ();
+int
+main ()
+{
+return dlsym ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_dl_dlsym=yes
else
- have__malloc_thread_cleanup="0"
+ ac_cv_lib_dl_dlsym=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5
+$as_echo "$ac_cv_lib_dl_dlsym" >&6; }
+if test "x$ac_cv_lib_dl_dlsym" = xyes; then :
+ LIBS="$LIBS -ldl"
+else
+ have_dlsym="0"
+fi
fi
-if test "x$have__malloc_thread_cleanup" = "x1" ; then
- $as_echo "#define JEMALLOC_MALLOC_THREAD_CLEANUP " >>confdefs.h
+else
+ have_dlsym="0"
+fi
- force_tls="1"
+done
+
+ if test "x$have_dlsym" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_DLSYM " >>confdefs.h
+
+ fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthread_atfork(3) is compilable" >&5
+$as_echo_n "checking whether pthread_atfork(3) is compilable... " >&6; }
+if ${je_cv_pthread_atfork+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <pthread.h>
+
+int
+main ()
+{
+
+ pthread_atfork((void *)0, (void *)0, (void *)0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_pthread_atfork=yes
+else
+ je_cv_pthread_atfork=no
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_atfork" >&5
+$as_echo "$je_cv_pthread_atfork" >&6; }
-ac_fn_c_check_func "$LINENO" "_pthread_mutex_init_calloc_cb" "ac_cv_func__pthread_mutex_init_calloc_cb"
-if test "x$ac_cv_func__pthread_mutex_init_calloc_cb" = xyes; then :
- have__pthread_mutex_init_calloc_cb="1"
+ if test "x${je_cv_pthread_atfork}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_PTHREAD_ATFORK " >>confdefs.h
+
+ fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthread_setname_np(3) is compilable" >&5
+$as_echo_n "checking whether pthread_setname_np(3) is compilable... " >&6; }
+if ${je_cv_pthread_setname_np+:} false; then :
+ $as_echo_n "(cached) " >&6
else
- have__pthread_mutex_init_calloc_cb="0"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <pthread.h>
+
+int
+main ()
+{
+ pthread_setname_np(pthread_self(), "setname_test");
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_pthread_setname_np=yes
+else
+ je_cv_pthread_setname_np=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_setname_np" >&5
+$as_echo "$je_cv_pthread_setname_np" >&6; }
-if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
- $as_echo "#define JEMALLOC_MUTEX_INIT_CB 1" >>confdefs.h
+ if test "x${je_cv_pthread_setname_np}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP " >>confdefs.h
+ fi
fi
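
Both probes above are link-only tests for optional pthread extensions. A combined sketch (build with -pthread; pthread_setname_np() is nonstandard, shown here with the glibc signature):

    #define _GNU_SOURCE  /* pthread_setname_np() on glibc */
    #include <pthread.h>

    static void
    prepare(void) {
        /* would run in the parent immediately before fork(2) */
    }

    int
    main(void) {
        pthread_atfork(prepare, NULL, NULL);
        /* name is limited to 15 chars + NUL on Linux */
        pthread_setname_np(pthread_self(), "setname_test");
        return 0;
    }
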
-# Check whether --enable-lazy_lock was given.
-if test "${enable_lazy_lock+set}" = set; then :
- enableval=$enable_lazy_lock; if test "x$enable_lazy_lock" = "xno" ; then
- enable_lazy_lock="0"
+T_APPEND_V=-D_REENTRANT
+ if test "x${CPPFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CPPFLAGS="${CPPFLAGS}${T_APPEND_V}"
else
- enable_lazy_lock="1"
+ CPPFLAGS="${CPPFLAGS} ${T_APPEND_V}"
fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+ $as_echo_n "(cached) " >&6
else
- enable_lazy_lock="0"
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_clock_gettime+:} false; then :
+ break
fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
-if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
-$as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
- enable_lazy_lock="1"
+else
+ ac_cv_search_clock_gettime=no
fi
-if test "x$enable_lazy_lock" = "x1" ; then
- if test "x$abi" != "xpecoff" ; then
- for ac_header in dlfcn.h
-do :
- ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default"
-if test "x$ac_cv_header_dlfcn_h" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_DLFCN_H 1
-_ACEOF
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+ if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+ SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+
+ unset ac_cv_search_clock_gettime
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -dynamic" >&5
+$as_echo_n "checking whether compiler supports -dynamic... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-dynamic
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- as_fn_error $? "dlfcn.h is missing" "$LINENO" 5
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
-done
- ac_fn_c_check_func "$LINENO" "dlsym" "ac_cv_func_dlsym"
-if test "x$ac_cv_func_dlsym" = xyes; then :
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-dynamic
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
-$as_echo_n "checking for dlsym in -ldl... " >&6; }
-if ${ac_cv_lib_dl_dlsym+:} false; then :
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
$as_echo_n "(cached) " >&6
else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldl $LIBS"
+ ac_func_search_save_LIBS=$LIBS
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
@@ -6764,63 +10600,416 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
#ifdef __cplusplus
extern "C"
#endif
-char dlsym ();
+char clock_gettime ();
int
main ()
{
-return dlsym ();
+return clock_gettime ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_clock_gettime+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+ ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
+ CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+ fi
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic_coarse+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <time.h>
+
+int
+main ()
+{
+
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_dl_dlsym=yes
+ je_cv_clock_monotonic_coarse=yes
else
- ac_cv_lib_dl_dlsym=no
+ je_cv_clock_monotonic_coarse=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5
-$as_echo "$ac_cv_lib_dl_dlsym" >&6; }
-if test "x$ac_cv_lib_dl_dlsym" = xyes; then :
- LIBS="$LIBS -ldl"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic_coarse" >&5
+$as_echo "$je_cv_clock_monotonic_coarse" >&6; }
+
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE 1" >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable" >&5
+$as_echo_n "checking whether clock_gettime(CLOCK_MONOTONIC, ...) is compilable... " >&6; }
+if ${je_cv_clock_monotonic+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <unistd.h>
+#include <time.h>
+
+int
+main ()
+{
+
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+# error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_clock_monotonic=yes
+else
+ je_cv_clock_monotonic=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_clock_monotonic" >&5
+$as_echo "$je_cv_clock_monotonic" >&6; }
+
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1" >>confdefs.h
+
+fi
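
The two probes above distinguish CLOCK_MONOTONIC_COARSE, a cheaper low-resolution Linux clock, from the POSIX CLOCK_MONOTONIC. A sketch exercising both, with the Linux-only clock guarded:

    #include <stdio.h>
    #include <time.h>

    int
    main(void) {
        struct timespec ts;
        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
            printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
    #ifdef CLOCK_MONOTONIC_COARSE
        clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);  /* Linux-only, cheaper */
    #endif
        return 0;
    }
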
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mach_absolute_time() is compilable" >&5
+$as_echo_n "checking whether mach_absolute_time() is compilable... " >&6; }
+if ${je_cv_mach_absolute_time+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <mach/mach_time.h>
+
+int
+main ()
+{
+
+ mach_absolute_time();
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_mach_absolute_time=yes
+else
+ je_cv_mach_absolute_time=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mach_absolute_time" >&5
+$as_echo "$je_cv_mach_absolute_time" >&6; }
+
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1" >>confdefs.h
+
+fi
+
+# Check whether --enable-syscall was given.
+if test "${enable_syscall+set}" = set; then :
+ enableval=$enable_syscall; if test "x$enable_syscall" = "xno" ; then
+ enable_syscall="0"
+else
+ enable_syscall="1"
+fi
+
+else
+ enable_syscall="1"
+
+fi
+
+if test "x$enable_syscall" = "x1" ; then
+ SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
- as_fn_error $? "libdl is missing" "$LINENO" 5
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
fi
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ return 0;
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ je_cv_cflags_added=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether syscall(2) is compilable" >&5
+$as_echo_n "checking whether syscall(2) is compilable... " >&6; }
+if ${je_cv_syscall+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int
+main ()
+{
+
+ syscall(SYS_write, 2, "hello", 5);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_syscall=yes
+else
+ je_cv_syscall=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_syscall" >&5
+$as_echo "$je_cv_syscall" >&6; }
+
+ CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+ if test "x$je_cv_syscall" = "xyes" ; then
+ $as_echo "#define JEMALLOC_USE_SYSCALL " >>confdefs.h
fi
- $as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h
+fi
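
The temporary -Werror above makes the syscall(2) probe strict, so a mere deprecation warning (as on newer Darwin, where syscall() is deprecated) counts as failure. A sketch of the probed call, Linux-flavored since SYS_write comes from <sys/syscall.h>:

    #include <sys/syscall.h>
    #include <unistd.h>

    int
    main(void) {
        /* raw system call, bypassing the libc write() wrapper */
        syscall(SYS_write, 2, "hello\n", 6);
        return 0;
    }
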
+
+ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv"
+if test "x$ac_cv_func_secure_getenv" = xyes; then :
+ have_secure_getenv="1"
+else
+ have_secure_getenv="0"
fi
+if test "x$have_secure_getenv" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_SECURE_GETENV " >>confdefs.h
-# Check whether --enable-tls was given.
-if test "${enable_tls+set}" = set; then :
- enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then
- enable_tls="0"
+fi
+
+ac_fn_c_check_func "$LINENO" "sched_getcpu" "ac_cv_func_sched_getcpu"
+if test "x$ac_cv_func_sched_getcpu" = xyes; then :
+ have_sched_getcpu="1"
else
- enable_tls="1"
+ have_sched_getcpu="0"
+
fi
+if test "x$have_sched_getcpu" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_SCHED_GETCPU " >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "sched_setaffinity" "ac_cv_func_sched_setaffinity"
+if test "x$ac_cv_func_sched_setaffinity" = xyes; then :
+ have_sched_setaffinity="1"
else
- enable_tls="1"
+ have_sched_setaffinity="0"
fi
-if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing TLS to avoid allocator/threading bootstrap issues" >&5
-$as_echo "Forcing TLS to avoid allocator/threading bootstrap issues" >&6; }
- enable_tls="1"
+if test "x$have_sched_setaffinity" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_SCHED_SETAFFINITY " >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "issetugid" "ac_cv_func_issetugid"
+if test "x$ac_cv_func_issetugid" = xyes; then :
+ have_issetugid="1"
+else
+ have_issetugid="0"
+
+fi
+
+if test "x$have_issetugid" = "x1" ; then
+ $as_echo "#define JEMALLOC_HAVE_ISSETUGID " >>confdefs.h
+
+fi
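
secure_getenv(3), sched_getcpu(3), sched_setaffinity(2), and issetugid(2) are each optional; presence only sets the corresponding JEMALLOC_HAVE_* define. A sketch of the first, a glibc extension that refuses to read the environment in privileged (setuid/setgid) processes:

    #define _GNU_SOURCE  /* secure_getenv() is a glibc extension */
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void) {
    #ifdef __GLIBC__
        const char *v = secure_getenv("MALLOC_CONF");
        printf("%s\n", v != NULL ? v : "(unset or privileged)");
    #endif
        return 0;
    }
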
+
+ac_fn_c_check_func "$LINENO" "_malloc_thread_cleanup" "ac_cv_func__malloc_thread_cleanup"
+if test "x$ac_cv_func__malloc_thread_cleanup" = xyes; then :
+ have__malloc_thread_cleanup="1"
+else
+ have__malloc_thread_cleanup="0"
+
+fi
+
+if test "x$have__malloc_thread_cleanup" = "x1" ; then
+ $as_echo "#define JEMALLOC_MALLOC_THREAD_CLEANUP " >>confdefs.h
+
+ wrap_syms="${wrap_syms} _malloc_thread_cleanup"
+ force_tls="1"
+fi
+
+ac_fn_c_check_func "$LINENO" "_pthread_mutex_init_calloc_cb" "ac_cv_func__pthread_mutex_init_calloc_cb"
+if test "x$ac_cv_func__pthread_mutex_init_calloc_cb" = xyes; then :
+ have__pthread_mutex_init_calloc_cb="1"
+else
+ have__pthread_mutex_init_calloc_cb="0"
+
+fi
+
+if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
+ $as_echo "#define JEMALLOC_MUTEX_INIT_CB 1" >>confdefs.h
+
+ wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork"
+fi
+
+# Check whether --enable-lazy_lock was given.
+if test "${enable_lazy_lock+set}" = set; then :
+ enableval=$enable_lazy_lock; if test "x$enable_lazy_lock" = "xno" ; then
+ enable_lazy_lock="0"
+else
+ enable_lazy_lock="1"
+fi
+
+else
+ enable_lazy_lock=""
+
+fi
+
+if test "x${enable_lazy_lock}" = "x" ; then
+ if test "x${force_lazy_lock}" = "x1" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
+$as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
+ enable_lazy_lock="1"
+ else
+ enable_lazy_lock="0"
+ fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no lazy-lock because thread creation monitoring is unimplemented" >&5
+$as_echo "Forcing no lazy-lock because thread creation monitoring is unimplemented" >&6; }
+ enable_lazy_lock="0"
+fi
+if test "x$enable_lazy_lock" = "x1" ; then
+ if test "x$have_dlsym" = "x1" ; then
+ $as_echo "#define JEMALLOC_LAZY_LOCK " >>confdefs.h
+
+ else
+ as_fn_error $? "Missing dlsym support: lazy-lock cannot be enabled." "$LINENO" 5
+ fi
fi
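
Lazy locking now hard-requires dlsym(): jemalloc defers real locking until the first thread exists, which it detects by interposing pthread_create() and chaining to the genuine symbol. A sketch of that interposition idiom, meant to be built as a preloadable shared object (cc -shared -fPIC, plus -ldl on older glibc):

    #define _GNU_SOURCE  /* RTLD_NEXT */
    #include <dlfcn.h>
    #include <pthread.h>

    typedef int (*pthread_create_t)(pthread_t *, const pthread_attr_t *,
        void *(*)(void *), void *);

    /* Interposed entry point: note that a thread is being created, then
     * forward to the real pthread_create() found via RTLD_NEXT. */
    int
    pthread_create(pthread_t *thread, const pthread_attr_t *attr,
        void *(*start)(void *), void *arg) {
        static pthread_create_t real;
        if (real == NULL)
            real = (pthread_create_t)dlsym(RTLD_NEXT, "pthread_create");
        /* ... switch from no-op locks to real locks here ... */
        return real(thread, attr, start, arg);
    }
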
-if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no TLS to avoid allocator/threading bootstrap issues" >&5
-$as_echo "Forcing no TLS to avoid allocator/threading bootstrap issues" >&6; }
+
+
+if test "x${force_tls}" = "x1" ; then
+ enable_tls="1"
+elif test "x${force_tls}" = "x0" ; then
enable_tls="0"
+else
+ enable_tls="1"
fi
if test "x${enable_tls}" = "x1" ; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TLS" >&5
@@ -6851,6 +11040,8 @@ $as_echo "no" >&6; }
enable_tls="0"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+ enable_tls="0"
fi
if test "x${enable_tls}" = "x1" ; then
@@ -6858,97 +11049,133 @@ if test "x${enable_tls}" = "x1" ; then
#define JEMALLOC_TLS
_ACEOF
-elif test "x${force_tls}" = "x1" ; then
- as_fn_error $? "Failed to configure TLS, which is mandatory for correct function" "$LINENO" 5
fi
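
With this hunk TLS defaults to on unless force_tls disables it, and a failed probe no longer aborts configuration even when TLS was forced. A sketch of what the probe compiles:

    #include <stdio.h>

    static __thread int counter;  /* one instance per thread */

    int
    main(void) {
        counter++;
        printf("%d\n", counter);  /* 1: the main thread's private copy */
        return 0;
    }
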
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
-$as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
-if ${je_cv_function_ffsl+:} false; then :
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C11 atomics is compilable" >&5
+$as_echo_n "checking whether C11 atomics is compilable... " >&6; }
+if ${je_cv_c11_atomics+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <stdio.h>
-#include <strings.h>
-#include <string.h>
+#include <stdint.h>
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#else
+#error Atomics not available
+#endif
int
main ()
{
- {
- int rv = ffsl(0x08);
- printf("%d\n", rv);
- }
+ uint64_t *p = (uint64_t *)0;
+ uint64_t x = 1;
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ uint64_t r = atomic_fetch_add(a, x) + x;
+ return r == 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_function_ffsl=yes
+ je_cv_c11_atomics=yes
else
- je_cv_function_ffsl=no
+ je_cv_c11_atomics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
-$as_echo "$je_cv_function_ffsl" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_c11_atomics" >&5
+$as_echo "$je_cv_c11_atomics" >&6; }
+
+if test "x${je_cv_c11_atomics}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_C11_ATOMICS 1" >>confdefs.h
-if test "x${je_cv_function_ffsl}" != "xyes" ; then
- as_fn_error $? "Cannot build without ffsl(3)" "$LINENO" 5
fi
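
The hard ffsl requirement removed here lives earlier in the script now; in its place this probes C11 <stdatomic.h>. A sketch of the operation the conftest performs, but on a real atomic object rather than a cast null pointer (the conftest only needs to link):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void) {
        atomic_uint_least64_t a = 0;
        uint_least64_t prev = atomic_fetch_add(&a, 1);
        printf("%llu %llu\n", (unsigned long long)prev,
            (unsigned long long)atomic_load(&a));  /* 0 1 */
        return 0;
    }
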
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether atomic(9) is compilable" >&5
-$as_echo_n "checking whether atomic(9) is compilable... " >&6; }
-if ${je_cv_atomic9+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether GCC __atomic atomics is compilable" >&5
+$as_echo_n "checking whether GCC __atomic atomics is compilable... " >&6; }
+if ${je_cv_gcc_atomic_atomics+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <sys/types.h>
-#include <machine/atomic.h>
-#include <inttypes.h>
int
main ()
{
- {
- uint32_t x32 = 0;
- volatile uint32_t *x32p = &x32;
- atomic_fetchadd_32(x32p, 1);
- }
- {
- unsigned long xlong = 0;
- volatile unsigned long *xlongp = &xlong;
- atomic_fetchadd_long(xlongp, 1);
- }
+ int x = 0;
+ int val = 1;
+ int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED);
+ int after_add = x;
+ return after_add == 1;
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- je_cv_atomic9=yes
+ je_cv_gcc_atomic_atomics=yes
else
- je_cv_atomic9=no
+ je_cv_gcc_atomic_atomics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_atomic9" >&5
-$as_echo "$je_cv_atomic9" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_atomic_atomics" >&5
+$as_echo "$je_cv_gcc_atomic_atomics" >&6; }
-if test "x${je_cv_atomic9}" = "xyes" ; then
- $as_echo "#define JEMALLOC_ATOMIC9 1" >>confdefs.h
+if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_GCC_ATOMIC_ATOMICS 1" >>confdefs.h
+
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether GCC __sync atomics is compilable" >&5
+$as_echo_n "checking whether GCC __sync atomics is compilable... " >&6; }
+if ${je_cv_gcc_sync_atomics+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+
+ int x = 0;
+ int before_add = __sync_fetch_and_add(&x, 1);
+ int after_add = x;
+ return (before_add == 0) && (after_add == 1);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_gcc_sync_atomics=yes
+else
+ je_cv_gcc_sync_atomics=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_gcc_sync_atomics" >&5
+$as_echo "$je_cv_gcc_sync_atomics" >&6; }
+
+if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_GCC_SYNC_ATOMICS 1" >>confdefs.h
fi
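
The FreeBSD atomic(9) probe is replaced by checks for both GCC intrinsic families: __atomic takes an explicit memory-order argument, while the older __sync interface implies full barriers. A sketch contrasting them:

    #include <stdio.h>

    int
    main(void) {
        int x = 0;
        int a = __atomic_fetch_add(&x, 1, __ATOMIC_RELAXED);  /* x: 0 -> 1 */
        int b = __sync_fetch_and_add(&x, 1);                  /* x: 1 -> 2 */
        printf("%d %d %d\n", a, b, x);  /* 0 1 2 */
        return 0;
    }
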
@@ -7002,6 +11229,211 @@ fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(2) is compilable" >&5
+$as_echo_n "checking whether madvise(2) is compilable... " >&6; }
+if ${je_cv_madvise+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ madvise((void *)0, 0, 0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_madvise=yes
+else
+ je_cv_madvise=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madvise" >&5
+$as_echo "$je_cv_madvise" >&6; }
+
+if test "x${je_cv_madvise}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_MADVISE " >>confdefs.h
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_FREE) is compilable" >&5
+$as_echo_n "checking whether madvise(..., MADV_FREE) is compilable... " >&6; }
+if ${je_cv_madv_free+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ madvise((void *)0, 0, MADV_FREE);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_madv_free=yes
+else
+ je_cv_madv_free=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_free" >&5
+$as_echo "$je_cv_madv_free" >&6; }
+
+ if test "x${je_cv_madv_free}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+
+ elif test "x${je_cv_madvise}" = "xyes" ; then
+ case "${host_cpu}" in i686|x86_64)
+ case "${host}" in *-*-linux*)
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_FREE " >>confdefs.h
+
+ $as_echo "#define JEMALLOC_DEFINE_MADVISE_FREE " >>confdefs.h
+
+ ;;
+ esac
+ ;;
+ esac
+ fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_DONTNEED) is compilable" >&5
+$as_echo_n "checking whether madvise(..., MADV_DONTNEED) is compilable... " >&6; }
+if ${je_cv_madv_dontneed+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ madvise((void *)0, 0, MADV_DONTNEED);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_madv_dontneed=yes
+else
+ je_cv_madv_dontneed=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_dontneed" >&5
+$as_echo "$je_cv_madv_dontneed" >&6; }
+
+ if test "x${je_cv_madv_dontneed}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED " >>confdefs.h
+
+ fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_DO[NT]DUMP) is compilable" >&5
+$as_echo_n "checking whether madvise(..., MADV_DO[NT]DUMP) is compilable... " >&6; }
+if ${je_cv_madv_dontdump+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ madvise((void *)0, 0, MADV_DONTDUMP);
+ madvise((void *)0, 0, MADV_DODUMP);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_madv_dontdump=yes
+else
+ je_cv_madv_dontdump=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_madv_dontdump" >&5
+$as_echo "$je_cv_madv_dontdump" >&6; }
+
+ if test "x${je_cv_madv_dontdump}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_MADVISE_DONTDUMP " >>confdefs.h
+
+ fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable" >&5
+$as_echo_n "checking whether madvise(..., MADV_[NO]HUGEPAGE) is compilable... " >&6; }
+if ${je_cv_thp+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+ madvise((void *)0, 0, MADV_HUGEPAGE);
+ madvise((void *)0, 0, MADV_NOHUGEPAGE);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_thp=yes
+else
+ je_cv_thp=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_thp" >&5
+$as_echo "$je_cv_thp" >&6; }
+
+case "${host_cpu}" in
+ arm*)
+ ;;
+ *)
+ if test "x${je_cv_thp}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_MADVISE_HUGE " >>confdefs.h
+
+ fi
+ ;;
+esac
+fi
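
The probes above pick a purge strategy: MADV_FREE reclaims pages lazily under memory pressure, MADV_DONTNEED drops them eagerly, and on i686/x86_64 Linux MADV_FREE is assumed (JEMALLOC_DEFINE_MADVISE_FREE) even when the headers predate it. A sketch of purging an anonymous mapping:

    #include <stddef.h>
    #include <sys/mman.h>

    int
    main(void) {
        size_t len = (size_t)1 << 21;
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            return 1;
    #ifdef MADV_FREE
        madvise(p, len, MADV_FREE);      /* lazy: reclaimed under pressure */
    #else
        madvise(p, len, MADV_DONTNEED);  /* eager: zero-filled on next touch */
    #endif
        munmap(p, len);
        return 0;
    }
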
+
+
+
if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
@@ -7097,6 +11529,93 @@ $as_echo "$je_cv_sync_compare_and_swap_8" >&6; }
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
+$as_echo_n "checking for __builtin_clz... " >&6; }
+if ${je_cv_builtin_clz+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ {
+ unsigned x = 0;
+ int y = __builtin_clz(x);
+ }
+ {
+ unsigned long x = 0;
+ int y = __builtin_clzl(x);
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_builtin_clz=yes
+else
+ je_cv_builtin_clz=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_builtin_clz" >&5
+$as_echo "$je_cv_builtin_clz" >&6; }
+
+if test "x${je_cv_builtin_clz}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_BUILTIN_CLZ " >>confdefs.h
+
+fi
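
The conftest above only needs to link, so passing 0 is fine there even though __builtin_clz(0) is undefined at run time. A sketch of the lg-floor idiom such builtins enable:

    #include <stdio.h>

    static int
    lg_floor(unsigned long x) {
        /* bits in the type, minus one, minus the leading zeros; x must be != 0 */
        return (int)(sizeof(x) * 8 - 1) - __builtin_clzl(x);
    }

    int
    main(void) {
        printf("%d %d\n", lg_floor(4096UL), lg_floor(5000UL));  /* 12 12 */
        return 0;
    }
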
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin os_unfair_lock_*() is compilable" >&5
+$as_echo_n "checking whether Darwin os_unfair_lock_*() is compilable... " >&6; }
+if ${je_cv_os_unfair_lock+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <os/lock.h>
+#include <AvailabilityMacros.h>
+
+int
+main ()
+{
+
+ #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200
+ #error "os_unfair_lock is not supported"
+ #else
+ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+ os_unfair_lock_lock(&lock);
+ os_unfair_lock_unlock(&lock);
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_os_unfair_lock=yes
+else
+ je_cv_os_unfair_lock=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_os_unfair_lock" >&5
+$as_echo "$je_cv_os_unfair_lock" >&6; }
+
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_OS_UNFAIR_LOCK " >>confdefs.h
+
+fi
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5
$as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; }
@@ -7160,159 +11679,375 @@ if test "x${enable_zone_allocator}" = "x1" ; then
if test "x${abi}" != "xmacho"; then
as_fn_error $? "--enable-zone-allocator is only supported on Darwin" "$LINENO" 5
fi
- $as_echo "#define JEMALLOC_IVSALLOC " >>confdefs.h
-
$as_echo "#define JEMALLOC_ZONE " >>confdefs.h
+fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking malloc zone version" >&5
-$as_echo_n "checking malloc zone version... " >&6; }
+# Check whether --enable-initial-exec-tls was given.
+if test "${enable_initial_exec_tls+set}" = set; then :
+ enableval=$enable_initial_exec_tls; if test "x$enable_initial_exec_tls" = "xno" ; then
+ enable_initial_exec_tls="0"
+else
+ enable_initial_exec_tls="1"
+fi
+else
+ enable_initial_exec_tls="1"
+fi
+
+
+
+if test "x${je_cv_tls_model}" = "xyes" -a \
+ "x${enable_initial_exec_tls}" = "x1" ; then
+ $as_echo "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))" >>confdefs.h
+
+else
+ $as_echo "#define JEMALLOC_TLS_MODEL " >>confdefs.h
+
+fi
+
+
+if test "x${have_pthread}" = "x1" -a "x${have_dlsym}" = "x1" \
+ -a "x${je_cv_os_unfair_lock}" != "xyes" \
+ -a "x${je_cv_osspin}" != "xyes" ; then
+ $as_echo "#define JEMALLOC_BACKGROUND_THREAD 1" >>confdefs.h
+
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether glibc malloc hook is compilable" >&5
+$as_echo_n "checking whether glibc malloc hook is compilable... " >&6; }
+if ${je_cv_glibc_malloc_hook+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+#include <stddef.h>
+
+extern void (* __free_hook)(void *ptr);
+extern void *(* __malloc_hook)(size_t size);
+extern void *(* __realloc_hook)(void *ptr, size_t size);
+
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 14 ? 1 : -1]
+
+ void *ptr = 0L;
+ if (__malloc_hook) ptr = __malloc_hook(1);
+ if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
+ if (__free_hook && ptr) __free_hook(ptr);
;
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=3
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_glibc_malloc_hook=yes
else
+ je_cv_glibc_malloc_hook=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_glibc_malloc_hook" >&5
+$as_echo "$je_cv_glibc_malloc_hook" >&6; }
+if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ $as_echo "#define JEMALLOC_GLIBC_MALLOC_HOOK " >>confdefs.h
+
+ wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook"
+ fi
+fi
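
On unprefixed glibc builds, jemalloc also captures the legacy allocation hooks and adds them to wrap_syms for symbol mangling. A sketch mirroring the link probe; it succeeds only on older glibc, which still exports these variables (they were removed from the API in glibc 2.34):

    #include <stddef.h>

    /* legacy glibc hook; declaration only, the variable lives in libc */
    extern void *(*__malloc_hook)(size_t size);

    int
    main(void) {
        return __malloc_hook != NULL;  /* links only where the hook exists */
    }
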
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether glibc memalign hook is compilable" >&5
+$as_echo_n "checking whether glibc memalign hook is compilable... " >&6; }
+if ${je_cv_glibc_memalign_hook+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+#include <stddef.h>
+
+extern void *(* __memalign_hook)(size_t alignment, size_t size);
+
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 15 ? 1 : -1]
+
+ void *ptr = 0L;
+ if (__memalign_hook) ptr = __memalign_hook(16, 7);
;
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=5
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_glibc_memalign_hook=yes
else
+ je_cv_glibc_memalign_hook=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_glibc_memalign_hook" >&5
+$as_echo "$je_cv_glibc_memalign_hook" >&6; }
+
+if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ $as_echo "#define JEMALLOC_GLIBC_MEMALIGN_HOOK " >>confdefs.h
+
+ wrap_syms="${wrap_syms} __memalign_hook"
+ fi
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether pthreads adaptive mutexes is compilable" >&5
+$as_echo_n "checking whether pthreads adaptive mutexes is compilable... " >&6; }
+if ${je_cv_pthread_mutex_adaptive_np+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+#include <pthread.h>
+
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 16 ? 1 : -1]
+
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+ pthread_mutexattr_destroy(&attr);
;
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_pthread_mutex_adaptive_np=yes
+else
+ je_cv_pthread_mutex_adaptive_np=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_pthread_mutex_adaptive_np" >&5
+$as_echo "$je_cv_pthread_mutex_adaptive_np" >&6; }
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP " >>confdefs.h
+
+fi
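
PTHREAD_MUTEX_ADAPTIVE_NP is a glibc extension that spins briefly before putting the waiter to sleep. A sketch of the probed sequence (build with -pthread):

    #define _GNU_SOURCE  /* PTHREAD_MUTEX_ADAPTIVE_NP */
    #include <pthread.h>

    int
    main(void) {
        pthread_mutexattr_t attr;
        pthread_mutex_t m;
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
        pthread_mutex_init(&m, &attr);
        pthread_mutex_lock(&m);
        pthread_mutex_unlock(&m);
        pthread_mutex_destroy(&m);
        pthread_mutexattr_destroy(&attr);
        return 0;
    }
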
+
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -D_GNU_SOURCE" >&5
+$as_echo_n "checking whether compiler supports -D_GNU_SOURCE... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-D_GNU_SOURCE
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+
int
main ()
{
-static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 9 ? 1 : -1]
+
+ return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=6
+ je_cv_cflags_added=-D_GNU_SOURCE
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-Werror
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
+else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
+
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+
int
main ()
{
-static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 13 ? 1 : -1]
+
+ return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=7
+ je_cv_cflags_added=-Werror
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
else
- JEMALLOC_ZONE_VERSION=
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -herror_on_warning" >&5
+$as_echo_n "checking whether compiler supports -herror_on_warning... " >&6; }
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+T_APPEND_V=-herror_on_warning
+ if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${T_APPEND_V}" = "x" ; then
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}${T_APPEND_V}"
else
+ CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS} ${T_APPEND_V}"
+fi
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+
int
main ()
{
-static foo[sizeof(malloc_zone_t) == sizeof(void *) * 17 ? 1 : -1]
+
+ return 0;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=8
+ je_cv_cflags_added=-herror_on_warning
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
else
+ je_cv_cflags_added=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether strerror_r returns char with gnu source is compilable" >&5
+$as_echo_n "checking whether strerror_r returns char with gnu source is compilable... " >&6; }
+if ${je_cv_strerror_r_returns_char_with_gnu_source+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <malloc/malloc.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
int
main ()
{
-static foo[sizeof(malloc_zone_t) > sizeof(void *) * 17 ? 1 : -1]
+
+ char *buffer = (char *) malloc(100);
+ char *error = strerror_r(EINVAL, buffer, 100);
+ printf("%s\n", error);
;
return 0;
}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- JEMALLOC_ZONE_VERSION=9
+if ac_fn_c_try_link "$LINENO"; then :
+ je_cv_strerror_r_returns_char_with_gnu_source=yes
else
- JEMALLOC_ZONE_VERSION=
-
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ je_cv_strerror_r_returns_char_with_gnu_source=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_strerror_r_returns_char_with_gnu_source" >&5
+$as_echo "$je_cv_strerror_r_returns_char_with_gnu_source" >&6; }
+
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+if test "x${CONFIGURE_CFLAGS}" = "x" -o "x${SPECIFIED_CFLAGS}" = "x" ; then
+ CFLAGS="${CONFIGURE_CFLAGS}${SPECIFIED_CFLAGS}"
+else
+ CFLAGS="${CONFIGURE_CFLAGS} ${SPECIFIED_CFLAGS}"
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
-$as_echo "unsupported" >&6; }
- as_fn_error $? "Unsupported malloc zone version" "$LINENO" 5
- fi
- if test "${JEMALLOC_ZONE_VERSION}" = 9; then
- JEMALLOC_ZONE_VERSION=8
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: > 8" >&5
-$as_echo "> 8" >&6; }
- else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JEMALLOC_ZONE_VERSION" >&5
-$as_echo "$JEMALLOC_ZONE_VERSION" >&6; }
- fi
- cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_ZONE_VERSION $JEMALLOC_ZONE_VERSION
-_ACEOF
+
+
+if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then
+ $as_echo "#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE " >>confdefs.h
fi
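
This probe settles the GNU-vs-XSI strerror_r ambiguity: compiled with -D_GNU_SOURCE (applied above and reverted just after), glibc's strerror_r returns char * where POSIX specifies int, so the assignment below compiles only with the GNU variant. A standalone sketch:

    #define _GNU_SOURCE  /* selects the char *-returning strerror_r on glibc */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void) {
        char buf[100];
        char *msg = strerror_r(EINVAL, buf, sizeof(buf));
        printf("%s\n", msg);
        return 0;
    }
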
@@ -7411,11 +12146,11 @@ fi
-ac_config_commands="$ac_config_commands include/jemalloc/internal/private_namespace.h"
+ac_config_commands="$ac_config_commands include/jemalloc/internal/public_symbols.txt"
-ac_config_commands="$ac_config_commands include/jemalloc/internal/private_unnamespace.h"
+ac_config_commands="$ac_config_commands include/jemalloc/internal/private_symbols.awk"
-ac_config_commands="$ac_config_commands include/jemalloc/internal/public_symbols.txt"
+ac_config_commands="$ac_config_commands include/jemalloc/internal/private_symbols_jet.awk"
ac_config_commands="$ac_config_commands include/jemalloc/internal/public_namespace.h"
@@ -7440,7 +12175,7 @@ ac_config_headers="$ac_config_headers $cfghdrs_tup"
-ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc.sh"
+ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc-config bin/jemalloc.sh bin/jeprof"
@@ -8028,6 +12763,7 @@ gives unlimited permission to copy, distribute and modify it."
ac_pwd='$ac_pwd'
srcdir='$srcdir'
INSTALL='$INSTALL'
+AWK='$AWK'
test -n "\$AWK" || AWK=awk
_ACEOF
@@ -8137,17 +12873,24 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
srcdir="${srcdir}"
objroot="${objroot}"
+ mangling_map="${mangling_map}"
+ public_syms="${public_syms}"
+ JEMALLOC_PREFIX="${JEMALLOC_PREFIX}"
srcdir="${srcdir}"
objroot="${objroot}"
+ public_syms="${public_syms}"
+ wrap_syms="${wrap_syms}"
+ SYM_PREFIX="${SYM_PREFIX}"
+ JEMALLOC_PREFIX="${JEMALLOC_PREFIX}"
srcdir="${srcdir}"
objroot="${objroot}"
- mangling_map="${mangling_map}"
public_syms="${public_syms}"
- JEMALLOC_PREFIX="${JEMALLOC_PREFIX}"
+ wrap_syms="${wrap_syms}"
+ SYM_PREFIX="${SYM_PREFIX}"
srcdir="${srcdir}"
@@ -8158,8 +12901,11 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
objroot="${objroot}"
+ SHELL="${SHELL}"
srcdir="${srcdir}"
objroot="${objroot}"
+ LG_QUANTA="${LG_QUANTA}"
+ LG_PAGE_SIZES="${LG_PAGE_SIZES}"
srcdir="${srcdir}"
@@ -8191,9 +12937,9 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
for ac_config_target in $ac_config_targets
do
case $ac_config_target in
- "include/jemalloc/internal/private_namespace.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/private_namespace.h" ;;
- "include/jemalloc/internal/private_unnamespace.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/private_unnamespace.h" ;;
"include/jemalloc/internal/public_symbols.txt") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/public_symbols.txt" ;;
+ "include/jemalloc/internal/private_symbols.awk") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/private_symbols.awk" ;;
+ "include/jemalloc/internal/private_symbols_jet.awk") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/private_symbols_jet.awk" ;;
"include/jemalloc/internal/public_namespace.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/public_namespace.h" ;;
"include/jemalloc/internal/public_unnamespace.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/public_unnamespace.h" ;;
"include/jemalloc/internal/size_classes.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/size_classes.h" ;;
@@ -8205,7 +12951,9 @@ do
"$cfghdrs_tup") CONFIG_HEADERS="$CONFIG_HEADERS $cfghdrs_tup" ;;
"$cfgoutputs_tup") CONFIG_FILES="$CONFIG_FILES $cfgoutputs_tup" ;;
"config.stamp") CONFIG_FILES="$CONFIG_FILES config.stamp" ;;
+ "bin/jemalloc-config") CONFIG_FILES="$CONFIG_FILES bin/jemalloc-config" ;;
"bin/jemalloc.sh") CONFIG_FILES="$CONFIG_FILES bin/jemalloc.sh" ;;
+ "bin/jeprof") CONFIG_FILES="$CONFIG_FILES bin/jeprof" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
@@ -8761,14 +13509,6 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
case $ac_file$ac_mode in
- "include/jemalloc/internal/private_namespace.h":C)
- mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h"
- ;;
- "include/jemalloc/internal/private_unnamespace.h":C)
- mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/private_unnamespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_unnamespace.h"
- ;;
"include/jemalloc/internal/public_symbols.txt":C)
f="${objroot}include/jemalloc/internal/public_symbols.txt"
mkdir -p "${objroot}include/jemalloc/internal"
@@ -8785,6 +13525,18 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
echo "${n}:${m}" >> "${f}"
done
;;
+ "include/jemalloc/internal/private_symbols.awk":C)
+ f="${objroot}include/jemalloc/internal/private_symbols.awk"
+ mkdir -p "${objroot}include/jemalloc/internal"
+ export_syms=`for sym in ${public_syms}; do echo "${JEMALLOC_PREFIX}${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;`
+ "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols.awk"
+ ;;
+ "include/jemalloc/internal/private_symbols_jet.awk":C)
+ f="${objroot}include/jemalloc/internal/private_symbols_jet.awk"
+ mkdir -p "${objroot}include/jemalloc/internal"
+ export_syms=`for sym in ${public_syms}; do echo "jet_${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;`
+ "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols_jet.awk"
+ ;;
"include/jemalloc/internal/public_namespace.h":C)
mkdir -p "${objroot}include/jemalloc/internal"
"${srcdir}/include/jemalloc/internal/public_namespace.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h"
@@ -8795,7 +13547,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
;;
"include/jemalloc/internal/size_classes.h":C)
mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+ "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" 3 "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h"
;;
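For reference, size_classes.sh now takes four positional arguments; judging by the script's usage line they are the list of lg(quantum) values, the lg of the minimum tiny size class (3, i.e. 8 bytes), the list of lg(page size) values, and the lg of the size-class group width (2, i.e. four classes per size doubling). An illustrative manual invocation:

    sh include/jemalloc/internal/size_classes.sh "3 4" 3 "12" 2 > size_classes.h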
"include/jemalloc/jemalloc_protos_jet.h":C)
mkdir -p "${objroot}include/jemalloc"
@@ -8864,16 +13616,32 @@ $as_echo "jemalloc version : ${jemalloc_version}" >&6; }
$as_echo "library revision : ${rev}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
$as_echo "" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIG : ${CONFIG}" >&5
+$as_echo "CONFIG : ${CONFIG}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CC : ${CC}" >&5
$as_echo "CC : ${CC}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIGURE_CFLAGS : ${CONFIGURE_CFLAGS}" >&5
+$as_echo "CONFIGURE_CFLAGS : ${CONFIGURE_CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SPECIFIED_CFLAGS : ${SPECIFIED_CFLAGS}" >&5
+$as_echo "SPECIFIED_CFLAGS : ${SPECIFIED_CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&5
+$as_echo "EXTRA_CFLAGS : ${EXTRA_CFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CPPFLAGS : ${CPPFLAGS}" >&5
$as_echo "CPPFLAGS : ${CPPFLAGS}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CFLAGS : ${CFLAGS}" >&5
-$as_echo "CFLAGS : ${CFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CXX : ${CXX}" >&5
+$as_echo "CXX : ${CXX}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}" >&5
+$as_echo "CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}" >&5
+$as_echo "SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}" >&5
+$as_echo "EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LDFLAGS : ${LDFLAGS}" >&5
$as_echo "LDFLAGS : ${LDFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&5
$as_echo "EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DSO_LDFLAGS : ${DSO_LDFLAGS}" >&5
+$as_echo "DSO_LDFLAGS : ${DSO_LDFLAGS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBS : ${LIBS}" >&5
$as_echo "LIBS : ${LIBS}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: RPATH_EXTRA : ${RPATH_EXTRA}" >&5
@@ -8890,12 +13658,12 @@ $as_echo "" >&6; }
$as_echo "PREFIX : ${PREFIX}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: BINDIR : ${BINDIR}" >&5
$as_echo "BINDIR : ${BINDIR}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DATADIR : ${DATADIR}" >&5
+$as_echo "DATADIR : ${DATADIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: INCLUDEDIR : ${INCLUDEDIR}" >&5
$as_echo "INCLUDEDIR : ${INCLUDEDIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: LIBDIR : ${LIBDIR}" >&5
$as_echo "LIBDIR : ${LIBDIR}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: DATADIR : ${DATADIR}" >&5
-$as_echo "DATADIR : ${DATADIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: MANDIR : ${MANDIR}" >&5
$as_echo "MANDIR : ${MANDIR}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
@@ -8918,16 +13686,12 @@ $as_echo "JEMALLOC_PRIVATE_NAMESPACE" >&6; }
$as_echo " : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: install_suffix : ${install_suffix}" >&5
$as_echo "install_suffix : ${install_suffix}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: malloc_conf : ${config_malloc_conf}" >&5
+$as_echo "malloc_conf : ${config_malloc_conf}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen : ${enable_autogen}" >&5
$as_echo "autogen : ${enable_autogen}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: experimental : ${enable_experimental}" >&5
-$as_echo "experimental : ${enable_experimental}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence : ${enable_cc_silence}" >&5
-$as_echo "cc-silence : ${enable_cc_silence}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: debug : ${enable_debug}" >&5
$as_echo "debug : ${enable_debug}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: code-coverage : ${enable_code_coverage}" >&5
-$as_echo "code-coverage : ${enable_code_coverage}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: stats : ${enable_stats}" >&5
$as_echo "stats : ${enable_stats}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof : ${enable_prof}" >&5
@@ -8938,25 +13702,19 @@ $as_echo "prof-libunwind : ${enable_prof_libunwind}" >&6; }
$as_echo "prof-libgcc : ${enable_prof_libgcc}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: prof-gcc : ${enable_prof_gcc}" >&5
$as_echo "prof-gcc : ${enable_prof_gcc}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tcache : ${enable_tcache}" >&5
-$as_echo "tcache : ${enable_tcache}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: fill : ${enable_fill}" >&5
$as_echo "fill : ${enable_fill}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: utrace : ${enable_utrace}" >&5
$as_echo "utrace : ${enable_utrace}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: valgrind : ${enable_valgrind}" >&5
-$as_echo "valgrind : ${enable_valgrind}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: xmalloc : ${enable_xmalloc}" >&5
$as_echo "xmalloc : ${enable_xmalloc}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: mremap : ${enable_mremap}" >&5
-$as_echo "mremap : ${enable_mremap}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: munmap : ${enable_munmap}" >&5
-$as_echo "munmap : ${enable_munmap}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: dss : ${enable_dss}" >&5
-$as_echo "dss : ${enable_dss}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: log : ${enable_log}" >&5
+$as_echo "log : ${enable_log}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: lazy_lock : ${enable_lazy_lock}" >&5
$as_echo "lazy_lock : ${enable_lazy_lock}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tls : ${enable_tls}" >&5
-$as_echo "tls : ${enable_tls}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cache-oblivious : ${enable_cache_oblivious}" >&5
+$as_echo "cache-oblivious : ${enable_cache_oblivious}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: cxx : ${enable_cxx}" >&5
+$as_echo "cxx : ${enable_cxx}" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ===============================================================================" >&5
$as_echo "===============================================================================" >&6; }
diff --git a/deps/jemalloc/configure.ac b/deps/jemalloc/configure.ac
index 4de81dc1d..a6a08db08 100644
--- a/deps/jemalloc/configure.ac
+++ b/deps/jemalloc/configure.ac
@@ -1,34 +1,99 @@
dnl Process this file with autoconf to produce a configure script.
+AC_PREREQ(2.68)
AC_INIT([Makefile.in])
+AC_CONFIG_AUX_DIR([build-aux])
+
dnl ============================================================================
dnl Custom macro definitions.
-dnl JE_CFLAGS_APPEND(cflag)
-AC_DEFUN([JE_CFLAGS_APPEND],
-[
-AC_MSG_CHECKING([whether compiler supports $1])
-TCFLAGS="${CFLAGS}"
-if test "x${CFLAGS}" = "x" ; then
- CFLAGS="$1"
+dnl JE_CONCAT_VVV(r, a, b)
+dnl
+dnl Set $r to the concatenation of $a and $b, with a space separating them iff
+dnl both $a and $b are non-empty.
+AC_DEFUN([JE_CONCAT_VVV],
+if test "x[$]{$2}" = "x" -o "x[$]{$3}" = "x" ; then
+ $1="[$]{$2}[$]{$3}"
else
- CFLAGS="${CFLAGS} $1"
+ $1="[$]{$2} [$]{$3}"
fi
+)
+
+dnl JE_APPEND_VS(a, b)
+dnl
+dnl Set $a to the concatenation of $a and b, with a space separating them iff
+dnl both $a and b are non-empty.
+AC_DEFUN([JE_APPEND_VS],
+ T_APPEND_V=$2
+ JE_CONCAT_VVV($1, $1, T_APPEND_V)
+)
+
+CONFIGURE_CFLAGS=
+SPECIFIED_CFLAGS="${CFLAGS}"
+dnl JE_CFLAGS_ADD(cflag)
+dnl
+dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS
+dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests). This macro
+dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS.
+AC_DEFUN([JE_CFLAGS_ADD],
+[
+AC_MSG_CHECKING([whether compiler supports $1])
+T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+JE_APPEND_VS(CONFIGURE_CFLAGS, $1)
+JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS)
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[[
+]], [[
+ return 0;
+]])],
+ [je_cv_cflags_added=$1]
+ AC_MSG_RESULT([yes]),
+ [je_cv_cflags_added=]
+ AC_MSG_RESULT([no])
+ [CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"]
+)
+JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS)
+])
+
+dnl JE_CFLAGS_SAVE()
+dnl JE_CFLAGS_RESTORE()
+dnl
+dnl Save/restore CFLAGS. Nesting is not supported.
+AC_DEFUN([JE_CFLAGS_SAVE],
+SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}"
+)
+AC_DEFUN([JE_CFLAGS_RESTORE],
+CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}"
+JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS)
+)
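The net effect of this machinery is that CFLAGS is never mutated in place: it is always regenerated from the accumulated CONFIGURE_CFLAGS plus the user's original SPECIFIED_CFLAGS, so a rejected flag can be rolled back cleanly. A plain-shell sketch of JE_CONCAT_VVV's join rule, with sample values:

    a="-std=gnu11 -Wall"    # CONFIGURE_CFLAGS accumulated by JE_CFLAGS_ADD
    b="-O2"                 # SPECIFIED_CFLAGS, i.e. the CFLAGS the user passed in
    if test "x${a}" = "x" -o "x${b}" = "x" ; then
      CFLAGS="${a}${b}"     # no separator when either side is empty
    else
      CFLAGS="${a} ${b}"
    fi
    echo "${CFLAGS}"        # -> -std=gnu11 -Wall -O2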
+
+CONFIGURE_CXXFLAGS=
+SPECIFIED_CXXFLAGS="${CXXFLAGS}"
+dnl JE_CXXFLAGS_ADD(cxxflag)
+AC_DEFUN([JE_CXXFLAGS_ADD],
+[
+AC_MSG_CHECKING([whether compiler supports $1])
+T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
+JE_APPEND_VS(CONFIGURE_CXXFLAGS, $1)
+JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS)
+AC_LANG_PUSH([C++])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
[[
]], [[
return 0;
]])],
- [je_cv_cflags_appended=$1]
+ [je_cv_cxxflags_added=$1]
AC_MSG_RESULT([yes]),
- [je_cv_cflags_appended=]
+ [je_cv_cxxflags_added=]
AC_MSG_RESULT([no])
- [CFLAGS="${TCFLAGS}"]
+ [CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"]
)
+AC_LANG_POP([C++])
+JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS)
])
dnl JE_COMPILABLE(label, hcode, mcode, rvar)
-dnl
+dnl
dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors
dnl cause failure.
AC_DEFUN([JE_COMPILABLE],
@@ -43,8 +108,11 @@ AC_CACHE_CHECK([whether $1 is compilable],
dnl ============================================================================
+CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'`
+AC_SUBST([CONFIG])
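The sed expression only strips the single quotes that autoconf wraps around each saved argument, so the recorded CONFIG string can be pasted back into a configure invocation. With illustrative arguments:

    ac_configure_args="'--enable-debug' '--with-lg-page=12'"
    CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'`
    echo "${CONFIG}"        # -> --enable-debug --with-lg-page=12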
+
dnl Library revision.
-rev=1
+rev=2
AC_SUBST([rev])
srcroot=$srcdir
@@ -113,6 +181,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise,
dnl just prevent autoconf from molesting CFLAGS.
CFLAGS=$CFLAGS
AC_PROG_CC
+
if test "x$GCC" != "xyes" ; then
AC_CACHE_CHECK([whether compiler is MSVC],
[je_cv_msvc],
@@ -126,42 +195,144 @@ if test "x$GCC" != "xyes" ; then
[je_cv_msvc=no])])
fi
-if test "x$CFLAGS" = "x" ; then
- no_CFLAGS="yes"
- if test "x$GCC" = "xyes" ; then
- JE_CFLAGS_APPEND([-std=gnu99])
- if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then
+dnl check if a cray prgenv wrapper compiler is being used
+je_cv_cray_prgenv_wrapper=""
+if test "x${PE_ENV}" != "x" ; then
+ case "${CC}" in
+ CC|cc)
+ je_cv_cray_prgenv_wrapper="yes"
+ ;;
+ *)
+ ;;
+ esac
+fi
+
+AC_CACHE_CHECK([whether compiler is cray],
+ [je_cv_cray],
+ [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+ [
+#ifndef _CRAYC
+ int fail[-1];
+#endif
+])],
+ [je_cv_cray=yes],
+ [je_cv_cray=no])])
+
+if test "x${je_cv_cray}" = "xyes" ; then
+ AC_CACHE_CHECK([whether cray compiler version is 8.4],
+ [je_cv_cray_84],
+ [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+ [
+#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4)
+ int fail[-1];
+#endif
+])],
+ [je_cv_cray_84=yes],
+ [je_cv_cray_84=no])])
+fi
+
+if test "x$GCC" = "xyes" ; then
+ JE_CFLAGS_ADD([-std=gnu11])
+ if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then
+ AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+ else
+ JE_CFLAGS_ADD([-std=gnu99])
+ if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
fi
- JE_CFLAGS_APPEND([-Wall])
- JE_CFLAGS_APPEND([-pipe])
- JE_CFLAGS_APPEND([-g3])
- elif test "x$je_cv_msvc" = "xyes" ; then
- CC="$CC -nologo"
- JE_CFLAGS_APPEND([-Zi])
- JE_CFLAGS_APPEND([-MT])
- JE_CFLAGS_APPEND([-W3])
- CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
fi
+ JE_CFLAGS_ADD([-Wall])
+ JE_CFLAGS_ADD([-Wshorten-64-to-32])
+ JE_CFLAGS_ADD([-Wsign-compare])
+ JE_CFLAGS_ADD([-Wundef])
+ JE_CFLAGS_ADD([-Wno-format-zero-length])
+ JE_CFLAGS_ADD([-pipe])
+ JE_CFLAGS_ADD([-g3])
+elif test "x$je_cv_msvc" = "xyes" ; then
+ CC="$CC -nologo"
+ JE_CFLAGS_ADD([-Zi])
+ JE_CFLAGS_ADD([-MT])
+ JE_CFLAGS_ADD([-W3])
+ JE_CFLAGS_ADD([-FS])
+ JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat)
+fi
+if test "x$je_cv_cray" = "xyes" ; then
+ dnl cray compiler 8.4 has an inlining bug
+ if test "x$je_cv_cray_84" = "xyes" ; then
+ JE_CFLAGS_ADD([-hipa2])
+ JE_CFLAGS_ADD([-hnognu])
+ fi
+ dnl ignore unreachable code warning
+ JE_CFLAGS_ADD([-hnomessage=128])
+ dnl ignore redefinition of "malloc", "free", etc warning
+ JE_CFLAGS_ADD([-hnomessage=1357])
+fi
+AC_SUBST([CONFIGURE_CFLAGS])
+AC_SUBST([SPECIFIED_CFLAGS])
+AC_SUBST([EXTRA_CFLAGS])
+AC_PROG_CPP
+
+AC_ARG_ENABLE([cxx],
+ [AS_HELP_STRING([--disable-cxx], [Disable C++ integration])],
+if test "x$enable_cxx" = "xno" ; then
+ enable_cxx="0"
+else
+ enable_cxx="1"
fi
-dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
-if test "x$EXTRA_CFLAGS" != "x" ; then
- JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
+,
+enable_cxx="1"
+)
+if test "x$enable_cxx" = "x1" ; then
+ dnl Require at least c++14, which is the first version to support sized
+ dnl deallocation. C++ support is not compiled otherwise.
+ m4_include([m4/ax_cxx_compile_stdcxx.m4])
+ AX_CXX_COMPILE_STDCXX([14], [noext], [optional])
+ if test "x${HAVE_CXX14}" = "x1" ; then
+ JE_CXXFLAGS_ADD([-Wall])
+ JE_CXXFLAGS_ADD([-g3])
+
+ SAVED_LIBS="${LIBS}"
+ JE_APPEND_VS(LIBS, -lstdc++)
+ JE_COMPILABLE([libstdc++ linkage], [
+#include <stdlib.h>
+], [[
+ int *arr = (int *)malloc(sizeof(int) * 42);
+ if (arr == NULL)
+ return 1;
+]], [je_cv_libstdcxx])
+ if test "x${je_cv_libstdcxx}" = "xno" ; then
+ LIBS="${SAVED_LIBS}"
+ fi
+ else
+ enable_cxx="0"
+ fi
fi
-AC_PROG_CPP
+AC_SUBST([enable_cxx])
+AC_SUBST([CONFIGURE_CXXFLAGS])
+AC_SUBST([SPECIFIED_CXXFLAGS])
+AC_SUBST([EXTRA_CXXFLAGS])
AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0])
if test "x${ac_cv_big_endian}" = "x1" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
fi
-AC_CHECK_SIZEOF([void *])
-if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
- LG_SIZEOF_PTR=3
-elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
- LG_SIZEOF_PTR=2
+if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
+ JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat/C99)
+fi
+
+if test "x${je_cv_msvc}" = "xyes" ; then
+ LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN
+ AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit])
else
- AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+ AC_CHECK_SIZEOF([void *])
+ if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+ LG_SIZEOF_PTR=3
+ elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+ LG_SIZEOF_PTR=2
+ else
+ AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+ fi
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
@@ -185,6 +356,16 @@ else
fi
AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
+AC_CHECK_SIZEOF([long long])
+if test "x${ac_cv_sizeof_long_long}" = "x8" ; then
+ LG_SIZEOF_LONG_LONG=3
+elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then
+ LG_SIZEOF_LONG_LONG=2
+else
+ AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG])
+
AC_CHECK_SIZEOF([intmax_t])
if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
LG_SIZEOF_INTMAX_T=4
@@ -201,32 +382,120 @@ AC_CANONICAL_HOST
dnl CPU-specific settings.
CPU_SPINWAIT=""
case "${host_cpu}" in
- i[[345]]86)
- ;;
i686|x86_64)
- JE_COMPILABLE([pause instruction], [],
- [[__asm__ volatile("pause"); return 0;]],
- [je_cv_pause])
- if test "x${je_cv_pause}" = "xyes" ; then
- CPU_SPINWAIT='__asm__ volatile("pause")'
- fi
- dnl emmintrin.h fails to compile unless MMX, SSE, and SSE2 are
- dnl supported.
- JE_COMPILABLE([SSE2 intrinsics], [
-#include <emmintrin.h>
-], [], [je_cv_sse2])
- if test "x${je_cv_sse2}" = "xyes" ; then
- AC_DEFINE_UNQUOTED([HAVE_SSE2], [ ])
+ HAVE_CPU_SPINWAIT=1
+ if test "x${je_cv_msvc}" = "xyes" ; then
+ AC_CACHE_VAL([je_cv_pause_msvc],
+ [JE_COMPILABLE([pause instruction MSVC], [],
+ [[_mm_pause(); return 0;]],
+ [je_cv_pause_msvc])])
+ if test "x${je_cv_pause_msvc}" = "xyes" ; then
+ CPU_SPINWAIT='_mm_pause()'
+ fi
+ else
+ AC_CACHE_VAL([je_cv_pause],
+ [JE_COMPILABLE([pause instruction], [],
+ [[__asm__ volatile("pause"); return 0;]],
+ [je_cv_pause])])
+ if test "x${je_cv_pause}" = "xyes" ; then
+ CPU_SPINWAIT='__asm__ volatile("pause")'
+ fi
fi
;;
- powerpc)
- AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ])
- ;;
*)
+ HAVE_CPU_SPINWAIT=0
;;
esac
+AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT])
AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
+AC_ARG_WITH([lg_vaddr],
+ [AS_HELP_STRING([--with-lg-vaddr=<lg-vaddr>], [Number of significant virtual address bits])],
+ [LG_VADDR="$with_lg_vaddr"], [LG_VADDR="detect"])
+
+case "${host_cpu}" in
+ aarch64)
+ if test "x$LG_VADDR" = "xdetect"; then
+ AC_MSG_CHECKING([number of significant virtual address bits])
+ if test "x${LG_SIZEOF_PTR}" = "x2" ; then
+ #aarch64 ILP32
+ LG_VADDR=32
+ else
+ #aarch64 LP64
+ LG_VADDR=48
+ fi
+ AC_MSG_RESULT([$LG_VADDR])
+ fi
+ ;;
+ x86_64)
+ if test "x$LG_VADDR" = "xdetect"; then
+ AC_CACHE_CHECK([number of significant virtual address bits],
+ [je_cv_lg_vaddr],
+ AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+#include <stdio.h>
+#ifdef _WIN32
+#include <limits.h>
+#include <intrin.h>
+typedef unsigned __int32 uint32_t;
+#else
+#include <stdint.h>
+#endif
+]], [[
+ uint32_t r[[4]];
+ uint32_t eax_in = 0x80000008U;
+#ifdef _WIN32
+ __cpuid((int *)r, (int)eax_in);
+#else
+ asm volatile ("cpuid"
+ : "=a" (r[[0]]), "=b" (r[[1]]), "=c" (r[[2]]), "=d" (r[[3]])
+ : "a" (eax_in), "c" (0)
+ );
+#endif
+ uint32_t eax_out = r[[0]];
+ uint32_t vaddr = ((eax_out & 0x0000ff00U) >> 8);
+ FILE *f = fopen("conftest.out", "w");
+ if (f == NULL) {
+ return 1;
+ }
+ if (vaddr > (sizeof(void *) << 3)) {
+ vaddr = sizeof(void *) << 3;
+ }
+ fprintf(f, "%u", vaddr);
+ fclose(f);
+ return 0;
+]])],
+ [je_cv_lg_vaddr=`cat conftest.out`],
+ [je_cv_lg_vaddr=error],
+ [je_cv_lg_vaddr=57]))
+ if test "x${je_cv_lg_vaddr}" != "x" ; then
+ LG_VADDR="${je_cv_lg_vaddr}"
+ fi
+ if test "x${LG_VADDR}" != "xerror" ; then
+ AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR])
+ else
+ AC_MSG_ERROR([cannot determine number of significant virtual address bits])
+ fi
+ fi
+ ;;
+ *)
+ if test "x$LG_VADDR" = "xdetect"; then
+ AC_MSG_CHECKING([number of significant virtual address bits])
+ if test "x${LG_SIZEOF_PTR}" = "x3" ; then
+ LG_VADDR=64
+ elif test "x${LG_SIZEOF_PTR}" = "x2" ; then
+ LG_VADDR=32
+ elif test "x${LG_SIZEOF_PTR}" = "xLG_SIZEOF_PTR_WIN" ; then
+ LG_VADDR="(1U << (LG_SIZEOF_PTR_WIN+3))"
+ else
+ AC_MSG_ERROR([Unsupported lg(pointer size): ${LG_SIZEOF_PTR}])
+ fi
+ AC_MSG_RESULT([$LG_VADDR])
+ fi
+ ;;
+esac
+AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR])
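The x86_64 run-time probe above relies on CPUID leaf 0x80000008: EAX bits 7:0 report the physical address width and bits 15:8 the linear (virtual) address width, which is the field the mask-and-shift extracts. The same arithmetic in shell, with a sample EAX value of 0x00003028 (48 virtual / 40 physical bits):

    eax_out=$(( 0x00003028 ))
    vaddr=$(( (eax_out & 0x0000ff00) >> 8 ))
    echo "${vaddr}"         # -> 48, so LG_VADDR becomes 48 on such hardware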
+
LD_PRELOAD_VAR="LD_PRELOAD"
so="so"
importlib="${so}"
@@ -234,60 +503,113 @@ o="$ac_objext"
a="a"
exe="$ac_exeext"
libprefix="lib"
+link_whole_archive="0"
DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
RPATH='-Wl,-rpath,$(1)'
SOREV="${so}.${rev}"
PIC_CFLAGS='-fPIC -DPIC'
CTARGET='-o $@'
LDTARGET='-o $@'
+TEST_LD_MODE=
EXTRA_LDFLAGS=
ARFLAGS='crus'
AROUT=' $@'
CC_MM=1
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+ TEST_LD_MODE='-dynamic'
+fi
+
+if test "x${je_cv_cray}" = "xyes" ; then
+ CC_MM=
+fi
+
AN_MAKEVAR([AR], [AC_PROG_AR])
AN_PROGRAM([ar], [AC_PROG_AR])
AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
AC_PROG_AR
+AN_MAKEVAR([NM], [AC_PROG_NM])
+AN_PROGRAM([nm], [AC_PROG_NM])
+AC_DEFUN([AC_PROG_NM], [AC_CHECK_TOOL(NM, nm, :)])
+AC_PROG_NM
+
+AC_PROG_AWK
+
dnl Platform-specific settings. abi and RPATH can probably be determined
dnl programmatically, but doing so is error-prone, which makes it generally
dnl not worth the trouble.
-dnl
+dnl
dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
dnl definitions need to be seen before any headers are included, which is a pain
dnl to make happen otherwise.
-default_munmap="1"
-JEMALLOC_USABLE_SIZE_CONST="const"
+default_retain="0"
+maps_coalesce="1"
+DUMP_SYMS="${NM} -a"
+SYM_PREFIX=""
case "${host}" in
- *-*-darwin*)
- CFLAGS="$CFLAGS"
+ *-*-darwin* | *-*-ios*)
abi="macho"
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
RPATH=""
LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
so="dylib"
importlib="${so}"
force_tls="0"
- DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+ DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)'
SOREV="${rev}.${so}"
sbrk_deprecated="1"
+ SYM_PREFIX="_"
;;
*-*-freebsd*)
- CFLAGS="$CFLAGS"
abi="elf"
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
force_lazy_lock="1"
;;
+ *-*-dragonfly*)
+ abi="elf"
+ ;;
+ *-*-openbsd*)
+ abi="elf"
+ force_tls="0"
+ ;;
+ *-*-bitrig*)
+ abi="elf"
+ ;;
+ *-*-linux-android)
+ dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
+ JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
+ abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ])
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
+ AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
+ AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
+ AC_DEFINE([JEMALLOC_C11_ATOMICS])
+ force_tls="0"
+ if test "${LG_SIZEOF_PTR}" = "3"; then
+ default_retain="1"
+ fi
+ ;;
*-*-linux*)
- CFLAGS="$CFLAGS"
- CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+ dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
+ JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
abi="elf"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ])
AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
+ AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
- JEMALLOC_USABLE_SIZE_CONST=""
- default_munmap="0"
+ AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
+ if test "${LG_SIZEOF_PTR}" = "3"; then
+ default_retain="1"
+ fi
+ ;;
+ *-*-kfreebsd*)
+ dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
+ JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
+ abi="elf"
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
+ AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
+ AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
+ AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
;;
*-*-netbsd*)
AC_MSG_CHECKING([ABI])
@@ -298,22 +620,19 @@ case "${host}" in
#error aout
#endif
]])],
- [CFLAGS="$CFLAGS"; abi="elf"],
+ [abi="elf"],
[abi="aout"])
AC_MSG_RESULT([$abi])
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
;;
*-*-solaris2*)
- CFLAGS="$CFLAGS"
abi="elf"
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
RPATH='-Wl,-R,$(1)'
dnl Solaris needs this for sigwait().
- CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
- LIBS="$LIBS -lposix4 -lsocket -lnsl"
+ JE_APPEND_VS(CPPFLAGS, -D_POSIX_PTHREAD_SEMANTICS)
+ JE_APPEND_VS(LIBS, -lposix4 -lsocket -lnsl)
;;
*-ibm-aix*)
- if "$LG_SIZEOF_PTR" = "8"; then
+ if test "${LG_SIZEOF_PTR}" = "3"; then
dnl 64bit AIX
LD_PRELOAD_VAR="LDR_PRELOAD64"
else
@@ -322,9 +641,10 @@ case "${host}" in
fi
abi="xcoff"
;;
- *-*-mingw*)
+ *-*-mingw* | *-*-cygwin*)
abi="pecoff"
force_tls="0"
+ maps_coalesce="0"
RPATH=""
so="dll"
if test "x$je_cv_msvc" = "xyes" ; then
@@ -340,7 +660,15 @@ case "${host}" in
else
importlib="${so}"
DSO_LDFLAGS="-shared"
+ link_whole_archive="1"
fi
+ case "${host}" in
+ *-*-cygwin*)
+ DUMP_SYMS="dumpbin /SYMBOLS"
+ ;;
+ *)
+ ;;
+ esac
a="lib"
libprefix=""
SOREV="${so}"
@@ -351,6 +679,22 @@ case "${host}" in
abi="elf"
;;
esac
+
+JEMALLOC_USABLE_SIZE_CONST=const
+AC_CHECK_HEADERS([malloc.h], [
+ AC_MSG_CHECKING([whether malloc_usable_size definition can use const argument])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [#include <malloc.h>
+ #include <stddef.h>
+ size_t malloc_usable_size(const void *ptr);
+ ],
+ [])],[
+ AC_MSG_RESULT([yes])
+ ],[
+ JEMALLOC_USABLE_SIZE_CONST=
+ AC_MSG_RESULT([no])
+ ])
+])
AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST])
AC_SUBST([abi])
AC_SUBST([RPATH])
@@ -361,17 +705,29 @@ AC_SUBST([o])
AC_SUBST([a])
AC_SUBST([exe])
AC_SUBST([libprefix])
+AC_SUBST([link_whole_archive])
AC_SUBST([DSO_LDFLAGS])
AC_SUBST([EXTRA_LDFLAGS])
AC_SUBST([SOREV])
AC_SUBST([PIC_CFLAGS])
AC_SUBST([CTARGET])
AC_SUBST([LDTARGET])
+AC_SUBST([TEST_LD_MODE])
AC_SUBST([MKLIB])
AC_SUBST([ARFLAGS])
AC_SUBST([AROUT])
+AC_SUBST([DUMP_SYMS])
AC_SUBST([CC_MM])
+dnl Determine whether libm must be linked to use e.g. log(3).
+AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])])
+if test "x$ac_cv_search_log" != "xnone required" ; then
+ LM="$ac_cv_search_log"
+else
+ LM=
+fi
+AC_SUBST(LM)
+
JE_COMPILABLE([__attribute__ syntax],
[static __attribute__((unused)) void foo(void){}],
[],
@@ -379,23 +735,55 @@ JE_COMPILABLE([__attribute__ syntax],
if test "x${je_cv_attribute}" = "xyes" ; then
AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
- JE_CFLAGS_APPEND([-fvisibility=hidden])
+ JE_CFLAGS_ADD([-fvisibility=hidden])
+ JE_CXXFLAGS_ADD([-fvisibility=hidden])
fi
fi
dnl Check for tls_model attribute support (clang 3.0 still lacks support).
-SAVED_CFLAGS="${CFLAGS}"
-JE_CFLAGS_APPEND([-Werror])
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
JE_COMPILABLE([tls_model attribute], [],
[static __thread int
- __attribute__((tls_model("initial-exec"))) foo;
+ __attribute__((tls_model("initial-exec"), unused)) foo;
foo = 0;],
[je_cv_tls_model])
-CFLAGS="${SAVED_CFLAGS}"
-if test "x${je_cv_tls_model}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_TLS_MODEL],
- [__attribute__((tls_model("initial-exec")))])
-else
- AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
+JE_CFLAGS_RESTORE()
+dnl (Setting of JEMALLOC_TLS_MODEL is done later, after we've checked for
+dnl --disable-initial-exec-tls)
+
+dnl Check for alloc_size attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([alloc_size attribute], [#include <stdlib.h>],
+ [void *foo(size_t size) __attribute__((alloc_size(1)));],
+ [je_cv_alloc_size])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_alloc_size}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ])
+fi
+dnl Check for format(gnu_printf, ...) attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include <stdlib.h>],
+ [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));],
+ [je_cv_format_gnu_printf])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ])
+fi
+dnl Check for format(printf, ...) attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
+ [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));],
+ [je_cv_format_printf])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_format_printf}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
fi
dnl Support optional additions to rpath.
@@ -428,59 +816,6 @@ AC_PROG_RANLIB
AC_PATH_PROG([LD], [ld], [false], [$PATH])
AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
-public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
-
-dnl Check for allocator-related functions that should be wrapped.
-AC_CHECK_FUNC([memalign],
- [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ])
- public_syms="${public_syms} memalign"])
-AC_CHECK_FUNC([valloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ])
- public_syms="${public_syms} valloc"])
-
-dnl Support the experimental API by default.
-AC_ARG_ENABLE([experimental],
- [AS_HELP_STRING([--disable-experimental],
- [Disable support for the experimental API])],
-[if test "x$enable_experimental" = "xno" ; then
- enable_experimental="0"
-else
- enable_experimental="1"
-fi
-],
-[enable_experimental="1"]
-)
-if test "x$enable_experimental" = "x1" ; then
- AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ])
- public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
-fi
-AC_SUBST([enable_experimental])
-
-dnl Do not compute test code coverage by default.
-GCOV_FLAGS=
-AC_ARG_ENABLE([code-coverage],
- [AS_HELP_STRING([--enable-code-coverage],
- [Enable code coverage])],
-[if test "x$enable_code_coverage" = "xno" ; then
- enable_code_coverage="0"
-else
- enable_code_coverage="1"
-fi
-],
-[enable_code_coverage="0"]
-)
-if test "x$enable_code_coverage" = "x1" ; then
- deoptimize="no"
- echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes"
- if test "x${deoptimize}" = "xyes" ; then
- JE_CFLAGS_APPEND([-O0])
- fi
- JE_CFLAGS_APPEND([-fprofile-arcs -ftest-coverage])
- EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage"
- AC_DEFINE([JEMALLOC_CODE_COVERAGE], [ ])
-fi
-AC_SUBST([enable_code_coverage])
-
dnl Perform no name mangling by default.
AC_ARG_WITH([mangling],
[AS_HELP_STRING([--with-mangling=<map>], [Mangle symbols in <map>])],
@@ -496,11 +831,15 @@ else
JEMALLOC_PREFIX="je_"
fi]
)
-if test "x$JEMALLOC_PREFIX" != "x" ; then
+if test "x$JEMALLOC_PREFIX" = "x" ; then
+ AC_DEFINE([JEMALLOC_IS_MALLOC])
+else
JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
fi
+AC_SUBST([JEMALLOC_PREFIX])
+AC_SUBST([JEMALLOC_CPREFIX])
AC_ARG_WITH([export],
[AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
@@ -509,6 +848,49 @@ AC_ARG_WITH([export],
fi]
)
+public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
+dnl Check for additional platform-specific public API functions.
+AC_CHECK_FUNC([memalign],
+ [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ])
+ public_syms="${public_syms} memalign"])
+AC_CHECK_FUNC([valloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ])
+ public_syms="${public_syms} valloc"])
+
+dnl Check for allocator-related functions that should be wrapped.
+wrap_syms=
+if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ AC_CHECK_FUNC([__libc_calloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ])
+ wrap_syms="${wrap_syms} __libc_calloc"])
+ AC_CHECK_FUNC([__libc_free],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ])
+ wrap_syms="${wrap_syms} __libc_free"])
+ AC_CHECK_FUNC([__libc_malloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ])
+ wrap_syms="${wrap_syms} __libc_malloc"])
+ AC_CHECK_FUNC([__libc_memalign],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ])
+ wrap_syms="${wrap_syms} __libc_memalign"])
+ AC_CHECK_FUNC([__libc_realloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ])
+ wrap_syms="${wrap_syms} __libc_realloc"])
+ AC_CHECK_FUNC([__libc_valloc],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ])
+ wrap_syms="${wrap_syms} __libc_valloc"])
+ AC_CHECK_FUNC([__posix_memalign],
+ [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ])
+ wrap_syms="${wrap_syms} __posix_memalign"])
+fi
+
+case "${host}" in
+ *-*-mingw* | *-*-cygwin*)
+ wrap_syms="${wrap_syms} tls_callback"
+ ;;
+ *)
+ ;;
+esac
+
dnl Mangle library-private APIs.
AC_ARG_WITH([private_namespace],
[AS_HELP_STRING([--with-private-namespace=<prefix>], [Prefix to prepend to all library-private APIs])],
@@ -528,58 +910,72 @@ AC_ARG_WITH([install_suffix],
install_suffix="$INSTALL_SUFFIX"
AC_SUBST([install_suffix])
+dnl Specify default malloc_conf.
+AC_ARG_WITH([malloc_conf],
+ [AS_HELP_STRING([--with-malloc-conf=<malloc_conf>], [config.malloc_conf options string])],
+ [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"],
+ [JEMALLOC_CONFIG_MALLOC_CONF=""]
+)
+config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
+AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"])
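Usage note: the string given here becomes the compiled-in config.malloc_conf, one more layer of default option processing consulted at run time; for example (option string illustrative):

    ./configure --with-malloc-conf=background_thread:true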
+
dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of
dnl jemalloc_protos_jet.h easy.
je_="je_"
AC_SUBST([je_])
-cfgoutputs_in="${srcroot}Makefile.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_macros.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/test.sh.in"
-cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/include/test/jemalloc_test.h.in"
+cfgoutputs_in="Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} jemalloc.pc.in"
+cfgoutputs_in="${cfgoutputs_in} doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_preamble.h.in"
+cfgoutputs_in="${cfgoutputs_in} test/test.sh.in"
+cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in"
cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} jemalloc.pc"
cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h"
cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h"
-cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_preamble.h"
cfgoutputs_out="${cfgoutputs_out} test/test.sh"
cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h"
cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} jemalloc.pc:jemalloc.pc.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in"
cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in"
-cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_preamble.h"
cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in"
cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in"
-cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/private_symbols.txt"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_namespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/public_unnamespace.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_rename.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc_mangle.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/jemalloc.sh"
-cfghdrs_in="${cfghdrs_in} ${srcroot}test/include/test/jemalloc_test_defs.h.in"
+cfghdrs_in="include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh"
+cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh"
+cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in"
cfghdrs_out="include/jemalloc/jemalloc_defs.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h"
-cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h"
-cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_unnamespace.h"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols.awk"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols_jet.awk"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h"
cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h"
@@ -592,29 +988,13 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/jemalloc_internal_defs.h"
cfghdrs_out="${cfghdrs_out} test/include/test/jemalloc_test_defs.h"
cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:${srcroot}include/jemalloc/internal/jemalloc_internal_defs.h.in"
-cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:${srcroot}test/include/test/jemalloc_test_defs.h.in"
-
-dnl Do not silence irrelevant compiler warnings by default, since enabling this
-dnl option incurs a performance penalty.
-AC_ARG_ENABLE([cc-silence],
- [AS_HELP_STRING([--enable-cc-silence],
- [Silence irrelevant compiler warnings])],
-[if test "x$enable_cc_silence" = "xno" ; then
- enable_cc_silence="0"
-else
- enable_cc_silence="1"
-fi
-],
-[enable_cc_silence="0"]
-)
-if test "x$enable_cc_silence" = "x1" ; then
- AC_DEFINE([JEMALLOC_CC_SILENCE], [ ])
-fi
+cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in"
+cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in"
dnl Do not compile with debugging by default.
AC_ARG_ENABLE([debug],
- [AS_HELP_STRING([--enable-debug], [Build debugging code (implies --enable-ivsalloc)])],
+ [AS_HELP_STRING([--enable-debug],
+ [Build debugging code])],
[if test "x$enable_debug" = "xno" ; then
enable_debug="0"
else
@@ -625,39 +1005,24 @@ fi
)
if test "x$enable_debug" = "x1" ; then
AC_DEFINE([JEMALLOC_DEBUG], [ ])
- enable_ivsalloc="1"
fi
-AC_SUBST([enable_debug])
-
-dnl Do not validate pointers by default.
-AC_ARG_ENABLE([ivsalloc],
- [AS_HELP_STRING([--enable-ivsalloc], [Validate pointers passed through the public API])],
-[if test "x$enable_ivsalloc" = "xno" ; then
- enable_ivsalloc="0"
-else
- enable_ivsalloc="1"
-fi
-],
-[enable_ivsalloc="0"]
-)
-if test "x$enable_ivsalloc" = "x1" ; then
- AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+if test "x$enable_debug" = "x1" ; then
+ AC_DEFINE([JEMALLOC_DEBUG], [ ])
fi
+AC_SUBST([enable_debug])
dnl Only optimize if not debugging.
-if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
- dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
- optimize="no"
- echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || optimize="yes"
- if test "x${optimize}" = "xyes" ; then
- if test "x$GCC" = "xyes" ; then
- JE_CFLAGS_APPEND([-O3])
- JE_CFLAGS_APPEND([-funroll-loops])
- elif test "x$je_cv_msvc" = "xyes" ; then
- JE_CFLAGS_APPEND([-O2])
- else
- JE_CFLAGS_APPEND([-O])
- fi
+if test "x$enable_debug" = "x0" ; then
+ if test "x$GCC" = "xyes" ; then
+ JE_CFLAGS_ADD([-O3])
+ JE_CXXFLAGS_ADD([-O3])
+ JE_CFLAGS_ADD([-funroll-loops])
+ elif test "x$je_cv_msvc" = "xyes" ; then
+ JE_CFLAGS_ADD([-O2])
+ JE_CXXFLAGS_ADD([-O2])
+ else
+ JE_CFLAGS_ADD([-O])
+ JE_CXXFLAGS_ADD([-O])
fi
fi
@@ -721,10 +1086,10 @@ fi,
if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
if test "x$LUNWIND" = "x-lunwind" ; then
- AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+ AC_CHECK_LIB([unwind], [unw_backtrace], [JE_APPEND_VS(LIBS, $LUNWIND)],
[enable_prof_libunwind="0"])
else
- LIBS="$LIBS $LUNWIND"
+ JE_APPEND_VS(LIBS, $LUNWIND)
fi
if test "x${enable_prof_libunwind}" = "x1" ; then
backtrace_method="libunwind"
@@ -746,7 +1111,9 @@ fi
if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
-a "x$GCC" = "xyes" ; then
AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
- AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+ if test "x${enable_prof_libgcc}" = "x1" ; then
+ AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [JE_APPEND_VS(LIBS, -lgcc)], [enable_prof_libgcc="0"])
+ fi
if test "x${enable_prof_libgcc}" = "x1" ; then
backtrace_method="libgcc"
AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
@@ -768,7 +1135,7 @@ fi
)
if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
-a "x$GCC" = "xyes" ; then
- JE_CFLAGS_APPEND([-fno-omit-frame-pointer])
+ JE_CFLAGS_ADD([-fno-omit-frame-pointer])
backtrace_method="gcc intrinsics"
AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
else
@@ -782,112 +1149,44 @@ fi
AC_MSG_CHECKING([configured backtracing method])
AC_MSG_RESULT([$backtrace_method])
if test "x$enable_prof" = "x1" ; then
- if test "x${force_tls}" = "x0" ; then
- AC_MSG_ERROR([Heap profiling requires TLS]);
- fi
- force_tls="1"
-
- if test "x$abi" != "xpecoff"; then
- dnl Heap profiling uses the log(3) function.
- LIBS="$LIBS -lm"
- fi
+ dnl Heap profiling uses the log(3) function.
+ JE_APPEND_VS(LIBS, $LM)
AC_DEFINE([JEMALLOC_PROF], [ ])
fi
AC_SUBST([enable_prof])
-dnl Enable thread-specific caching by default.
-AC_ARG_ENABLE([tcache],
- [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
-[if test "x$enable_tcache" = "xno" ; then
- enable_tcache="0"
-else
- enable_tcache="1"
-fi
-],
-[enable_tcache="1"]
-)
-if test "x$enable_tcache" = "x1" ; then
- AC_DEFINE([JEMALLOC_TCACHE], [ ])
+dnl Indicate whether adjacent virtual memory mappings automatically coalesce
+dnl (and fragment on demand).
+if test "x${maps_coalesce}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ])
fi
-AC_SUBST([enable_tcache])
-dnl Disable mremap() for huge realloc() by default.
-AC_ARG_ENABLE([mremap],
- [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])],
-[if test "x$enable_mremap" = "xno" ; then
- enable_mremap="0"
-else
- enable_mremap="1"
-fi
-],
-[enable_mremap="0"]
-)
-if test "x$enable_mremap" = "x1" ; then
- JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
-#define _GNU_SOURCE
-#include <sys/mman.h>
-], [
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
-], [je_cv_mremap_fixed])
- if test "x${je_cv_mremap_fixed}" = "xno" ; then
- enable_mremap="0"
- fi
-fi
-if test "x$enable_mremap" = "x1" ; then
- AC_DEFINE([JEMALLOC_MREMAP], [ ])
+dnl Indicate whether to retain memory (rather than using munmap()) by default.
+if test "x$default_retain" = "x1" ; then
+ AC_DEFINE([JEMALLOC_RETAIN], [ ])
fi
-AC_SUBST([enable_mremap])
-dnl Enable VM deallocation via munmap() by default.
-AC_ARG_ENABLE([munmap],
- [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])],
-[if test "x$enable_munmap" = "xno" ; then
- enable_munmap="0"
-else
- enable_munmap="1"
-fi
-],
-[enable_munmap="${default_munmap}"]
-)
-if test "x$enable_munmap" = "x1" ; then
- AC_DEFINE([JEMALLOC_MUNMAP], [ ])
-fi
-AC_SUBST([enable_munmap])
-
-dnl Do not enable allocation from DSS by default.
-AC_ARG_ENABLE([dss],
- [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])],
-[if test "x$enable_dss" = "xno" ; then
- enable_dss="0"
-else
- enable_dss="1"
-fi
-],
-[enable_dss="0"]
-)
+dnl Enable allocation from DSS if supported by the OS.
+have_dss="1"
dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support.
AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"])
if test "x$have_sbrk" = "x1" ; then
- if test "x$sbrk_deprecated" == "x1" ; then
+ if test "x$sbrk_deprecated" = "x1" ; then
AC_MSG_RESULT([Disabling dss allocation because sbrk is deprecated])
- enable_dss="0"
- else
- AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ])
+ have_dss="0"
fi
else
- enable_dss="0"
+ have_dss="0"
fi
-if test "x$enable_dss" = "x1" ; then
+if test "x$have_dss" = "x1" ; then
AC_DEFINE([JEMALLOC_DSS], [ ])
fi
-AC_SUBST([enable_dss])
dnl Support the junk/zero filling option by default.
AC_ARG_ENABLE([fill],
- [AS_HELP_STRING([--disable-fill],
- [Disable support for junk/zero filling, quarantine, and redzones])],
+ [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])],
[if test "x$enable_fill" = "xno" ; then
enable_fill="0"
else
@@ -929,35 +1228,6 @@ if test "x$enable_utrace" = "x1" ; then
fi
AC_SUBST([enable_utrace])
-dnl Support Valgrind by default.
-AC_ARG_ENABLE([valgrind],
- [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])],
-[if test "x$enable_valgrind" = "xno" ; then
- enable_valgrind="0"
-else
- enable_valgrind="1"
-fi
-],
-[enable_valgrind="1"]
-)
-if test "x$enable_valgrind" = "x1" ; then
- JE_COMPILABLE([valgrind], [
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-
-#if !defined(VALGRIND_RESIZEINPLACE_BLOCK)
-# error "Incompatible Valgrind version"
-#endif
-], [], [je_cv_valgrind])
- if test "x${je_cv_valgrind}" = "xno" ; then
- enable_valgrind="0"
- fi
- if test "x$enable_valgrind" = "x1" ; then
- AC_DEFINE([JEMALLOC_VALGRIND], [ ])
- fi
-fi
-AC_SUBST([enable_valgrind])
-
dnl Do not support the xmalloc option by default.
AC_ARG_ENABLE([xmalloc],
[AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
@@ -974,8 +1244,110 @@ if test "x$enable_xmalloc" = "x1" ; then
fi
AC_SUBST([enable_xmalloc])
-AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
- [je_cv_static_page_shift],
+dnl Support cache-oblivious allocation alignment by default.
+AC_ARG_ENABLE([cache-oblivious],
+ [AS_HELP_STRING([--disable-cache-oblivious],
+ [Disable support for cache-oblivious allocation alignment])],
+[if test "x$enable_cache_oblivious" = "xno" ; then
+ enable_cache_oblivious="0"
+else
+ enable_cache_oblivious="1"
+fi
+],
+[enable_cache_oblivious="1"]
+)
+if test "x$enable_cache_oblivious" = "x1" ; then
+ AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
+fi
+AC_SUBST([enable_cache_oblivious])
+
+dnl Do not log by default.
+AC_ARG_ENABLE([log],
+ [AS_HELP_STRING([--enable-log], [Support debug logging])],
+[if test "x$enable_log" = "xno" ; then
+ enable_log="0"
+else
+ enable_log="1"
+fi
+],
+[enable_log="0"]
+)
+if test "x$enable_log" = "x1" ; then
+ AC_DEFINE([JEMALLOC_LOG], [ ])
+fi
+AC_SUBST([enable_log])
+
+
+JE_COMPILABLE([a program using __builtin_unreachable], [
+void foo (void) {
+ __builtin_unreachable();
+}
+], [
+ {
+ foo();
+ }
+], [je_cv_gcc_builtin_unreachable])
+if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable])
+else
+ AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort])
+fi
+
+dnl ============================================================================
+dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found.
+dnl One of those two functions should (theoretically) exist on all platforms
+dnl that jemalloc currently has a chance of functioning on without modification.
+dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if
+dnl ffsl() or __builtin_ffsl() are defined, respectively.
+JE_COMPILABLE([a program using __builtin_ffsl], [
+#include <stdio.h>
+#include <strings.h>
+#include <string.h>
+], [
+ {
+ int rv = __builtin_ffsl(0x08);
+ printf("%d\n", rv);
+ }
+], [je_cv_gcc_builtin_ffsl])
+if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs])
+else
+ JE_COMPILABLE([a program using ffsl], [
+ #include <stdio.h>
+ #include <strings.h>
+ #include <string.h>
+ ], [
+ {
+ int rv = ffsl(0x08);
+ printf("%d\n", rv);
+ }
+ ], [je_cv_function_ffsl])
+ if test "x${je_cv_function_ffsl}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs])
+ else
+ AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()])
+ fi
+fi
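Both variants share ffs semantics: the result is the 1-based index of the least significant set bit (0 for a zero argument), so the probe's ffsl(0x08) prints 4. A quick shell re-derivation of that expected value:

    x=$(( 0x08 )) ; i=0
    while test ${x} -gt 0 ; do
      i=$((i + 1))
      test $(( x & 1 )) -eq 1 && break
      x=$(( x >> 1 ))
    done
    echo "${i}"             # -> 4, matching ffsl(0x08)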
+
+AC_ARG_WITH([lg_quantum],
+ [AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
+ [Base 2 log of minimum allocation alignment])],
+ [LG_QUANTA="$with_lg_quantum"],
+ [LG_QUANTA="3 4"])
+if test "x$with_lg_quantum" != "x" ; then
+ AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])
+fi
+
+AC_ARG_WITH([lg_page],
+ [AS_HELP_STRING([--with-lg-page=<lg-page>], [Base 2 log of system page size])],
+ [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"])
+if test "x$LG_PAGE" = "xdetect"; then
+ AC_CACHE_CHECK([LG_PAGE],
+ [je_cv_lg_page],
AC_RUN_IFELSE([AC_LANG_PROGRAM(
[[
#include <strings.h>
@@ -1000,35 +1372,117 @@ AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
if (result == -1) {
return 1;
}
- result = ffsl(result) - 1;
+ result = JEMALLOC_INTERNAL_FFSL(result) - 1;
f = fopen("conftest.out", "w");
if (f == NULL) {
return 1;
}
- fprintf(f, "%d\n", result);
+ fprintf(f, "%d", result);
fclose(f);
return 0;
]])],
- [je_cv_static_page_shift=`cat conftest.out`],
- [je_cv_static_page_shift=undefined]))
-
-if test "x$je_cv_static_page_shift" != "xundefined"; then
- AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift])
+ [je_cv_lg_page=`cat conftest.out`],
+ [je_cv_lg_page=undefined],
+ [je_cv_lg_page=12]))
+fi
+if test "x${je_cv_lg_page}" != "x" ; then
+ LG_PAGE="${je_cv_lg_page}"
+fi
+if test "x${LG_PAGE}" != "xundefined" ; then
+ AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE])
else
- AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
+ AC_MSG_ERROR([cannot determine value for LG_PAGE])
+fi
+
+AC_ARG_WITH([lg_hugepage],
+ [AS_HELP_STRING([--with-lg-hugepage=<lg-hugepage>],
+ [Base 2 log of system huge page size])],
+ [je_cv_lg_hugepage="${with_lg_hugepage}"],
+ [je_cv_lg_hugepage=""])
+if test "x${je_cv_lg_hugepage}" = "x" ; then
+ dnl Look in /proc/meminfo (Linux-specific) for information on the default huge
+ dnl page size, if any. The relevant line looks like:
+ dnl
+ dnl Hugepagesize: 2048 kB
+ if test -e "/proc/meminfo" ; then
+ hpsk=[`cat /proc/meminfo 2>/dev/null | \
+ grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \
+ awk '{print $2}'`]
+ if test "x${hpsk}" != "x" ; then
+ je_cv_lg_hugepage=10
+ while test "${hpsk}" -gt 1 ; do
+ hpsk="$((hpsk / 2))"
+ je_cv_lg_hugepage="$((je_cv_lg_hugepage + 1))"
+ done
+ fi
+ fi
+
+ dnl Set default if unable to automatically configure.
+ if test "x${je_cv_lg_hugepage}" = "x" ; then
+ je_cv_lg_hugepage=21
+ fi
+fi
+if test "x${LG_PAGE}" != "xundefined" -a \
+ "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then
+ AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})])
fi
+AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}])
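The loop converts the kB figure into a base-2 log of the byte size: it starts at 10 (1 kB = 2^10 bytes) and adds one per halving. Re-running the same arithmetic on the common "Hugepagesize: 2048 kB" case:

    hpsk=2048               # kB value scraped from /proc/meminfo
    lg=10                   # 1 kB == 2^10 bytes
    while test "${hpsk}" -gt 1 ; do
      hpsk=$((hpsk / 2))
      lg=$((lg + 1))
    done
    echo "${lg}"            # -> 21, i.e. LG_HUGEPAGE for 2 MiB huge pages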
+
+AC_ARG_WITH([lg_page_sizes],
+ [AS_HELP_STRING([--with-lg-page-sizes=<lg-page-sizes>],
+ [Base 2 logs of system page sizes to support])],
+ [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"])
dnl ============================================================================
dnl jemalloc configuration.
-dnl
-
-dnl Set VERSION if source directory has an embedded git repository.
-if test -d "${srcroot}.git" ; then
- git describe --long --abbrev=40 > ${srcroot}VERSION
+dnl
+
+AC_ARG_WITH([version],
+ [AS_HELP_STRING([--with-version=<major>.<minor>.<bugfix>-<nrev>-g<gid>],
+ [Version string])],
+ [
+ echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null
+ if test $? -eq 0 ; then
+ echo "$with_version" > "${objroot}VERSION"
+ else
+ echo "${with_version}" | grep ['^VERSION$'] 2>&1 1>/dev/null
+ if test $? -ne 0 ; then
+ AC_MSG_ERROR([${with_version} does not match <major>.<minor>.<bugfix>-<nrev>-g<gid> or VERSION])
+ fi
+ fi
+ ], [
+ dnl Set VERSION if source directory is inside a git repository.
+ if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then
+ dnl Pattern globs aren't powerful enough to match both single- and
+ dnl double-digit version numbers, so iterate over patterns to support up
+ dnl to version 99.99.99 without any accidental matches.
+ for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \
+ '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9]' \
+ '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do
+ (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null
+ if test $? -eq 0 ; then
+ mv "${objroot}VERSION.tmp" "${objroot}VERSION"
+ break
+ fi
+ done
+ fi
+ rm -f "${objroot}VERSION.tmp"
+ ])
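So an explicit version must either be the literal string VERSION or match the full five-field form, e.g. (value illustrative):

    ./configure --with-version=5.1.0-0-g0123456789abcdef0123456789abcdef01234567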
+
+if test ! -e "${objroot}VERSION" ; then
+ if test ! -e "${srcroot}VERSION" ; then
+ AC_MSG_RESULT(
+ [Missing VERSION file, and unable to generate it; creating bogus VERSION])
+ echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
+ else
+ cp ${srcroot}VERSION ${objroot}VERSION
+ fi
fi
-jemalloc_version=`cat ${srcroot}VERSION`
+jemalloc_version=`cat "${objroot}VERSION"`
jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'`
jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'`
@@ -1045,15 +1499,164 @@ dnl ============================================================================
dnl Configure pthreads.
if test "x$abi" != "xpecoff" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ])
AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
dnl Some systems may embed pthreads functionality in libc; check for libpthread
dnl first, but try libc too before failing.
- AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
+ AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)],
[AC_SEARCH_LIBS([pthread_create], , ,
AC_MSG_ERROR([libpthread is missing]))])
+ wrap_syms="${wrap_syms} pthread_create"
+ have_pthread="1"
+ dnl Check if we have dlsym support.
+ have_dlsym="1"
+ AC_CHECK_HEADERS([dlfcn.h],
+ AC_CHECK_FUNC([dlsym], [],
+ [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]),
+ [have_dlsym="0"])
+ if test "x$have_dlsym" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ])
+ fi
+ JE_COMPILABLE([pthread_atfork(3)], [
+#include <pthread.h>
+], [
+ pthread_atfork((void *)0, (void *)0, (void *)0);
+], [je_cv_pthread_atfork])
+ if test "x${je_cv_pthread_atfork}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ])
+ fi
+ dnl Check if pthread_setname_np is available with the expected API.
+ JE_COMPILABLE([pthread_setname_np(3)], [
+#include <pthread.h>
+], [
+ pthread_setname_np(pthread_self(), "setname_test");
+], [je_cv_pthread_setname_np])
+ if test "x${je_cv_pthread_setname_np}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ])
+ fi
+fi
+
+JE_APPEND_VS(CPPFLAGS, -D_REENTRANT)
+
+dnl Check whether clock_gettime(2) is in libc or librt.
+AC_SEARCH_LIBS([clock_gettime], [rt])
+
+dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with
+dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc.
+if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then
+ if test "$ac_cv_search_clock_gettime" != "-lrt"; then
+ JE_CFLAGS_SAVE()
+
+ unset ac_cv_search_clock_gettime
+ JE_CFLAGS_ADD([-dynamic])
+ AC_SEARCH_LIBS([clock_gettime], [rt])
+
+ JE_CFLAGS_RESTORE()
+ fi
fi
-CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific).
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [
+#include <time.h>
+], [
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+], [je_cv_clock_monotonic_coarse])
+if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE])
+fi
+
+dnl Check for CLOCK_MONOTONIC.
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [
+#include <unistd.h>
+#include <time.h>
+], [
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0
+# error _POSIX_MONOTONIC_CLOCK missing/invalid
+#endif
+], [je_cv_clock_monotonic])
+if test "x${je_cv_clock_monotonic}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC])
+fi
+
+dnl Check for mach_absolute_time().
+JE_COMPILABLE([mach_absolute_time()], [
+#include <mach/mach_time.h>
+], [
+ mach_absolute_time();
+], [je_cv_mach_absolute_time])
+if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME])
+fi
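
For orientation, a minimal sketch (in C) of how the timer macros defined by these probes are typically consumed; the helper name monotonic_ns and the fallback ordering are ours, not lifted from jemalloc:

    #include <stdint.h>
    #include <time.h>

    /* Read a monotonic timestamp in nanoseconds, preferring the coarse
     * (cheaper, lower-resolution) clock when the probe found it. */
    static uint64_t
    monotonic_ns(void) {
        struct timespec ts;
    #if defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
        clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
    #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
        clock_gettime(CLOCK_MONOTONIC, &ts);
    #else
        clock_gettime(CLOCK_REALTIME, &ts); /* not monotonic; last resort */
    #endif
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME would cover Darwin similarly, but converting its tick units needs mach_timebase_info, so it is omitted here.)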
+
+dnl Use syscall(2) (if available) by default.
+AC_ARG_ENABLE([syscall],
+ [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])],
+[if test "x$enable_syscall" = "xno" ; then
+ enable_syscall="0"
+else
+ enable_syscall="1"
+fi
+],
+[enable_syscall="1"]
+)
+if test "x$enable_syscall" = "x1" ; then
+ dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS
+ dnl X 10.12's deprecation warning prevents use.
+ JE_CFLAGS_SAVE()
+ JE_CFLAGS_ADD([-Werror])
+ JE_COMPILABLE([syscall(2)], [
+#include <sys/syscall.h>
+#include <unistd.h>
+], [
+ syscall(SYS_write, 2, "hello", 5);
+],
+ [je_cv_syscall])
+ JE_CFLAGS_RESTORE()
+ if test "x$je_cv_syscall" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ])
+ fi
+fi
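
A hedged sketch of what JEMALLOC_USE_SYSCALL enables; raw_write_stderr is an illustrative name, and the fallback to write(2) is our assumption about reasonable behavior when the probe fails:

    #include <unistd.h>
    #if defined(JEMALLOC_USE_SYSCALL)
    #include <sys/syscall.h>
    #endif

    /* Emit a diagnostic on stderr.  Going through syscall(2) avoids
     * re-entering any interposed write() wrapper. */
    static void
    raw_write_stderr(const char *s, size_t len) {
    #if defined(JEMALLOC_USE_SYSCALL)
        syscall(SYS_write, 2, s, len);
    #else
        write(2, s, len);
    #endif
    }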
+
+dnl Check if the GNU-specific secure_getenv function exists.
+AC_CHECK_FUNC([secure_getenv],
+ [have_secure_getenv="1"],
+ [have_secure_getenv="0"]
+ )
+if test "x$have_secure_getenv" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
+fi
+
+dnl Check if the GNU-specific sched_getcpu function exists.
+AC_CHECK_FUNC([sched_getcpu],
+ [have_sched_getcpu="1"],
+ [have_sched_getcpu="0"]
+ )
+if test "x$have_sched_getcpu" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ])
+fi
+
+dnl Check if the GNU-specific sched_setaffinity function exists.
+AC_CHECK_FUNC([sched_setaffinity],
+ [have_sched_setaffinity="1"],
+ [have_sched_setaffinity="0"]
+ )
+if test "x$have_sched_setaffinity" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ])
+fi
+
+dnl Check if the Solaris/BSD issetugid function exists.
+AC_CHECK_FUNC([issetugid],
+ [have_issetugid="1"],
+ [have_issetugid="0"]
+ )
+if test "x$have_issetugid" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ])
+fi
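
Taken together, the secure_getenv and issetugid probes support a hardened environment lookup along these lines (a sketch; safe_getenv is our name, and the precedence order is an assumption; on glibc, secure_getenv is declared only under _GNU_SOURCE):

    #include <stdlib.h>
    #if defined(JEMALLOC_HAVE_ISSETUGID)
    #include <unistd.h>
    #endif

    /* Read a configuration variable, distrusting the environment of
     * setuid/setgid processes where the platform lets us detect that. */
    static const char *
    safe_getenv(const char *name) {
    #if defined(JEMALLOC_HAVE_SECURE_GETENV)
        return secure_getenv(name);  /* glibc: NULL when privileged */
    #elif defined(JEMALLOC_HAVE_ISSETUGID)
        if (issetugid() != 0) {
            return NULL;             /* BSD/Solaris: tainted environment */
        }
        return getenv(name);
    #else
        return getenv(name);
    #endif
    }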
dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use
dnl it rather than pthreads TSD cleanup functions to support cleanup during
@@ -1065,6 +1668,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup],
)
if test "x$have__malloc_thread_cleanup" = "x1" ; then
AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ])
+ wrap_syms="${wrap_syms} _malloc_thread_cleanup"
force_tls="1"
fi
@@ -1077,6 +1681,7 @@ AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb],
)
if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
AC_DEFINE([JEMALLOC_MUTEX_INIT_CB])
+ wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork"
fi
dnl Disable lazy locking by default.
@@ -1089,42 +1694,37 @@ else
enable_lazy_lock="1"
fi
],
-[enable_lazy_lock="0"]
+[enable_lazy_lock=""]
)
-if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
- AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
- enable_lazy_lock="1"
+if test "x${enable_lazy_lock}" = "x" ; then
+ if test "x${force_lazy_lock}" = "x1" ; then
+ AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
+ enable_lazy_lock="1"
+ else
+ enable_lazy_lock="0"
+ fi
+fi
+if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
+ AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented])
+ enable_lazy_lock="0"
fi
if test "x$enable_lazy_lock" = "x1" ; then
- if test "x$abi" != "xpecoff" ; then
- AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
- AC_CHECK_FUNC([dlsym], [],
- [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"],
- [AC_MSG_ERROR([libdl is missing])])
- ])
+ if test "x$have_dlsym" = "x1" ; then
+ AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
+ else
+ AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.])
fi
- AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
fi
AC_SUBST([enable_lazy_lock])
-AC_ARG_ENABLE([tls],
- [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])],
-if test "x$enable_tls" = "xno" ; then
+dnl Automatically configure TLS.
+if test "x${force_tls}" = "x1" ; then
+ enable_tls="1"
+elif test "x${force_tls}" = "x0" ; then
enable_tls="0"
else
enable_tls="1"
fi
-,
-enable_tls="1"
-)
-if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
- AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues])
- enable_tls="1"
-fi
-if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
- AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues])
- enable_tls="0"
-fi
if test "x${enable_tls}" = "x1" ; then
AC_MSG_CHECKING([for TLS])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
@@ -1138,57 +1738,68 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
AC_MSG_RESULT([yes]),
AC_MSG_RESULT([no])
enable_tls="0")
+else
+ enable_tls="0"
fi
AC_SUBST([enable_tls])
if test "x${enable_tls}" = "x1" ; then
AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ])
-elif test "x${force_tls}" = "x1" ; then
- AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function])
fi
dnl ============================================================================
-dnl Check for ffsl(3), and fail if not found. This function exists on all
-dnl platforms that jemalloc currently has a chance of functioning on without
-dnl modification.
-JE_COMPILABLE([a program using ffsl], [
-#include <stdio.h>
-#include <strings.h>
-#include <string.h>
+dnl Check for C11 atomics.
+
+JE_COMPILABLE([C11 atomics], [
+#include <stdint.h>
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+#include <stdatomic.h>
+#else
+#error Atomics not available
+#endif
], [
- {
- int rv = ffsl(0x08);
- printf("%d\n", rv);
- }
-], [je_cv_function_ffsl])
-if test "x${je_cv_function_ffsl}" != "xyes" ; then
- AC_MSG_ERROR([Cannot build without ffsl(3)])
+ uint64_t *p = (uint64_t *)0;
+ uint64_t x = 1;
+ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
+ uint64_t r = atomic_fetch_add(a, x) + x;
+ return r == 0;
+], [je_cv_c11_atomics])
+if test "x${je_cv_c11_atomics}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_C11_ATOMICS])
fi
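
The probe only proves that this dialect compiles; as a quick illustration of the primitives it unlocks (toy example, names ours):

    #include <stdatomic.h>
    #include <stdint.h>

    static atomic_uint_least64_t counter; /* zero-initialized */

    static uint64_t
    bump(void) {
        /* Returns the value held *before* the addition. */
        return atomic_fetch_add(&counter, 1);
    }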
dnl ============================================================================
-dnl Check for atomic(9) operations as provided on FreeBSD.
+dnl Check for GCC-style __atomic atomics.
-JE_COMPILABLE([atomic(9)], [
-#include <sys/types.h>
-#include <machine/atomic.h>
-#include <inttypes.h>
+JE_COMPILABLE([GCC __atomic atomics], [
], [
- {
- uint32_t x32 = 0;
- volatile uint32_t *x32p = &x32;
- atomic_fetchadd_32(x32p, 1);
- }
- {
- unsigned long xlong = 0;
- volatile unsigned long *xlongp = &xlong;
- atomic_fetchadd_long(xlongp, 1);
- }
-], [je_cv_atomic9])
-if test "x${je_cv_atomic9}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_ATOMIC9])
+ int x = 0;
+ int val = 1;
+ int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED);
+ int after_add = x;
+ return after_add == 1;
+], [je_cv_gcc_atomic_atomics])
+if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS])
+fi
+
+dnl ============================================================================
+dnl Check for GCC-style __sync atomics.
+
+JE_COMPILABLE([GCC __sync atomics], [
+], [
+ int x = 0;
+ int before_add = __sync_fetch_and_add(&x, 1);
+ int after_add = x;
+ return (before_add == 0) && (after_add == 1);
+], [je_cv_gcc_sync_atomics])
+if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS])
fi
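
The three atomics probes feed a pick-one dispatch, strongest dialect first. A compressed sketch of the shape (jemalloc's real atomic headers are considerably more elaborate):

    /* Exactly one branch is selected by the configure results above. */
    #if defined(JEMALLOC_C11_ATOMICS)
    #include <stdatomic.h>
    static atomic_int ctr;
    static inline int incr(void) {
        return atomic_fetch_add_explicit(&ctr, 1, memory_order_relaxed);
    }
    #elif defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
    static int ctr;
    static inline int incr(void) {
        return __atomic_fetch_add(&ctr, 1, __ATOMIC_RELAXED);
    }
    #elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
    static int ctr;
    static inline int incr(void) {
        return __sync_fetch_and_add(&ctr, 1); /* full barrier; no relaxed form */
    }
    #endif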
dnl ============================================================================
dnl Check for atomic(3) operations as provided on Darwin.
+dnl We need this not for the atomic operations (which are provided above), but
+dnl rather for the OSSpinLock type it exposes.
JE_COMPILABLE([Darwin OSAtomic*()], [
#include <libkern/OSAtomic.h>
@@ -1210,6 +1821,75 @@ if test "x${je_cv_osatomic}" = "xyes" ; then
fi
dnl ============================================================================
+dnl Check for madvise(2).
+
+JE_COMPILABLE([madvise(2)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, 0);
+], [je_cv_madvise])
+if test "x${je_cv_madvise}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ])
+
+ dnl Check for madvise(..., MADV_FREE).
+ JE_COMPILABLE([madvise(..., MADV_FREE)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, MADV_FREE);
+], [je_cv_madv_free])
+ if test "x${je_cv_madv_free}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ elif test "x${je_cv_madvise}" = "xyes" ; then
+ case "${host_cpu}" in i686|x86_64)
+ case "${host}" in *-*-linux*)
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ])
+ ;;
+ esac
+ ;;
+ esac
+ fi
+
+ dnl Check for madvise(..., MADV_DONTNEED).
+ JE_COMPILABLE([madvise(..., MADV_DONTNEED)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, MADV_DONTNEED);
+], [je_cv_madv_dontneed])
+ if test "x${je_cv_madv_dontneed}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
+ fi
+
+ dnl Check for madvise(..., MADV_DO[NT]DUMP).
+ JE_COMPILABLE([madvise(..., MADV_DO[[NT]]DUMP)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, MADV_DONTDUMP);
+ madvise((void *)0, 0, MADV_DODUMP);
+], [je_cv_madv_dontdump])
+ if test "x${je_cv_madv_dontdump}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ])
+ fi
+
+ dnl Check for madvise(..., MADV_[NO]HUGEPAGE).
+ JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, MADV_HUGEPAGE);
+ madvise((void *)0, 0, MADV_NOHUGEPAGE);
+], [je_cv_thp])
+case "${host_cpu}" in
+ arm*)
+ ;;
+ *)
+ if test "x${je_cv_thp}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
+ fi
+ ;;
+esac
+fi
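
A sketch of how the purge-related results are typically consumed (purge is our name; preferring MADV_FREE over MADV_DONTNEED mirrors the order of the checks, MADV_FREE being lazier and usually cheaper):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Hand an unused, page-aligned range back to the kernel. */
    static void
    purge(void *addr, size_t len) {
    #if defined(JEMALLOC_PURGE_MADVISE_FREE)
        madvise(addr, len, MADV_FREE);      /* reclaimed under pressure */
    #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED)
        madvise(addr, len, MADV_DONTNEED);  /* discarded immediately */
    #else
        (void)addr; (void)len;              /* no purging facility found */
    #endif
    }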
+
+dnl ============================================================================
dnl Check whether __sync_{add,sub}_and_fetch() are available despite
dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined.
@@ -1244,6 +1924,48 @@ if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
fi
dnl ============================================================================
+dnl Check for __builtin_clz() and __builtin_clzl().
+
+AC_CACHE_CHECK([for __builtin_clz],
+ [je_cv_builtin_clz],
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+ [
+ {
+ unsigned x = 0;
+ int y = __builtin_clz(x);
+ }
+ {
+ unsigned long x = 0;
+ int y = __builtin_clzl(x);
+ }
+ ])],
+ [je_cv_builtin_clz=yes],
+ [je_cv_builtin_clz=no])])
+
+if test "x${je_cv_builtin_clz}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
+fi
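
A small, self-contained example of the bit twiddling this check enables (lg_floor is our name; note __builtin_clzl(0) is undefined, hence the x > 0 requirement):

    /* floor(log2(x)) for x > 0. */
    static inline unsigned
    lg_floor(unsigned long x) {
    #if defined(JEMALLOC_HAVE_BUILTIN_CLZ)
        return (unsigned)(sizeof(unsigned long) * 8 - 1)
            - (unsigned)__builtin_clzl(x);
    #else
        unsigned lg = 0;
        while ((x >>= 1) != 0) {
            lg++;
        }
        return lg;
    #endif
    }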
+
+dnl ============================================================================
+dnl Check for os_unfair_lock operations as provided on Darwin.
+
+JE_COMPILABLE([Darwin os_unfair_lock_*()], [
+#include <os/lock.h>
+#include <AvailabilityMacros.h>
+], [
+ #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200
+ #error "os_unfair_lock is not supported"
+ #else
+ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT;
+ os_unfair_lock_lock(&lock);
+ os_unfair_lock_unlock(&lock);
+ #endif
+], [je_cv_os_unfair_lock])
+if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ])
+fi
+
+dnl ============================================================================
dnl Check for spinlock(3) operations as provided on Darwin.
JE_COMPILABLE([Darwin OSSpin*()], [
@@ -1281,39 +2003,107 @@ if test "x${enable_zone_allocator}" = "x1" ; then
if test "x${abi}" != "xmacho"; then
AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin])
fi
- AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
AC_DEFINE([JEMALLOC_ZONE], [ ])
+fi
- dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
- dnl releases. malloc_zone_t and malloc_introspection_t have new fields in
- dnl 10.6, which is the only source-level indication of the change.
- AC_MSG_CHECKING([malloc zone version])
- AC_DEFUN([JE_ZONE_PROGRAM],
- [AC_LANG_PROGRAM(
- [#include <malloc/malloc.h>],
- [static foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]]
- )])
-
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=]
- )])],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[
- AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=]
- )])])])])
- if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then
- AC_MSG_RESULT([unsupported])
- AC_MSG_ERROR([Unsupported malloc zone version])
+dnl ============================================================================
+dnl Use initial-exec TLS by default.
+AC_ARG_ENABLE([initial-exec-tls],
+ [AS_HELP_STRING([--disable-initial-exec-tls],
+ [Disable the initial-exec tls model])],
+[if test "x$enable_initial_exec_tls" = "xno" ; then
+ enable_initial_exec_tls="0"
+else
+ enable_initial_exec_tls="1"
+fi
+],
+[enable_initial_exec_tls="1"]
+)
+AC_SUBST([enable_initial_exec_tls])
+
+if test "x${je_cv_tls_model}" = "xyes" -a \
+ "x${enable_initial_exec_tls}" = "x1" ; then
+ AC_DEFINE([JEMALLOC_TLS_MODEL],
+ [__attribute__((tls_model("initial-exec")))])
+else
+ AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
+fi
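
Since JEMALLOC_TLS_MODEL expands either to the attribute or to nothing, declarations can apply it unconditionally, e.g. (illustrative variable name, assuming the generated configuration header is included):

    /* Initial-exec TLS avoids the dynamic __tls_get_addr lookup, at the
     * cost, broadly speaking, of not being dlopen-friendly everywhere. */
    static __thread unsigned tsd_state JEMALLOC_TLS_MODEL;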
+
+dnl ============================================================================
+dnl Enable background threads if possible.
+
+if test "x${have_pthread}" = "x1" -a "x${have_dlsym}" = "x1" \
+ -a "x${je_cv_os_unfair_lock}" != "xyes" \
+ -a "x${je_cv_osspin}" != "xyes" ; then
+ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD])
+fi
+
+dnl ============================================================================
+dnl Check for glibc malloc hooks
+
+JE_COMPILABLE([glibc malloc hook], [
+#include <stddef.h>
+
+extern void (* __free_hook)(void *ptr);
+extern void *(* __malloc_hook)(size_t size);
+extern void *(* __realloc_hook)(void *ptr, size_t size);
+], [
+ void *ptr = 0L;
+ if (__malloc_hook) ptr = __malloc_hook(1);
+ if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
+ if (__free_hook && ptr) __free_hook(ptr);
+], [je_cv_glibc_malloc_hook])
+if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ])
+ wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook"
fi
- if test "${JEMALLOC_ZONE_VERSION}" = 9; then
- JEMALLOC_ZONE_VERSION=8
- AC_MSG_RESULT([> 8])
- else
- AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION])
+fi
+
+JE_COMPILABLE([glibc memalign hook], [
+#include <stddef.h>
+
+extern void *(* __memalign_hook)(size_t alignment, size_t size);
+], [
+ void *ptr = 0L;
+ if (__memalign_hook) ptr = __memalign_hook(16, 7);
+], [je_cv_glibc_memalign_hook])
+if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ])
+ wrap_syms="${wrap_syms} __memalign_hook"
fi
- AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION])
+fi
+
+JE_COMPILABLE([pthreads adaptive mutexes], [
+#include <pthread.h>
+], [
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+ pthread_mutexattr_destroy(&attr);
+], [je_cv_pthread_mutex_adaptive_np])
+if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ])
+fi
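
A sketch of putting the adaptive-mutex probe to use (init_lock is our name; PTHREAD_MUTEX_ADAPTIVE_NP spins briefly before sleeping and is a glibc extension):

    #include <pthread.h>

    static int
    init_lock(pthread_mutex_t *m) {
        pthread_mutexattr_t attr;
        int err = pthread_mutexattr_init(&attr);
        if (err != 0) {
            return err;
        }
    #if defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP)
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
    #endif
        err = pthread_mutex_init(m, &attr);
        pthread_mutexattr_destroy(&attr);
        return err;
    }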
+
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-D_GNU_SOURCE])
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([strerror_r returns char with gnu source], [
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+], [
+ char *buffer = (char *) malloc(100);
+ char *error = strerror_r(EINVAL, buffer, 100);
+ printf("%s\n", error);
+], [je_cv_strerror_r_returns_char_with_gnu_source])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ])
fi
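
The strerror_r probe exists because glibc, under _GNU_SOURCE, returns char * (and may ignore the caller's buffer), while the POSIX variant returns int and always fills the buffer. A portable wrapper sketch (errno_string is our name):

    #include <string.h>

    static const char *
    errno_string(int err, char *buf, size_t buflen) {
    #if defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE)
        return strerror_r(err, buf, buflen);
    #else
        if (strerror_r(err, buf, buflen) != 0) {
            return "(unknown error)";
        }
        return buf;
    #endif
    }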
dnl ============================================================================
@@ -1323,20 +2113,6 @@ AC_HEADER_STDBOOL
dnl ============================================================================
dnl Define commands that generate output files.
-AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [
- mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h"
-], [
- srcdir="${srcdir}"
- objroot="${objroot}"
-])
-AC_CONFIG_COMMANDS([include/jemalloc/internal/private_unnamespace.h], [
- mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/private_unnamespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_unnamespace.h"
-], [
- srcdir="${srcdir}"
- objroot="${objroot}"
-])
AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [
f="${objroot}include/jemalloc/internal/public_symbols.txt"
mkdir -p "${objroot}include/jemalloc/internal"
@@ -1360,6 +2136,31 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [
public_syms="${public_syms}"
JEMALLOC_PREFIX="${JEMALLOC_PREFIX}"
])
+AC_CONFIG_COMMANDS([include/jemalloc/internal/private_symbols.awk], [
+ f="${objroot}include/jemalloc/internal/private_symbols.awk"
+ mkdir -p "${objroot}include/jemalloc/internal"
+ export_syms=`for sym in ${public_syms}; do echo "${JEMALLOC_PREFIX}${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;`
+ "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols.awk"
+], [
+ srcdir="${srcdir}"
+ objroot="${objroot}"
+ public_syms="${public_syms}"
+ wrap_syms="${wrap_syms}"
+ SYM_PREFIX="${SYM_PREFIX}"
+ JEMALLOC_PREFIX="${JEMALLOC_PREFIX}"
+])
+AC_CONFIG_COMMANDS([include/jemalloc/internal/private_symbols_jet.awk], [
+ f="${objroot}include/jemalloc/internal/private_symbols_jet.awk"
+ mkdir -p "${objroot}include/jemalloc/internal"
+ export_syms=`for sym in ${public_syms}; do echo "jet_${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;`
+ "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols_jet.awk"
+], [
+ srcdir="${srcdir}"
+ objroot="${objroot}"
+ public_syms="${public_syms}"
+ wrap_syms="${wrap_syms}"
+ SYM_PREFIX="${SYM_PREFIX}"
+])
AC_CONFIG_COMMANDS([include/jemalloc/internal/public_namespace.h], [
mkdir -p "${objroot}include/jemalloc/internal"
"${srcdir}/include/jemalloc/internal/public_namespace.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h"
@@ -1376,10 +2177,13 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [
])
AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [
mkdir -p "${objroot}include/jemalloc/internal"
- "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+ "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" 3 "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h"
], [
+ SHELL="${SHELL}"
srcdir="${srcdir}"
objroot="${objroot}"
+ LG_QUANTA="${LG_QUANTA}"
+ LG_PAGE_SIZES="${LG_PAGE_SIZES}"
])
AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [
mkdir -p "${objroot}include/jemalloc"
@@ -1426,7 +2230,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup])
dnl ============================================================================
dnl Generate outputs.
-AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh])
+AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc-config bin/jemalloc.sh bin/jeprof])
AC_SUBST([cfgoutputs_in])
AC_SUBST([cfgoutputs_out])
AC_OUTPUT
@@ -1437,11 +2241,19 @@ AC_MSG_RESULT([=================================================================
AC_MSG_RESULT([jemalloc version : ${jemalloc_version}])
AC_MSG_RESULT([library revision : ${rev}])
AC_MSG_RESULT([])
+AC_MSG_RESULT([CONFIG : ${CONFIG}])
AC_MSG_RESULT([CC : ${CC}])
+AC_MSG_RESULT([CONFIGURE_CFLAGS : ${CONFIGURE_CFLAGS}])
+AC_MSG_RESULT([SPECIFIED_CFLAGS : ${SPECIFIED_CFLAGS}])
+AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}])
AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}])
-AC_MSG_RESULT([CFLAGS : ${CFLAGS}])
+AC_MSG_RESULT([CXX : ${CXX}])
+AC_MSG_RESULT([CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}])
+AC_MSG_RESULT([SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}])
+AC_MSG_RESULT([EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}])
AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}])
AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}])
+AC_MSG_RESULT([DSO_LDFLAGS : ${DSO_LDFLAGS}])
AC_MSG_RESULT([LIBS : ${LIBS}])
AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}])
AC_MSG_RESULT([])
@@ -1450,9 +2262,9 @@ AC_MSG_RESULT([XSLROOT : ${XSLROOT}])
AC_MSG_RESULT([])
AC_MSG_RESULT([PREFIX : ${PREFIX}])
AC_MSG_RESULT([BINDIR : ${BINDIR}])
+AC_MSG_RESULT([DATADIR : ${DATADIR}])
AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}])
AC_MSG_RESULT([LIBDIR : ${LIBDIR}])
-AC_MSG_RESULT([DATADIR : ${DATADIR}])
AC_MSG_RESULT([MANDIR : ${MANDIR}])
AC_MSG_RESULT([])
AC_MSG_RESULT([srcroot : ${srcroot}])
@@ -1464,24 +2276,19 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}])
AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}])
AC_MSG_RESULT([install_suffix : ${install_suffix}])
+AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}])
AC_MSG_RESULT([autogen : ${enable_autogen}])
-AC_MSG_RESULT([experimental : ${enable_experimental}])
-AC_MSG_RESULT([cc-silence : ${enable_cc_silence}])
AC_MSG_RESULT([debug : ${enable_debug}])
-AC_MSG_RESULT([code-coverage : ${enable_code_coverage}])
AC_MSG_RESULT([stats : ${enable_stats}])
AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}])
-AC_MSG_RESULT([tcache : ${enable_tcache}])
AC_MSG_RESULT([fill : ${enable_fill}])
AC_MSG_RESULT([utrace : ${enable_utrace}])
-AC_MSG_RESULT([valgrind : ${enable_valgrind}])
AC_MSG_RESULT([xmalloc : ${enable_xmalloc}])
-AC_MSG_RESULT([mremap : ${enable_mremap}])
-AC_MSG_RESULT([munmap : ${enable_munmap}])
-AC_MSG_RESULT([dss : ${enable_dss}])
+AC_MSG_RESULT([log : ${enable_log}])
AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}])
-AC_MSG_RESULT([tls : ${enable_tls}])
+AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}])
+AC_MSG_RESULT([cxx : ${enable_cxx}])
AC_MSG_RESULT([===============================================================================])
diff --git a/deps/jemalloc/coverage.sh b/deps/jemalloc/coverage.sh
deleted file mode 100755
index 6d1362a8c..000000000
--- a/deps/jemalloc/coverage.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-
-set -e
-
-objdir=$1
-suffix=$2
-shift 2
-objs=$@
-
-gcov -b -p -f -o "${objdir}" ${objs}
-
-# Move gcov outputs so that subsequent gcov invocations won't clobber results
-# for the same sources with different compilation flags.
-for f in `find . -maxdepth 1 -type f -name '*.gcov'` ; do
- mv "${f}" "${f}.${suffix}"
-done
diff --git a/deps/jemalloc/doc/html.xsl.in b/deps/jemalloc/doc/html.xsl.in
index a91d9746f..ec4fa6552 100644
--- a/deps/jemalloc/doc/html.xsl.in
+++ b/deps/jemalloc/doc/html.xsl.in
@@ -1,4 +1,5 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:import href="@XSLROOT@/html/docbook.xsl"/>
<xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
+ <xsl:output method="xml" encoding="utf-8"/>
</xsl:stylesheet>
diff --git a/deps/jemalloc/doc/jemalloc.3 b/deps/jemalloc/doc/jemalloc.3
deleted file mode 100644
index d04fbb498..000000000
--- a/deps/jemalloc/doc/jemalloc.3
+++ /dev/null
@@ -1,1630 +0,0 @@
-'\" t
-.\" Title: JEMALLOC
-.\" Author: Jason Evans
-.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
-.\" Date: 03/31/2014
-.\" Manual: User Manual
-.\" Source: jemalloc 3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340
-.\" Language: English
-.\"
-.TH "JEMALLOC" "3" "03/31/2014" "jemalloc 3.6.0-0-g46c0af68bd24" "User Manual"
-.\" -----------------------------------------------------------------
-.\" * Define some portability stuff
-.\" -----------------------------------------------------------------
-.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.\" http://bugs.debian.org/507673
-.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
-.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.ie \n(.g .ds Aq \(aq
-.el .ds Aq '
-.\" -----------------------------------------------------------------
-.\" * set default formatting
-.\" -----------------------------------------------------------------
-.\" disable hyphenation
-.nh
-.\" disable justification (adjust text to left margin only)
-.ad l
-.\" -----------------------------------------------------------------
-.\" * MAIN CONTENT STARTS HERE *
-.\" -----------------------------------------------------------------
-.SH "NAME"
-jemalloc \- general purpose memory allocation functions
-.SH "LIBRARY"
-.PP
-This manual describes jemalloc 3\&.6\&.0\-0\-g46c0af68bd248b04df75e4f92d5fb804c3d75340\&. More information can be found at the
-\m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
-.SH "SYNOPSIS"
-.sp
-.ft B
-.nf
-#include <stdlib\&.h>
-#include <jemalloc/jemalloc\&.h>
-.fi
-.ft
-.SS "Standard API"
-.HP \w'void\ *malloc('u
-.BI "void *malloc(size_t\ " "size" ");"
-.HP \w'void\ *calloc('u
-.BI "void *calloc(size_t\ " "number" ", size_t\ " "size" ");"
-.HP \w'int\ posix_memalign('u
-.BI "int posix_memalign(void\ **" "ptr" ", size_t\ " "alignment" ", size_t\ " "size" ");"
-.HP \w'void\ *aligned_alloc('u
-.BI "void *aligned_alloc(size_t\ " "alignment" ", size_t\ " "size" ");"
-.HP \w'void\ *realloc('u
-.BI "void *realloc(void\ *" "ptr" ", size_t\ " "size" ");"
-.HP \w'void\ free('u
-.BI "void free(void\ *" "ptr" ");"
-.SS "Non\-standard API"
-.HP \w'void\ *mallocx('u
-.BI "void *mallocx(size_t\ " "size" ", int\ " "flags" ");"
-.HP \w'void\ *rallocx('u
-.BI "void *rallocx(void\ *" "ptr" ", size_t\ " "size" ", int\ " "flags" ");"
-.HP \w'size_t\ xallocx('u
-.BI "size_t xallocx(void\ *" "ptr" ", size_t\ " "size" ", size_t\ " "extra" ", int\ " "flags" ");"
-.HP \w'size_t\ sallocx('u
-.BI "size_t sallocx(void\ *" "ptr" ", int\ " "flags" ");"
-.HP \w'void\ dallocx('u
-.BI "void dallocx(void\ *" "ptr" ", int\ " "flags" ");"
-.HP \w'size_t\ nallocx('u
-.BI "size_t nallocx(size_t\ " "size" ", int\ " "flags" ");"
-.HP \w'int\ mallctl('u
-.BI "int mallctl(const\ char\ *" "name" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");"
-.HP \w'int\ mallctlnametomib('u
-.BI "int mallctlnametomib(const\ char\ *" "name" ", size_t\ *" "mibp" ", size_t\ *" "miblenp" ");"
-.HP \w'int\ mallctlbymib('u
-.BI "int mallctlbymib(const\ size_t\ *" "mib" ", size_t\ " "miblen" ", void\ *" "oldp" ", size_t\ *" "oldlenp" ", void\ *" "newp" ", size_t\ " "newlen" ");"
-.HP \w'void\ malloc_stats_print('u
-.BI "void malloc_stats_print(void\ " "(*write_cb)" "\ (void\ *,\ const\ char\ *), void\ *" "cbopaque" ", const\ char\ *" "opts" ");"
-.HP \w'size_t\ malloc_usable_size('u
-.BI "size_t malloc_usable_size(const\ void\ *" "ptr" ");"
-.HP \w'void\ (*malloc_message)('u
-.BI "void (*malloc_message)(void\ *" "cbopaque" ", const\ char\ *" "s" ");"
-.PP
-const char *\fImalloc_conf\fR;
-.SS "Experimental API"
-.HP \w'int\ allocm('u
-.BI "int allocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
-.HP \w'int\ rallocm('u
-.BI "int rallocm(void\ **" "ptr" ", size_t\ *" "rsize" ", size_t\ " "size" ", size_t\ " "extra" ", int\ " "flags" ");"
-.HP \w'int\ sallocm('u
-.BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");"
-.HP \w'int\ dallocm('u
-.BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");"
-.HP \w'int\ nallocm('u
-.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
-.SH "DESCRIPTION"
-.SS "Standard API"
-.PP
-The
-\fBmalloc\fR\fB\fR
-function allocates
-\fIsize\fR
-bytes of uninitialized memory\&. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object\&.
-.PP
-The
-\fBcalloc\fR\fB\fR
-function allocates space for
-\fInumber\fR
-objects, each
-\fIsize\fR
-bytes in length\&. The result is identical to calling
-\fBmalloc\fR\fB\fR
-with an argument of
-\fInumber\fR
-*
-\fIsize\fR, with the exception that the allocated memory is explicitly initialized to zero bytes\&.
-.PP
-The
-\fBposix_memalign\fR\fB\fR
-function allocates
-\fIsize\fR
-bytes of memory such that the allocation\*(Aqs base address is an even multiple of
-\fIalignment\fR, and returns the allocation in the value pointed to by
-\fIptr\fR\&. The requested
-\fIalignment\fR
-must be a power of 2 at least as large as
-sizeof(\fBvoid *\fR)\&.
-.PP
-The
-\fBaligned_alloc\fR\fB\fR
-function allocates
-\fIsize\fR
-bytes of memory such that the allocation\*(Aqs base address is an even multiple of
-\fIalignment\fR\&. The requested
-\fIalignment\fR
-must be a power of 2\&. Behavior is undefined if
-\fIsize\fR
-is not an integral multiple of
-\fIalignment\fR\&.
-.PP
-The
-\fBrealloc\fR\fB\fR
-function changes the size of the previously allocated memory referenced by
-\fIptr\fR
-to
-\fIsize\fR
-bytes\&. The contents of the memory are unchanged up to the lesser of the new and old sizes\&. If the new size is larger, the contents of the newly allocated portion of the memory are undefined\&. Upon success, the memory referenced by
-\fIptr\fR
-is freed and a pointer to the newly allocated memory is returned\&. Note that
-\fBrealloc\fR\fB\fR
-may move the memory allocation, resulting in a different return value than
-\fIptr\fR\&. If
-\fIptr\fR
-is
-\fBNULL\fR, the
-\fBrealloc\fR\fB\fR
-function behaves identically to
-\fBmalloc\fR\fB\fR
-for the specified size\&.
-.PP
-The
-\fBfree\fR\fB\fR
-function causes the allocated memory referenced by
-\fIptr\fR
-to be made available for future allocations\&. If
-\fIptr\fR
-is
-\fBNULL\fR, no action occurs\&.
-.SS "Non\-standard API"
-.PP
-The
-\fBmallocx\fR\fB\fR,
-\fBrallocx\fR\fB\fR,
-\fBxallocx\fR\fB\fR,
-\fBsallocx\fR\fB\fR,
-\fBdallocx\fR\fB\fR, and
-\fBnallocx\fR\fB\fR
-functions all have a
-\fIflags\fR
-argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following:
-.PP
-\fBMALLOCX_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-(1 << \fIla\fR)\&. This macro does not validate that
-\fIla\fR
-is within the valid range\&.
-.RE
-.PP
-\fBMALLOCX_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-\fIa\fR, where
-\fIa\fR
-is a power of two\&. This macro does not validate that
-\fIa\fR
-is a power of 2\&.
-.RE
-.PP
-\fBMALLOCX_ZERO\fR
-.RS 4
-Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&.
-.RE
-.PP
-\fBMALLOCX_ARENA(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Use the arena specified by the index
-\fIa\fR
-(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that
-\fIa\fR
-specifies an arena index in the valid range\&.
-.RE
-.PP
-The
-\fBmallocx\fR\fB\fR
-function allocates at least
-\fIsize\fR
-bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
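
For illustration, a minimal call combining the flags described above (the wrapper name example_alloc is ours):

    #include <jemalloc/jemalloc.h>

    void
    example_alloc(void) {
        /* At least 4096 zeroed bytes, aligned to a 64-byte boundary. */
        void *p = mallocx(4096, MALLOCX_ALIGN(64) | MALLOCX_ZERO);
        if (p != NULL) {
            dallocx(p, 0);
        }
    }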
-.PP
-The
-\fBrallocx\fR\fB\fR
-function resizes the allocation at
-\fIptr\fR
-to be at least
-\fIsize\fR
-bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
-.PP
-The
-\fBxallocx\fR\fB\fR
-function resizes the allocation at
-\fIptr\fR
-in place to be at least
-\fIsize\fR
-bytes, and returns the real size of the allocation\&. If
-\fIextra\fR
-is non\-zero, an attempt is made to resize the allocation to be at least
-(\fIsize\fR + \fIextra\fR)
-bytes, though inability to allocate the extra byte(s) will not by itself result in failure to resize\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if
-(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&.
-.PP
-The
-\fBsallocx\fR\fB\fR
-function returns the real size of the allocation at
-\fIptr\fR\&.
-.PP
-The
-\fBdallocx\fR\fB\fR
-function causes the memory referenced by
-\fIptr\fR
-to be made available for future allocations\&.
-.PP
-The
-\fBnallocx\fR\fB\fR
-function allocates no memory, but it performs the same size computation as the
-\fBmallocx\fR\fB\fR
-function, and returns the real size of the allocation that would result from the equivalent
-\fBmallocx\fR\fB\fR
-function call\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
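
A sketch of the size-introspection idiom this enables (planned_size is our name):

    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    /* The real size a request would consume, without allocating. */
    size_t
    planned_size(size_t request) {
        return nallocx(request, 0); /* same rounding as mallocx(request, 0) */
    }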
-.PP
-The
-\fBmallctl\fR\fB\fR
-function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions\&. The period\-separated
-\fIname\fR
-argument specifies a location in a tree\-structured namespace; see the
-MALLCTL NAMESPACE
-section for documentation on the tree contents\&. To read a value, pass a pointer via
-\fIoldp\fR
-to adequate space to contain the value, and a pointer to its length via
-\fIoldlenp\fR; otherwise pass
-\fBNULL\fR
-and
-\fBNULL\fR\&. Similarly, to write a value, pass a pointer to the value via
-\fInewp\fR, and its length via
-\fInewlen\fR; otherwise pass
-\fBNULL\fR
-and
-\fB0\fR\&.
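
A minimal read through this interface, using the documented "version" node (print_version is our name):

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    void
    print_version(void) {
        const char *v;
        size_t len = sizeof(v);
        if (mallctl("version", &v, &len, NULL, 0) == 0) {
            printf("jemalloc %s\n", v);
        }
    }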
-.PP
-The
-\fBmallctlnametomib\fR\fB\fR
-function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a \(lqManagement Information Base\(rq (MIB) that can be passed repeatedly to
-\fBmallctlbymib\fR\fB\fR\&. Upon successful return from
-\fBmallctlnametomib\fR\fB\fR,
-\fImibp\fR
-contains an array of
-\fI*miblenp\fR
-integers, where
-\fI*miblenp\fR
-is the lesser of the number of components in
-\fIname\fR
-and the input value of
-\fI*miblenp\fR\&. Thus it is possible to pass a
-\fI*miblenp\fR
-that is smaller than the number of period\-separated name components, which results in a partial MIB that can be used as the basis for constructing a complete MIB\&. For name components that are integers (e\&.g\&. the 2 in
-"arenas\&.bin\&.2\&.size"), the corresponding MIB component will always be that integer\&. Therefore, it is legitimate to construct code like the following:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-unsigned nbins, i;
-size_t mib[4];
-size_t len, miblen;
-
-len = sizeof(nbins);
-mallctl("arenas\&.nbins", &nbins, &len, NULL, 0);
-
-miblen = 4;
-mallctlnametomib("arenas\&.bin\&.0\&.size", mib, &miblen);
-for (i = 0; i < nbins; i++) {
- size_t bin_size;
-
- mib[2] = i;
- len = sizeof(bin_size);
- mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
- /* Do something with bin_size\&.\&.\&. */
-}
-.fi
-.if n \{\
-.RE
-.\}
-.PP
-The
-\fBmalloc_stats_print\fR\fB\fR
-function writes human\-readable summary statistics via the
-\fIwrite_cb\fR
-callback function pointer and
-\fIcbopaque\fR
-data passed to
-\fIwrite_cb\fR, or
-\fBmalloc_message\fR\fB\fR
-if
-\fIwrite_cb\fR
-is
-\fBNULL\fR\&. This function can be called repeatedly\&. General information that never changes during execution can be omitted by specifying "g" as a character within the
-\fIopts\fR
-string\&. Note that
-\fBmalloc_message\fR\fB\fR
-uses the
-\fBmallctl*\fR\fB\fR
-functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously\&. If
-\fB\-\-enable\-stats\fR
-is specified during configuration, \(lqm\(rq and \(lqa\(rq can be specified to omit merged arena and per arena statistics, respectively; \(lqb\(rq and \(lql\(rq can be specified to omit per size class statistics for bins and large objects, respectively\&. Unrecognized characters are silently ignored\&. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations\&.
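
The simplest possible invocation, per the description above (dump_stats is our name; "g" omits the never-changing general information):

    #include <jemalloc/jemalloc.h>

    void
    dump_stats(void) {
        /* NULL write_cb routes output through malloc_message. */
        malloc_stats_print(NULL, NULL, "g");
    }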
-.PP
-The
-\fBmalloc_usable_size\fR\fB\fR
-function returns the usable size of the allocation pointed to by
-\fIptr\fR\&. The return value may be larger than the size that was requested during allocation\&. The
-\fBmalloc_usable_size\fR\fB\fR
-function is not a mechanism for in\-place
-\fBrealloc\fR\fB\fR; rather it is provided solely as a tool for introspection purposes\&. Any discrepancy between the requested allocation size and the size reported by
-\fBmalloc_usable_size\fR\fB\fR
-should not be depended on, since such behavior is entirely implementation\-dependent\&.
-.SS "Experimental API"
-.PP
-The experimental API is subject to change or removal without regard for backward compatibility\&. If
-\fB\-\-disable\-experimental\fR
-is specified during configuration, the experimental API is omitted\&.
-.PP
-The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR,
-\fBdallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions all have a
-\fIflags\fR
-argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following:
-.PP
-\fBALLOCM_LG_ALIGN(\fR\fB\fIla\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-(1 << \fIla\fR)\&. This macro does not validate that
-\fIla\fR
-is within the valid range\&.
-.RE
-.PP
-\fBALLOCM_ALIGN(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Align the memory allocation to start at an address that is a multiple of
-\fIa\fR, where
-\fIa\fR
-is a power of two\&. This macro does not validate that
-\fIa\fR
-is a power of 2\&.
-.RE
-.PP
-\fBALLOCM_ZERO\fR
-.RS 4
-Initialize newly allocated memory to contain zero bytes\&. In the growing reallocation case, the real size prior to reallocation defines the boundary between untouched bytes and those that are initialized to contain zero bytes\&. If this macro is absent, newly allocated memory is uninitialized\&.
-.RE
-.PP
-\fBALLOCM_NO_MOVE\fR
-.RS 4
-For reallocation, fail rather than moving the object\&. This constraint can apply to both growth and shrinkage\&.
-.RE
-.PP
-\fBALLOCM_ARENA(\fR\fB\fIa\fR\fR\fB) \fR
-.RS 4
-Use the arena specified by the index
-\fIa\fR
-(and by necessity bypass the thread cache)\&. This macro has no effect for huge regions, nor for regions that were allocated via an arena other than the one specified\&. This macro does not validate that
-\fIa\fR
-specifies an arena index in the valid range\&.
-.RE
-.PP
-The
-\fBallocm\fR\fB\fR
-function allocates at least
-\fIsize\fR
-bytes of memory, sets
-\fI*ptr\fR
-to the base address of the allocation, and sets
-\fI*rsize\fR
-to the real size of the allocation if
-\fIrsize\fR
-is not
-\fBNULL\fR\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
-.PP
-The
-\fBrallocm\fR\fB\fR
-function resizes the allocation at
-\fI*ptr\fR
-to be at least
-\fIsize\fR
-bytes, sets
-\fI*ptr\fR
-to the base address of the allocation if it moved, and sets
-\fI*rsize\fR
-to the real size of the allocation if
-\fIrsize\fR
-is not
-\fBNULL\fR\&. If
-\fIextra\fR
-is non\-zero, an attempt is made to resize the allocation to be at least
-(\fIsize\fR + \fIextra\fR)
-bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, if request size overflows due to size class and/or alignment constraints, or if
-(\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&.
-.PP
-The
-\fBsallocm\fR\fB\fR
-function sets
-\fI*rsize\fR
-to the real size of the allocation\&.
-.PP
-The
-\fBdallocm\fR\fB\fR
-function causes the memory referenced by
-\fIptr\fR
-to be made available for future allocations\&.
-.PP
-The
-\fBnallocm\fR\fB\fR
-function allocates no memory, but it performs the same size computation as the
-\fBallocm\fR\fB\fR
-function, and if
-\fIrsize\fR
-is not
-\fBNULL\fR
-it sets
-\fI*rsize\fR
-to the real size of the allocation that would result from the equivalent
-\fBallocm\fR\fB\fR
-function call\&. Behavior is undefined if
-\fIsize\fR
-is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
-.SH "TUNING"
-.PP
-Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&.
-.PP
-The string pointed to by the global variable
-\fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named
-/etc/malloc\&.conf, and the value of the environment variable
-\fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. Note that
-\fImalloc_conf\fR
-may be read before
-\fBmain\fR\fB\fR
-is entered, so the declaration of
-\fImalloc_conf\fR
-should specify an initializer that contains the final value to be read by jemalloc\&.
-\fImalloc_conf\fR
-is a compile\-time setting, whereas
-/etc/malloc\&.conf
-and
-\fBMALLOC_CONF\fR
-can be safely set any time prior to program invocation\&.
-.PP
-An options string is a comma\-separated list of option:value pairs\&. There is one key corresponding to each
-"opt\&.*"
-mallctl (see the
-MALLCTL NAMESPACE
-section for options documentation)\&. For example,
-abort:true,narenas:1
-sets the
-"opt\&.abort"
-and
-"opt\&.narenas"
-options\&. Some options have boolean values (true/false), others have integer values (base 8, 10, or 16, depending on prefix), and yet others have raw string values\&.
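
Putting the above together, an application can bake in defaults with the documented global (the option values here are the man page's own example):

    #include <jemalloc/jemalloc.h>

    /* Compile-time defaults; /etc/malloc.conf and MALLOC_CONF, read
     * afterwards, can still override these at run time. */
    const char *malloc_conf = "abort:true,narenas:1";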
-.SH "IMPLEMENTATION NOTES"
-.PP
-Traditionally, allocators have used
-\fBsbrk\fR(2)
-to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If
-\fB\-\-enable\-dss\fR
-is specified during configuration, this allocator uses both
-\fBmmap\fR(2)
-and
-\fBsbrk\fR(2), in that order of preference; otherwise only
-\fBmmap\fR(2)
-is used\&.
-.PP
-This allocator uses multiple arenas in order to reduce lock contention for threaded programs on multi\-processor systems\&. This works well with regard to threading scalability, but incurs some costs\&. There is a small fixed per\-arena overhead, and additionally, arenas manage memory completely independently of each other, which means a small fixed increase in overall memory fragmentation\&. These overheads are not generally an issue, given the number of arenas normally used\&. Note that using substantially more arenas than the default is not likely to improve performance, mainly due to reduced cache performance\&. However, it may make sense to reduce the number of arenas if an application does not make much use of the allocation functions\&.
-.PP
-In addition to multiple arenas, unless
-\fB\-\-disable\-tcache\fR
-is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&.
-.PP
-Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&.
-.PP
-User objects are broken into three categories according to size: small, large, and huge\&. Small objects are smaller than one page\&. Large objects are smaller than the chunk size\&. Huge objects are a multiple of the chunk size\&. Small and large objects are managed by arenas; huge objects are managed separately in a single data structure that is shared by all threads\&. Huge objects are used by applications infrequently enough that this single data structure is not a scalability issue\&.
-.PP
-Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
-.PP
-Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
-sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the
-"opt\&.lg_chunk"
-option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&.
-.PP
-Allocations are packed tightly together, which can be an issue for multi\-threaded applications\&. If you need to assure that allocations do not suffer from cacheline sharing, round your allocation requests up to the nearest multiple of the cacheline size, or specify cacheline alignment when allocating\&.
-.PP
-Assuming 4 MiB chunks, 4 KiB pages, and a 16\-byte quantum on a 64\-bit system, the size classes in each category are as shown in
-Table 1\&.
-.sp
-.it 1 an-trap
-.nr an-no-space-flag 1
-.nr an-break-flag 1
-.br
-.B Table\ \&1.\ \&Size classes
-.TS
-allbox tab(:);
-lB rB lB.
-T{
-Category
-T}:T{
-Spacing
-T}:T{
-Size
-T}
-.T&
-l r l
-^ r l
-^ r l
-^ r l
-^ r l
-^ r l
-^ r l
-l r l
-l r l.
-T{
-Small
-T}:T{
-lg
-T}:T{
-[8]
-T}
-:T{
-16
-T}:T{
-[16, 32, 48, \&.\&.\&., 128]
-T}
-:T{
-32
-T}:T{
-[160, 192, 224, 256]
-T}
-:T{
-64
-T}:T{
-[320, 384, 448, 512]
-T}
-:T{
-128
-T}:T{
-[640, 768, 896, 1024]
-T}
-:T{
-256
-T}:T{
-[1280, 1536, 1792, 2048]
-T}
-:T{
-512
-T}:T{
-[2560, 3072, 3584]
-T}
-T{
-Large
-T}:T{
-4 KiB
-T}:T{
-[4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB]
-T}
-T{
-Huge
-T}:T{
-4 MiB
-T}:T{
-[4 MiB, 8 MiB, 12 MiB, \&.\&.\&.]
-T}
-.TE
-.sp 1
-.SH "MALLCTL NAMESPACE"
-.PP
-The following names are defined in the namespace accessible via the
-\fBmallctl*\fR\fB\fR
-functions\&. Value types are specified in parentheses, their readable/writable statuses are encoded as
-rw,
-r\-,
-\-w, or
-\-\-, and required build configuration flags follow, if any\&. A name element encoded as
-<i>
-or
-<j>
-indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection\&. In the case of
-"stats\&.arenas\&.<i>\&.*",
-<i>
-equal to
-"arenas\&.narenas"
-can be used to access the summation of statistics from all arenas\&. Take special note of the
-"epoch"
-mallctl, which controls refreshing of cached dynamic statistics\&.
-.PP
-"version" (\fBconst char *\fR) r\-
-.RS 4
-Return the jemalloc version string\&.
-.RE
-.PP
-"epoch" (\fBuint64_t\fR) rw
-.RS 4
-If a value is passed in, refresh the data from which the
-\fBmallctl*\fR\fB\fR
-functions report values, and increment the epoch\&. Return the current epoch\&. This is useful for detecting whether another thread caused a refresh\&.
-.RE
-.PP
-"config\&.debug" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-debug\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.dss" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-dss\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.fill" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-fill\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.lazy_lock" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-lazy\-lock\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.mremap" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-mremap\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.munmap" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-munmap\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.prof" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-prof\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.prof_libgcc" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-disable\-prof\-libgcc\fR
-was not specified during build configuration\&.
-.RE
-.PP
-"config\&.prof_libunwind" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-prof\-libunwind\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.stats" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-stats\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.tcache" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-disable\-tcache\fR
-was not specified during build configuration\&.
-.RE
-.PP
-"config\&.tls" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-disable\-tls\fR
-was not specified during build configuration\&.
-.RE
-.PP
-"config\&.utrace" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-utrace\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.valgrind" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-valgrind\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.xmalloc" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-xmalloc\fR
-was specified during build configuration\&.
-.RE
-.PP
-"opt\&.abort" (\fBbool\fR) r\-
-.RS 4
-Abort\-on\-warning enabled/disabled\&. If true, most warnings are fatal\&. The process will call
-\fBabort\fR(3)
-in these cases\&. This option is disabled by default unless
-\fB\-\-enable\-debug\fR
-is specified during configuration, in which case it is enabled by default\&.
-.RE
-.PP
-"opt\&.dss" (\fBconst char *\fR) r\-
-.RS 4
-dss (\fBsbrk\fR(2)) allocation precedence as related to
-\fBmmap\fR(2)
-allocation\&. The following settings are supported: \(lqdisabled\(rq, \(lqprimary\(rq, and \(lqsecondary\(rq\&. The default is \(lqsecondary\(rq if
-"config\&.dss"
-is true, \(lqdisabled\(rq otherwise\&.
-.RE
-.PP
-"opt\&.lg_chunk" (\fBsize_t\fR) r\-
-.RS 4
-Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 4 MiB (2^22)\&.
-.RE
-.PP
-"opt\&.narenas" (\fBsize_t\fR) r\-
-.RS 4
-Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&.
-.RE
-.PP
-"opt\&.lg_dirty_mult" (\fBssize_t\fR) r\-
-.RS 4
-Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via
-\fBmadvise\fR(2)
-or a similar system call\&. This provides the kernel with sufficient information to recycle dirty pages if physical memory becomes scarce and the pages remain unused\&. The default minimum ratio is 8:1 (2^3:1); an option value of \-1 will disable dirty page purging\&.
-.RE
-.PP
-"opt\&.stats_print" (\fBbool\fR) r\-
-.RS 4
-Enable/disable statistics printing at exit\&. If enabled, the
-\fBmalloc_stats_print\fR\fB\fR
-function is called at program exit via an
-\fBatexit\fR(3)
-function\&. If
-\fB\-\-enable\-stats\fR
-is specified during configuration, this has the potential to cause deadlock for a multi\-threaded process that exits while one or more threads are executing in the memory allocation functions\&. Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development\&. This option is disabled by default\&.
-.RE
-.PP
-"opt\&.junk" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
-.RS 4
-Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to
-0xa5\&. All deallocated memory will be initialized to
-0x5a\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default unless
-\fB\-\-enable\-debug\fR
-is specified during configuration, in which case it is enabled by default unless running inside
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&.
-.RE
-.PP
-"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR]
-.RS 4
-Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the
-"opt\&.junk"
-option is enabled\&. This feature is of particular use in combination with
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0 unless running inside Valgrind, in which case the default is 16 MiB\&.
-.RE
-.PP
-"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
-.RS 4
-Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the
-"opt\&.junk"
-option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default unless running inside Valgrind\&.
-.RE
-.PP
-"opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
-.RS 4
-Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so
-\fBrealloc\fR\fB\fR,
-\fBrallocx\fR\fB\fR
-and
-\fBrallocm\fR\fB\fR
-calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
-.RE
-.PP
-"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR]
-.RS 4
-Allocation tracing based on
-\fButrace\fR(2)
-enabled/disabled\&. This option is disabled by default\&.
-.RE
-.PP
-"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR]
-.RS 4
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
-support enabled/disabled\&. This option is vestigial because jemalloc auto\-detects whether it is running inside Valgrind\&. This option is disabled by default, unless running inside Valgrind\&.
-.RE
-.PP
-"opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR]
-.RS 4
-Abort\-on\-out\-of\-memory enabled/disabled\&. If enabled, rather than returning failure for any allocation function, display a diagnostic message on
-\fBSTDERR_FILENO\fR
-and cause the program to drop core (using
-\fBabort\fR(3))\&. If an application is designed to depend on this behavior, set the option at compile time by including the following in the source code:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-malloc_conf = "xmalloc:true";
-.fi
-.if n \{\
-.RE
-.\}
-.sp
-This option is disabled by default\&.
-.RE
-.PP
-"opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the
-"opt\&.lg_tcache_max"
-option for related tuning information\&. This option is enabled by default unless running inside
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2\&.
-.RE
-.PP
-"opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&.
-.RE
-.PP
-"opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the
-"opt\&.prof_active"
-option for on\-the\-fly activation/deactivation\&. See the
-"opt\&.lg_prof_sample"
-option for probabilistic sampling control\&. See the
-"opt\&.prof_accum"
-option for control of cumulative sample reporting\&. See the
-"opt\&.lg_prof_interval"
-option for information on interval\-triggered profile dumping, the
-"opt\&.prof_gdump"
-option for information on high\-water\-triggered profile dumping, and the
-"opt\&.prof_final"
-option for final profile dumping\&. Profile output is compatible with the included
-\fBpprof\fR
-Perl script, which originates from the
-\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&.
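-.sp
-For example, sampling\-based profiling could be enabled at compile time in the same manner as the
-"opt\&.xmalloc"
-example above (lg_prof_sample:19 merely restates the default sample interval):
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-malloc_conf = "prof:true,lg_prof_sample:19";
-.fi
-.if n \{\
-.RE
-.\}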
-.RE
-.PP
-"opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Filename prefix for profile dumps\&. If the prefix is set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled)\&. The default prefix is
-jeprof\&.
-.RE
-.PP
-"opt\&.prof_active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
-.RS 4
-Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the
-"opt\&.prof"
-option) but inactive, then toggle profiling at any time during program execution with the
-"prof\&.active"
-mallctl\&. This option is enabled by default\&.
-.RE
-.PP
-"opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&.
-.RE
-.PP
-"opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&.
-.RE
-.PP
-"opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Average interval (log base 2) between memory profile dumps, as measured in bytes of allocation activity\&. The actual interval between dumps may be sporadic because decentralized allocation counters are used to avoid synchronization bottlenecks\&. Profiles are dumped to files named according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.i<iseq>\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&. By default, interval\-triggered profile dumping is disabled (encoded as \-1)\&.
-.RE
-.PP
-"opt\&.prof_gdump" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Trigger a memory profile dump every time the total virtual memory exceeds the previous maximum\&. Profiles are dumped to files named according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.u<useq>\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&. This option is disabled by default\&.
-.RE
-.PP
-"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Use an
-\fBatexit\fR(3)
-function to dump final memory usage to a file named according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.f\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&. This option is enabled by default\&.
-.RE
-.PP
-"opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Leak reporting enabled/disabled\&. If enabled, use an
-\fBatexit\fR(3)
-function to report memory leaks detected by allocation sampling\&. See the
-"opt\&.prof"
-option for information on analyzing heap profile output\&. This option is disabled by default\&.
-.RE
-.PP
-"thread\&.arena" (\fBunsigned\fR) rw
-.RS 4
-Get or set the arena associated with the calling thread\&. If the specified arena was not initialized beforehand (see the
-"arenas\&.initialized"
-mallctl), it will be automatically initialized as a side effect of calling this interface\&.
-.RE
-.PP
-"thread\&.allocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Get the total number of bytes ever allocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&.
-.RE
-.PP
-"thread\&.allocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Get a pointer to the value that is returned by the
-"thread\&.allocated"
-mallctl\&. This is useful for avoiding the overhead of repeated
-\fBmallctl*\fR\fB\fR
-calls\&.
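-.sp
-For example (a sketch with error checking omitted), the pointer can be fetched once and dereferenced on subsequent reads:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-uint64_t *allocatedp;
-size_t len = sizeof(allocatedp);
-
-mallctl("thread.allocatedp", &allocatedp, &len, NULL, 0);
-/* Later reads avoid further mallctl() calls\&. */
-uint64_t allocated = *allocatedp;
-.fi
-.if n \{\
-.RE
-.\}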
-.RE
-.PP
-"thread\&.deallocated" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Get the total number of bytes ever deallocated by the calling thread\&. This counter has the potential to wrap around; it is up to the application to appropriately interpret the counter in such cases\&.
-.RE
-.PP
-"thread\&.deallocatedp" (\fBuint64_t *\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Get a pointer to the value that is returned by the
-"thread\&.deallocated"
-mallctl\&. This is useful for avoiding the overhead of repeated
-\fBmallctl*\fR\fB\fR
-calls\&.
-.RE
-.PP
-"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR]
-.RS 4
-Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see
-"thread\&.tcache\&.flush")\&.
-.RE
-.PP
-"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
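-.sp
-For example, a thread that has stopped allocating could release its cache as follows (error checking omitted):
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
-.fi
-.if n \{\
-.RE
-.\}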
-.RE
-.PP
-"arena\&.<i>\&.purge" (\fBunsigned\fR) \-\-
-.RS 4
-Purge unused dirty pages for arena <i>, or for all arenas if <i> equals
-"arenas\&.narenas"\&.
-.RE
-.PP
-"arena\&.<i>\&.dss" (\fBconst char *\fR) rw
-.RS 4
-Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals
-"arenas\&.narenas"\&. Note that even during huge allocation this setting is read from the arena that would be chosen for small or large allocation so that applications can depend on consistent dss versus mmap allocation regardless of allocation size\&. See
-"opt\&.dss"
-for supported settings\&.
-.RE
-.PP
-"arenas\&.narenas" (\fBunsigned\fR) r\-
-.RS 4
-Current limit on number of arenas\&.
-.RE
-.PP
-"arenas\&.initialized" (\fBbool *\fR) r\-
-.RS 4
-An array of
-"arenas\&.narenas"
-booleans\&. Each boolean indicates whether the corresponding arena is initialized\&.
-.RE
-.PP
-"arenas\&.quantum" (\fBsize_t\fR) r\-
-.RS 4
-Quantum size\&.
-.RE
-.PP
-"arenas\&.page" (\fBsize_t\fR) r\-
-.RS 4
-Page size\&.
-.RE
-.PP
-"arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Maximum thread\-cached size class\&.
-.RE
-.PP
-"arenas\&.nbins" (\fBunsigned\fR) r\-
-.RS 4
-Number of bin size classes\&.
-.RE
-.PP
-"arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Total number of thread cache bin size classes\&.
-.RE
-.PP
-"arenas\&.bin\&.<i>\&.size" (\fBsize_t\fR) r\-
-.RS 4
-Maximum size supported by size class\&.
-.RE
-.PP
-"arenas\&.bin\&.<i>\&.nregs" (\fBuint32_t\fR) r\-
-.RS 4
-Number of regions per page run\&.
-.RE
-.PP
-"arenas\&.bin\&.<i>\&.run_size" (\fBsize_t\fR) r\-
-.RS 4
-Number of bytes per page run\&.
-.RE
-.PP
-"arenas\&.nlruns" (\fBsize_t\fR) r\-
-.RS 4
-Total number of large size classes\&.
-.RE
-.PP
-"arenas\&.lrun\&.<i>\&.size" (\fBsize_t\fR) r\-
-.RS 4
-Maximum size supported by this large size class\&.
-.RE
-.PP
-"arenas\&.purge" (\fBunsigned\fR) \-w
-.RS 4
-Purge unused dirty pages for the specified arena, or for all arenas if none is specified\&.
-.RE
-.PP
-"arenas\&.extend" (\fBunsigned\fR) r\-
-.RS 4
-Extend the array of arenas by appending a new arena, and return the new arena index\&.
-.RE
-.PP
-"prof\&.active" (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
-.RS 4
-Control whether sampling is currently active\&. See the
-"opt\&.prof_active"
-option for additional information\&.
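-.sp
-For example, profiling could be deactivated at run time as follows (error checking omitted; \fBbool\fR as in <stdbool\&.h>):
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-bool active = false;
-
-mallctl("prof.active", NULL, NULL, &active, sizeof(active));
-.fi
-.if n \{\
-.RE
-.\}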
-.RE
-.PP
-"prof\&.dump" (\fBconst char *\fR) \-w [\fB\-\-enable\-prof\fR]
-.RS 4
-Dump a memory profile to the specified file, or if NULL is specified, to a file according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.m<mseq>\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&.
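-.sp
-For example (a sketch with error checking omitted; the path shown is illustrative), dumps can be triggered programmatically, either to a pattern\-named file or to an explicit path:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-/* Dump to the <prefix>\-based pattern described above\&. */
-mallctl("prof.dump", NULL, NULL, NULL, 0);
-
-/* Dump to an explicit path\&. */
-const char *path = "/tmp/myprog.heap";
-mallctl("prof.dump", NULL, NULL, &path, sizeof(path));
-.fi
-.if n \{\
-.RE
-.\}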
-.RE
-.PP
-"prof\&.interval" (\fBuint64_t\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Average number of bytes allocated between interval\-based profile dumps\&. See the
-"opt\&.lg_prof_interval"
-option for additional information\&.
-.RE
-.PP
-"stats\&.cactive" (\fBsize_t *\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Pointer to a counter that contains an approximate count of the current number of bytes in active pages\&. The estimate may be high, but never low, because each arena rounds up to the nearest multiple of the chunk size when computing its contribution to the counter\&. Note that the
-"epoch"
-mallctl has no bearing on this counter\&. Furthermore, counter consistency is maintained via atomic operations, so it is necessary to use an atomic operation in order to guarantee a consistent read when dereferencing the pointer\&.
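-.sp
-For example (a sketch assuming the GCC/Clang __atomic builtins for the consistent read; error checking omitted):
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-size_t *cactive;
-size_t len = sizeof(cactive);
-
-mallctl("stats.cactive", &cactive, &len, NULL, 0);
-/* An atomic load guarantees a consistent value\&. */
-size_t active_bytes = __atomic_load_n(cactive, __ATOMIC_RELAXED);
-.fi
-.if n \{\
-.RE
-.\}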
-.RE
-.PP
-"stats\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of bytes allocated by the application\&.
-.RE
-.PP
-"stats\&.active" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of bytes in active pages allocated by the application\&. This is a multiple of the page size, and greater than or equal to
-"stats\&.allocated"\&. This does not include
-"stats\&.arenas\&.<i>\&.pdirty"
-and pages entirely devoted to allocator metadata\&.
-.RE
-.PP
-"stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as
-"stats\&.active"\&. This does not include inactive chunks\&.
-.RE
-.PP
-"stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks\&.
-.RE
-.PP
-"stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of chunks allocated\&.
-.RE
-.PP
-"stats\&.chunks\&.high" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Maximum number of active chunks at any time thus far\&.
-.RE
-.PP
-"stats\&.huge\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of bytes currently allocated by huge objects\&.
-.RE
-.PP
-"stats\&.huge\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of huge allocation requests\&.
-.RE
-.PP
-"stats\&.huge\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of huge deallocation requests\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.dss" (\fBconst char *\fR) r\-
-.RS 4
-dss (\fBsbrk\fR(2)) allocation precedence as related to
-\fBmmap\fR(2)
-allocation\&. See
-"opt\&.dss"
-for details\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.nthreads" (\fBunsigned\fR) r\-
-.RS 4
-Number of threads currently assigned to arena\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.pactive" (\fBsize_t\fR) r\-
-.RS 4
-Number of pages in active runs\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.pdirty" (\fBsize_t\fR) r\-
-.RS 4
-Number of pages within unused runs that are potentially dirty, and for which
-\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR
-or similar has not been called\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of mapped bytes\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.npurge" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of dirty page purge sweeps performed\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.nmadvise" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of
-\fBmadvise\fR\fB\fI\&.\&.\&.\fR\fR\fB \fR\fB\fI\fBMADV_DONTNEED\fR\fR\fR
-or similar calls made to purge dirty pages\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.purged" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of pages purged\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.small\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of bytes currently allocated by small objects\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.small\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocation requests served by small bins\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.small\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of small objects returned to bins\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.small\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of small allocation requests\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.large\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Number of bytes currently allocated by large objects\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.large\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of large allocation requests served directly by the arena\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.large\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of large deallocation requests served directly by the arena\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.large\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of large allocation requests\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.allocated" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Current number of bytes allocated by bin\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocations served by bin\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocations returned to bin\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocation requests\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nfills" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR]
-.RS 4
-Cumulative number of tcache fills\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nflushes" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR \fB\-\-enable\-tcache\fR]
-.RS 4
-Cumulative number of tcache flushes\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of runs created\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.nreruns" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of times the current run from which to allocate changed\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Current number of runs\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.nmalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocation requests for this size class served directly by the arena\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.ndalloc" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of deallocation requests for this size class served directly by the arena\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.nrequests" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Cumulative number of allocation requests for this size class\&.
-.RE
-.PP
-"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Current number of runs for this size class\&.
-.RE
-.SH "DEBUGGING MALLOC PROBLEMS"
-.PP
-When debugging, it is a good idea to configure/build jemalloc with the
-\fB\-\-enable\-debug\fR
-and
-\fB\-\-enable\-fill\fR
-options, and recompile the program with suitable options and symbols for debugger support\&. When so configured, jemalloc incorporates a wide variety of run\-time assertions that catch application errors such as double\-free, write\-after\-free, etc\&.
-.PP
-Programs often accidentally depend on \(lquninitialized\(rq memory actually being filled with zero bytes\&. Junk filling (see the
-"opt\&.junk"
-option) tends to expose such bugs in the form of obviously incorrect results and/or coredumps\&. Conversely, zero filling (see the
-"opt\&.zero"
-option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&.
-.PP
-This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent
-\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
-tool if the
-\fB\-\-enable\-valgrind\fR
-configuration option is enabled\&.
-.SH "DIAGNOSTIC MESSAGES"
-.PP
-If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor
-\fBSTDERR_FILENO\fR\&. Errors will result in the process dumping core\&. If the
-"opt\&.abort"
-option is set, most warnings are treated as errors\&.
-.PP
-The
-\fImalloc_message\fR
-variable allows the programmer to override the function which emits the text strings forming the errors and warnings if for some reason the
-\fBSTDERR_FILENO\fR
-file descriptor is not suitable for this\&.
-\fBmalloc_message\fR\fB\fR
-takes the
-\fIcbopaque\fR
-pointer argument that is
-\fBNULL\fR
-unless overridden by the arguments in a call to
-\fBmalloc_stats_print\fR\fB\fR, followed by a string pointer\&. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock\&.
-.PP
-All messages are prefixed by \(lq<jemalloc>:\(rq\&.
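-.PP
-As an illustrative sketch (the my_malloc_message and install_writer names are hypothetical), an override must avoid allocating memory entirely:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-#include <string.h>
-#include <unistd.h>
-
-static void
-my_malloc_message(void *cbopaque, const char *s)
-{
-    /* Allocating here risks crash or deadlock; use write(2)\&. */
-    (void)cbopaque;
-    (void)write(STDERR_FILENO, s, strlen(s));
-}
-
-static void
-install_writer(void)
-{
-    malloc_message = my_malloc_message;
-}
-.fi
-.if n \{\
-.RE
-.\}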
-.SH "RETURN VALUES"
-.SS "Standard API"
-.PP
-The
-\fBmalloc\fR\fB\fR
-and
-\fBcalloc\fR\fB\fR
-functions return a pointer to the allocated memory if successful; otherwise a
-\fBNULL\fR
-pointer is returned and
-\fIerrno\fR
-is set to
-ENOMEM\&.
-.PP
-The
-\fBposix_memalign\fR\fB\fR
-function returns the value 0 if successful; otherwise it returns an error value\&. The
-\fBposix_memalign\fR\fB\fR
-function will fail if:
-.PP
-EINVAL
-.RS 4
-The
-\fIalignment\fR
-parameter is not a power of 2 at least as large as
-sizeof(\fBvoid *\fR)\&.
-.RE
-.PP
-ENOMEM
-.RS 4
-Memory allocation error\&.
-.RE
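-.PP
-For example,
-\fBposix_memalign\fR\fB\fR
-reports the error via its return value rather than
-\fIerrno\fR:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-void *ptr;
-int err = posix_memalign(&ptr, 64, 4096);
-
-if (err != 0) {
-    /* err is one of the EINVAL or ENOMEM values above\&. */
-}
-.fi
-.if n \{\
-.RE
-.\}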
-.PP
-The
-\fBaligned_alloc\fR\fB\fR
-function returns a pointer to the allocated memory if successful; otherwise a
-\fBNULL\fR
-pointer is returned and
-\fIerrno\fR
-is set\&. The
-\fBaligned_alloc\fR\fB\fR
-function will fail if:
-.PP
-EINVAL
-.RS 4
-The
-\fIalignment\fR
-parameter is not a power of 2\&.
-.RE
-.PP
-ENOMEM
-.RS 4
-Memory allocation error\&.
-.RE
-.PP
-The
-\fBrealloc\fR\fB\fR
-function returns a pointer, possibly identical to
-\fIptr\fR, to the allocated memory if successful; otherwise a
-\fBNULL\fR
-pointer is returned, and
-\fIerrno\fR
-is set to
-ENOMEM
-if the error was the result of an allocation failure\&. The
-\fBrealloc\fR\fB\fR
-function always leaves the original buffer intact when an error occurs\&.
-.PP
-The
-\fBfree\fR\fB\fR
-function returns no value\&.
-.SS "Non\-standard API"
-.PP
-The
-\fBmallocx\fR\fB\fR
-and
-\fBrallocx\fR\fB\fR
-functions return a pointer to the allocated memory if successful; otherwise a
-\fBNULL\fR
-pointer is returned to indicate insufficient contiguous memory was available to service the allocation request\&.
-.PP
-The
-\fBxallocx\fR\fB\fR
-function returns the real size of the resulting resized allocation pointed to by
-\fIptr\fR, which is a value less than
-\fIsize\fR
-if the allocation could not be adequately grown in place\&.
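-.PP
-For example (a sketch;
-\fIptr\fR
-and
-\fIsize\fR
-are assumed from context and NULL handling is omitted), in\-place growth can be attempted with a fallback to a moving reallocation:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-if (xallocx(ptr, size, 0, 0) < size) {
-    /* Could not grow in place; rallocx() may move the object\&. */
-    ptr = rallocx(ptr, size, 0);
-}
-.fi
-.if n \{\
-.RE
-.\}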
-.PP
-The
-\fBsallocx\fR\fB\fR
-function returns the real size of the allocation pointed to by
-\fIptr\fR\&.
-.PP
-The
-\fBnallocx\fR\fB\fR
-function returns the real size that would result from a successful equivalent
-\fBmallocx\fR\fB\fR
-function call, or zero if insufficient memory is available to perform the size computation\&.
-.PP
-The
-\fBmallctl\fR\fB\fR,
-\fBmallctlnametomib\fR\fB\fR, and
-\fBmallctlbymib\fR\fB\fR
-functions return 0 on success; otherwise they return an error value\&. The functions will fail if:
-.PP
-EINVAL
-.RS 4
-\fInewp\fR
-is not
-\fBNULL\fR, and
-\fInewlen\fR
-is too large or too small\&. Alternatively,
-\fI*oldlenp\fR
-is too large or too small; in this case as much data as possible are read despite the error\&.
-.RE
-.PP
-ENOENT
-.RS 4
-\fIname\fR
-or
-\fImib\fR
-specifies an unknown/invalid value\&.
-.RE
-.PP
-EPERM
-.RS 4
-Attempt to read or write void value, or attempt to write read\-only value\&.
-.RE
-.PP
-EAGAIN
-.RS 4
-A memory allocation failure occurred\&.
-.RE
-.PP
-EFAULT
-.RS 4
-An interface with side effects failed in some way not directly related to
-\fBmallctl*\fR\fB\fR
-read/write processing\&.
-.RE
-.PP
-The
-\fBmalloc_usable_size\fR\fB\fR
-function returns the usable size of the allocation pointed to by
-\fIptr\fR\&.
-.SS "Experimental API"
-.PP
-The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR,
-\fBdallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions return
-\fBALLOCM_SUCCESS\fR
-on success; otherwise they return an error value\&. The
-\fBallocm\fR\fB\fR,
-\fBrallocm\fR\fB\fR, and
-\fBnallocm\fR\fB\fR
-functions will fail if:
-.PP
-ALLOCM_ERR_OOM
-.RS 4
-Out of memory\&. Insufficient contiguous memory was available to service the allocation request\&. The
-\fBallocm\fR\fB\fR
-function additionally sets
-\fI*ptr\fR
-to
-\fBNULL\fR, whereas the
-\fBrallocm\fR\fB\fR
-function leaves
-\fB*ptr\fR
-unmodified\&.
-.RE
-The
-\fBrallocm\fR\fB\fR
-function will also fail if:
-.PP
-ALLOCM_ERR_NOT_MOVED
-.RS 4
-\fBALLOCM_NO_MOVE\fR
-was specified, but the reallocation request could not be serviced without moving the object\&.
-.RE
-.SH "ENVIRONMENT"
-.PP
-The following environment variable affects the execution of the allocation functions:
-.PP
-\fBMALLOC_CONF\fR
-.RS 4
-If the environment variable
-\fBMALLOC_CONF\fR
-is set, the characters it contains will be interpreted as options\&.
-.RE
-.SH "EXAMPLES"
-.PP
-To dump core whenever a problem occurs:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-ln \-s \*(Aqabort:true\*(Aq /etc/malloc\&.conf
-.fi
-.if n \{\
-.RE
-.\}
-.PP
-To specify in the source a chunk size that is 16 MiB:
-.sp
-.if n \{\
-.RS 4
-.\}
-.nf
-malloc_conf = "lg_chunk:24";
-.fi
-.if n \{\
-.RE
-.\}
-.SH "SEE ALSO"
-.PP
-\fBmadvise\fR(2),
-\fBmmap\fR(2),
-\fBsbrk\fR(2),
-\fButrace\fR(2),
-\fBalloca\fR(3),
-\fBatexit\fR(3),
-\fBgetpagesize\fR(3)
-.SH "STANDARDS"
-.PP
-The
-\fBmalloc\fR\fB\fR,
-\fBcalloc\fR\fB\fR,
-\fBrealloc\fR\fB\fR, and
-\fBfree\fR\fB\fR
-functions conform to ISO/IEC 9899:1990 (\(lqISO C90\(rq)\&.
-.PP
-The
-\fBposix_memalign\fR\fB\fR
-function conforms to IEEE Std 1003\&.1\-2001 (\(lqPOSIX\&.1\(rq)\&.
-.SH "AUTHOR"
-.PP
-\fBJason Evans\fR
-.RS 4
-.RE
-.SH "NOTES"
-.IP " 1." 4
-jemalloc website
-.RS 4
-\%http://www.canonware.com/jemalloc/
-.RE
-.IP " 2." 4
-Valgrind
-.RS 4
-\%http://valgrind.org/
-.RE
-.IP " 3." 4
-gperftools package
-.RS 4
-\%http://code.google.com/p/gperftools/
-.RE
diff --git a/deps/jemalloc/doc/jemalloc.html b/deps/jemalloc/doc/jemalloc.html
deleted file mode 100644
index 5a9fc7789..000000000
--- a/deps/jemalloc/doc/jemalloc.html
+++ /dev/null
@@ -1,1508 +0,0 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>JEMALLOC</title><meta name="generator" content="DocBook XSL Stylesheets V1.78.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry"><a name="idm316394519664"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>jemalloc &#8212; general purpose memory allocation functions</p></div><div class="refsect1"><a name="library"></a><h2>LIBRARY</h2><p>This manual describes jemalloc 3.6.0-0-g46c0af68bd248b04df75e4f92d5fb804c3d75340. More information
- can be found at the <a class="ulink" href="http://www.canonware.com/jemalloc/" target="_top">jemalloc website</a>.</p></div><div class="refsynopsisdiv"><h2>SYNOPSIS</h2><div class="funcsynopsis"><pre class="funcsynopsisinfo">#include &lt;<code class="filename">stdlib.h</code>&gt;
-#include &lt;<code class="filename">jemalloc/jemalloc.h</code>&gt;</pre><div class="refsect2"><a name="idm316394002288"></a><h3>Standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">malloc</b>(</code></td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">calloc</b>(</code></td><td>size_t <var class="pdparam">number</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">posix_memalign</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">aligned_alloc</b>(</code></td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">realloc</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">free</b>(</code></td><td>void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="refsect2"><a name="idm316393986160"></a><h3>Non-standard API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">mallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void *<b class="fsfunc">rallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">xallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, 
</td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">sallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">dallocx</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">nallocx</b>(</code></td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctl</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlnametomib</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">mibp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">miblenp</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlbymib</b>(</code></td><td>const size_t *<var class="pdparam">mib</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">miblen</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">malloc_stats_print</b>(</code></td><td>void <var class="pdparam">(*write_cb)</var>
- <code>(</code>void *, const char *<code>)</code>
- , </td></tr><tr><td> </td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">opts</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">size_t <b class="fsfunc">malloc_usable_size</b>(</code></td><td>const void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">void <b class="fsfunc">(*malloc_message)</b>(</code></td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">s</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><p><span class="type">const char *</span><code class="varname">malloc_conf</code>;</p></div><div class="refsect2"><a name="idm316388684112"></a><h3>Experimental API</h3><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">allocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">rallocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">sallocm</b>(</code></td><td>const void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">dallocm</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" class="funcprototype-table" summary="Function synopsis" style="cellspacing: 0; cellpadding: 0;"><tr><td><code class="funcdef">int <b class="fsfunc">nallocm</b>(</code></td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div></div></div><div class="refsect1"><a name="description"></a><h2>DESCRIPTION</h2><div 
class="refsect2"><a name="idm316388663504"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) function allocates
- <em class="parameter"><code>size</code></em> bytes of uninitialized memory. The allocated
- space is suitably aligned (after possible pointer coercion) for storage
- of any type of object.</p><p>The <code class="function">calloc</code>(<em class="parameter"><code></code></em>) function allocates
- space for <em class="parameter"><code>number</code></em> objects, each
- <em class="parameter"><code>size</code></em> bytes in length. The result is identical to
- calling <code class="function">malloc</code>(<em class="parameter"><code></code></em>) with an argument of
- <em class="parameter"><code>number</code></em> * <em class="parameter"><code>size</code></em>, with the
- exception that the allocated memory is explicitly initialized to zero
- bytes.</p><p>The <code class="function">posix_memalign</code>(<em class="parameter"><code></code></em>) function
- allocates <em class="parameter"><code>size</code></em> bytes of memory such that the
- allocation's base address is an even multiple of
- <em class="parameter"><code>alignment</code></em>, and returns the allocation in the value
- pointed to by <em class="parameter"><code>ptr</code></em>. The requested
- <em class="parameter"><code>alignment</code></em> must be a power of 2 at least as large
- as <code class="code">sizeof(<span class="type">void *</span>)</code>.</p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function
- allocates <em class="parameter"><code>size</code></em> bytes of memory such that the
- allocation's base address is an even multiple of
- <em class="parameter"><code>alignment</code></em>. The requested
- <em class="parameter"><code>alignment</code></em> must be a power of 2. Behavior is
- undefined if <em class="parameter"><code>size</code></em> is not an integral multiple of
- <em class="parameter"><code>alignment</code></em>.</p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function changes the
- size of the previously allocated memory referenced by
- <em class="parameter"><code>ptr</code></em> to <em class="parameter"><code>size</code></em> bytes. The
- contents of the memory are unchanged up to the lesser of the new and old
- sizes. If the new size is larger, the contents of the newly allocated
- portion of the memory are undefined. Upon success, the memory referenced
- by <em class="parameter"><code>ptr</code></em> is freed and a pointer to the newly
- allocated memory is returned. Note that
- <code class="function">realloc</code>(<em class="parameter"><code></code></em>) may move the memory allocation,
- resulting in a different return value than <em class="parameter"><code>ptr</code></em>.
- If <em class="parameter"><code>ptr</code></em> is <code class="constant">NULL</code>, the
- <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function behaves identically to
- <code class="function">malloc</code>(<em class="parameter"><code></code></em>) for the specified size.</p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function causes the
- allocated memory referenced by <em class="parameter"><code>ptr</code></em> to be made
- available for future allocations. If <em class="parameter"><code>ptr</code></em> is
- <code class="constant">NULL</code>, no action occurs.</p></div><div class="refsect2"><a name="idm316388639904"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocx</code>(<em class="parameter"><code></code></em>),
- <code class="function">xallocx</code>(<em class="parameter"><code></code></em>),
- <code class="function">sallocx</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocx</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) functions all have a
- <em class="parameter"><code>flags</code></em> argument that can be used to specify
- options. The functions only check the options that are contextually
- relevant. Use bitwise or (<code class="code">|</code>) operations to
- specify one or more of the following:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="constant">MALLOCX_LG_ALIGN(<em class="parameter"><code>la</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <code class="code">(1 &lt;&lt;
- <em class="parameter"><code>la</code></em>)</code>. This macro does not validate
- that <em class="parameter"><code>la</code></em> is within the valid
- range.</p></dd><dt><span class="term"><code class="constant">MALLOCX_ALIGN(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <em class="parameter"><code>a</code></em>, where
- <em class="parameter"><code>a</code></em> is a power of two. This macro does not
- validate that <em class="parameter"><code>a</code></em> is a power of 2.
- </p></dd><dt><span class="term"><code class="constant">MALLOCX_ZERO</code></span></dt><dd><p>Initialize newly allocated memory to contain zero
- bytes. In the growing reallocation case, the real size prior to
- reallocation defines the boundary between untouched bytes and those
- that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</p></dd><dt><span class="term"><code class="constant">MALLOCX_ARENA(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Use the arena specified by the index
- <em class="parameter"><code>a</code></em> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <em class="parameter"><code>a</code></em>
- specifies an arena index in the valid range.</p></dd></dl></div><p>
- </p><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function allocates at
- least <em class="parameter"><code>size</code></em> bytes of memory, and returns a pointer
- to the base address of the allocation. Behavior is undefined if
- <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or if request size
- overflows due to size class and/or alignment constraints.</p><p>The <code class="function">rallocx</code>(<em class="parameter"><code></code></em>) function resizes the
- allocation at <em class="parameter"><code>ptr</code></em> to be at least
- <em class="parameter"><code>size</code></em> bytes, and returns a pointer to the base
- address of the resulting allocation, which may or may not have moved from
- its original location. Behavior is undefined if
- <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or if request size
- overflows due to size class and/or alignment constraints.</p><p>The <code class="function">xallocx</code>(<em class="parameter"><code></code></em>) function resizes the
- allocation at <em class="parameter"><code>ptr</code></em> in place to be at least
- <em class="parameter"><code>size</code></em> bytes, and returns the real size of the
- allocation. If <em class="parameter"><code>extra</code></em> is non-zero, an attempt is
- made to resize the allocation to be at least <code class="code">(<em class="parameter"><code>size</code></em> +
- <em class="parameter"><code>extra</code></em>)</code> bytes, though inability to allocate
- the extra byte(s) will not by itself result in failure to resize.
- Behavior is undefined if <em class="parameter"><code>size</code></em> is
- <code class="constant">0</code>, or if <code class="code">(<em class="parameter"><code>size</code></em> + <em class="parameter"><code>extra</code></em>
- &gt; <code class="constant">SIZE_T_MAX</code>)</code>.</p><p>The <code class="function">sallocx</code>(<em class="parameter"><code></code></em>) function returns the
- real size of the allocation at <em class="parameter"><code>ptr</code></em>.</p><p>The <code class="function">dallocx</code>(<em class="parameter"><code></code></em>) function causes the
- memory referenced by <em class="parameter"><code>ptr</code></em> to be made available for
- future allocations.</p><p>The <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) function allocates no
- memory, but it performs the same size computation as the
- <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function, and returns the real
- size of the allocation that would result from the equivalent
- <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function call. Behavior is
- undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or if
- request size overflows due to size class and/or alignment
- constraints.</p><p>The <code class="function">mallctl</code>(<em class="parameter"><code></code></em>) function provides a
- general interface for introspecting the memory allocator, as well as
- setting modifiable parameters and triggering actions. The
- period-separated <em class="parameter"><code>name</code></em> argument specifies a
- location in a tree-structured namespace; see the <a class="xref" href="#mallctl_namespace" title="MALLCTL NAMESPACE">MALLCTL NAMESPACE</a> section for
- documentation on the tree contents. To read a value, pass a pointer via
- <em class="parameter"><code>oldp</code></em> to adequate space to contain the value, and a
- pointer to its length via <em class="parameter"><code>oldlenp</code></em>; otherwise pass
- <code class="constant">NULL</code> and <code class="constant">NULL</code>. Similarly, to
- write a value, pass a pointer to the value via
- <em class="parameter"><code>newp</code></em>, and its length via
- <em class="parameter"><code>newlen</code></em>; otherwise pass <code class="constant">NULL</code>
- and <code class="constant">0</code>.</p><p>The <code class="function">mallctlnametomib</code>(<em class="parameter"><code></code></em>) function
- provides a way to avoid repeated name lookups for applications that
- repeatedly query the same portion of the namespace, by translating a name
- to a &#8220;Management Information Base&#8221; (MIB) that can be passed
- repeatedly to <code class="function">mallctlbymib</code>(<em class="parameter"><code></code></em>). Upon
- successful return from <code class="function">mallctlnametomib</code>(<em class="parameter"><code></code></em>),
- <em class="parameter"><code>mibp</code></em> contains an array of
- <em class="parameter"><code>*miblenp</code></em> integers, where
- <em class="parameter"><code>*miblenp</code></em> is the lesser of the number of components
- in <em class="parameter"><code>name</code></em> and the input value of
- <em class="parameter"><code>*miblenp</code></em>. Thus it is possible to pass a
- <em class="parameter"><code>*miblenp</code></em> that is smaller than the number of
- period-separated name components, which results in a partial MIB that can
- be used as the basis for constructing a complete MIB. For name
- components that are integers (e.g. the 2 in
- <a class="link" href="#arenas.bin.i.size">
- "<code class="mallctl">arenas.bin.2.size</code>"
- </a>),
- the corresponding MIB component will always be that integer. Therefore,
- it is legitimate to construct code like the following: </p><pre class="programlisting">
-unsigned nbins, i;
-size_t mib[4];
-size_t len, miblen;
-
-len = sizeof(nbins);
-mallctl("arenas.nbins", &amp;nbins, &amp;len, NULL, 0);
-
-miblen = 4;
-mallctlnametomib("arenas.bin.0.size", mib, &amp;miblen);
-for (i = 0; i &lt; nbins; i++) {
- size_t bin_size;
-
- mib[2] = i;
- len = sizeof(bin_size);
- mallctlbymib(mib, miblen, &amp;bin_size, &amp;len, NULL, 0);
- /* Do something with bin_size... */
-}</pre><p>The <code class="function">malloc_stats_print</code>(<em class="parameter"><code></code></em>) function
- writes human-readable summary statistics via the
- <em class="parameter"><code>write_cb</code></em> callback function pointer and
- <em class="parameter"><code>cbopaque</code></em> data passed to
- <em class="parameter"><code>write_cb</code></em>, or
- <code class="function">malloc_message</code>(<em class="parameter"><code></code></em>) if
- <em class="parameter"><code>write_cb</code></em> is <code class="constant">NULL</code>. This
- function can be called repeatedly. General information that never
- changes during execution can be omitted by specifying "g" as a character
- within the <em class="parameter"><code>opts</code></em> string. Note that
- <code class="function">malloc_message</code>(<em class="parameter"><code></code></em>) uses the
- <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions internally, so
- inconsistent statistics can be reported if multiple threads use these
- functions simultaneously. If <code class="option">--enable-stats</code> is
- specified during configuration, &#8220;m&#8221; and &#8220;a&#8221; can
- be specified to omit merged arena and per arena statistics, respectively;
- &#8220;b&#8221; and &#8220;l&#8221; can be specified to omit per size
- class statistics for bins and large objects, respectively. Unrecognized
- characters are silently ignored. Note that thread caching may prevent
- some statistics from being completely up to date, since extra locking
- would be required to merge counters that track thread cache operations.
- </p><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
- returns the usable size of the allocation pointed to by
- <em class="parameter"><code>ptr</code></em>. The return value may be larger than the size
- that was requested during allocation. The
- <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function is not a
- mechanism for in-place <code class="function">realloc</code>(<em class="parameter"><code></code></em>); rather
- it is provided solely as a tool for introspection purposes. Any
- discrepancy between the requested allocation size and the size reported
- by <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) should not be
- depended on, since such behavior is entirely implementation-dependent.
- </p></div><div class="refsect2"><a name="idm316388574208"></a><h3>Experimental API</h3><p>The experimental API is subject to change or removal without regard
- for backward compatibility. If <code class="option">--disable-experimental</code>
- is specified during configuration, the experimental API is
- omitted.</p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions all have a
- <em class="parameter"><code>flags</code></em> argument that can be used to specify
- options. The functions only check the options that are contextually
- relevant. Use bitwise or (<code class="code">|</code>) operations to
- specify one or more of the following:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="constant">ALLOCM_LG_ALIGN(<em class="parameter"><code>la</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <code class="code">(1 &lt;&lt;
- <em class="parameter"><code>la</code></em>)</code>. This macro does not validate
- that <em class="parameter"><code>la</code></em> is within the valid
- range.</p></dd><dt><span class="term"><code class="constant">ALLOCM_ALIGN(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Align the memory allocation to start at an address
- that is a multiple of <em class="parameter"><code>a</code></em>, where
- <em class="parameter"><code>a</code></em> is a power of two. This macro does not
- validate that <em class="parameter"><code>a</code></em> is a power of 2.
- </p></dd><dt><span class="term"><code class="constant">ALLOCM_ZERO</code></span></dt><dd><p>Initialize newly allocated memory to contain zero
- bytes. In the growing reallocation case, the real size prior to
- reallocation defines the boundary between untouched bytes and those
- that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</p></dd><dt><span class="term"><code class="constant">ALLOCM_NO_MOVE</code></span></dt><dd><p>For reallocation, fail rather than moving the
- object. This constraint can apply to both growth and
- shrinkage.</p></dd><dt><span class="term"><code class="constant">ALLOCM_ARENA(<em class="parameter"><code>a</code></em>)
- </code></span></dt><dd><p>Use the arena specified by the index
- <em class="parameter"><code>a</code></em> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <em class="parameter"><code>a</code></em>
- specifies an arena index in the valid range.</p></dd></dl></div><p>
- </p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function allocates at
- least <em class="parameter"><code>size</code></em> bytes of memory, sets
- <em class="parameter"><code>*ptr</code></em> to the base address of the allocation, and
- sets <em class="parameter"><code>*rsize</code></em> to the real size of the allocation if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>. Behavior
- is undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or
- if request size overflows due to size class and/or alignment
- constraints.</p><p>The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function resizes the
- allocation at <em class="parameter"><code>*ptr</code></em> to be at least
- <em class="parameter"><code>size</code></em> bytes, sets <em class="parameter"><code>*ptr</code></em> to
- the base address of the allocation if it moved, and sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>. If
- <em class="parameter"><code>extra</code></em> is non-zero, an attempt is made to resize
- the allocation to be at least <code class="code">(<em class="parameter"><code>size</code></em> +
- <em class="parameter"><code>extra</code></em>)</code> bytes, though inability to allocate
- the extra byte(s) will not by itself result in failure. Behavior is
- undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, if
- request size overflows due to size class and/or alignment constraints, or
- if <code class="code">(<em class="parameter"><code>size</code></em> +
- <em class="parameter"><code>extra</code></em> &gt;
- <code class="constant">SIZE_T_MAX</code>)</code>.</p><p>The <code class="function">sallocm</code>(<em class="parameter"><code></code></em>) function sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation.</p><p>The <code class="function">dallocm</code>(<em class="parameter"><code></code></em>) function causes the
- memory referenced by <em class="parameter"><code>ptr</code></em> to be made available for
- future allocations.</p><p>The <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) function allocates no
- memory, but it performs the same size computation as the
- <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function, and if
- <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code> it sets
- <em class="parameter"><code>*rsize</code></em> to the real size of the allocation that
- would result from the equivalent <code class="function">allocm</code>(<em class="parameter"><code></code></em>)
- function call. Behavior is undefined if <em class="parameter"><code>size</code></em> is
- <code class="constant">0</code>, or if request size overflows due to size class
- and/or alignment constraints.</p></div></div><div class="refsect1"><a name="tuning"></a><h2>TUNING</h2><p>Once, when the first call is made to one of the memory allocation
- routines, the allocator initializes its internals based in part on various
- options that can be specified at compile- or run-time.</p><p>The string pointed to by the global variable
- <code class="varname">malloc_conf</code>, the &#8220;name&#8221; of the file
- referenced by the symbolic link named <code class="filename">/etc/malloc.conf</code>, and the value of the
- environment variable <code class="envar">MALLOC_CONF</code>, will be interpreted, in
- that order, from left to right as options. Note that
- <code class="varname">malloc_conf</code> may be read before
- <code class="function">main</code>(<em class="parameter"><code></code></em>) is entered, so the declaration of
- <code class="varname">malloc_conf</code> should specify an initializer that contains
- the final value to be read by jemalloc. <code class="varname">malloc_conf</code> is
- a compile-time setting, whereas <code class="filename">/etc/malloc.conf</code> and <code class="envar">MALLOC_CONF</code>
- can be safely set any time prior to program invocation.</p><p>An options string is a comma-separated list of option:value pairs.
- There is one key corresponding to each <a class="link" href="#opt.abort">
- "<code class="mallctl">opt.*</code>"
- </a> mallctl (see the <a class="xref" href="#mallctl_namespace" title="MALLCTL NAMESPACE">MALLCTL NAMESPACE</a> section for options
- documentation). For example, <code class="literal">abort:true,narenas:1</code> sets
- the <a class="link" href="#opt.abort">
- "<code class="mallctl">opt.abort</code>"
- </a> and <a class="link" href="#opt.narenas">
- "<code class="mallctl">opt.narenas</code>"
- </a> options. Some
- options have boolean values (true/false), others have integer values (base
- 8, 10, or 16, depending on prefix), and yet others have raw string
- values.</p>
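- <p>For example, a sketch of baking defaults into a program via the
-         global variable (the option values shown are illustrative
-         only):</p><pre class="programlisting">
-/* Compile-time defaults; read before main() is entered. */
-const char *malloc_conf = "narenas:1,abort:true";</pre><p>The same
-         options could equally be supplied at run time, e.g. by setting
-         <code class="envar">MALLOC_CONF</code> to
-         <code class="literal">"narenas:1,abort:true"</code> in the
-         environment.</p></div><div class="refsect1"><a name="implementation_notes"></a><h2>IMPLEMENTATION NOTES</h2><p>Traditionally, allocators have used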
- <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> to obtain memory, which is
- suboptimal for several reasons, including race conditions, increased
- fragmentation, and artificial limitations on maximum usable memory. If
- <code class="option">--enable-dss</code> is specified during configuration, this
- allocator uses both <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> and
- <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>, in that order of preference;
- otherwise only <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> is used.</p><p>This allocator uses multiple arenas in order to reduce lock
- contention for threaded programs on multi-processor systems. This works
- well with regard to threading scalability, but incurs some costs. There is
- a small fixed per-arena overhead, and additionally, arenas manage memory
- completely independently of each other, which means a small fixed increase
- in overall memory fragmentation. These overheads are not generally an
- issue, given the number of arenas normally used. Note that using
- substantially more arenas than the default is not likely to improve
- performance, mainly due to reduced cache performance. However, it may make
- sense to reduce the number of arenas if an application does not make much
- use of the allocation functions.</p><p>In addition to multiple arenas, unless
- <code class="option">--disable-tcache</code> is specified during configuration, this
- allocator supports thread-specific caching for small and large objects, in
- order to make it possible to completely avoid synchronization for most
- allocation requests. Such caching allows very fast allocation in the
- common case, but it increases memory usage and fragmentation, since a
- bounded number of objects can remain allocated in each thread cache.</p><p>Memory is conceptually broken into equal-sized chunks, where the
- chunk size is a power of two that is greater than the page size. Chunks
- are always aligned to multiples of the chunk size. This alignment makes it
- possible to find metadata for user objects very quickly.</p><p>User objects are broken into three categories according to size:
- small, large, and huge. Small objects are smaller than one page. Large
- objects are smaller than the chunk size. Huge objects are a multiple of
- the chunk size. Small and large objects are managed by arenas; huge
- objects are managed separately in a single data structure that is shared by
- all threads. Huge objects are used by applications infrequently enough
- that this single data structure is not a scalability issue.</p><p>Each chunk that is managed by an arena tracks its contents as runs of
- contiguous pages (unused, backing a set of small objects, or backing one
- large object). The combination of chunk alignment and chunk page maps
- makes it possible to determine all metadata regarding small and large
- allocations in constant time.</p><p>Small objects are managed in groups by page runs. Each run maintains
- a frontier and free list to track which regions are in use. Allocation
- requests that are no more than half the quantum (8 or 16, depending on
- architecture) are rounded up to the nearest power of two that is at least
- <code class="code">sizeof(<span class="type">double</span>)</code>. All other small
- object size classes are multiples of the quantum, spaced such that internal
- fragmentation is limited to approximately 25% for all but the smallest size
- classes. Allocation requests that are larger than the maximum small size
- class, but small enough to fit in an arena-managed chunk (see the <a class="link" href="#opt.lg_chunk">
- "<code class="mallctl">opt.lg_chunk</code>"
- </a> option), are
- rounded up to the nearest run size. Allocation requests that are too large
- to fit in an arena-managed chunk are rounded up to the nearest multiple of
- the chunk size.</p><p>Allocations are packed tightly together, which can be an issue for
- multi-threaded applications. If you need to assure that allocations do not
- suffer from cacheline sharing, round your allocation requests up to the
- nearest multiple of the cacheline size, or specify cacheline alignment when
- allocating.</p>
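- <p>For instance, a sketch using the standard API, assuming 64-byte
-         cache lines and combining both techniques:</p><pre class="programlisting">
-#include &lt;stdlib.h&gt;
-
-size_t cl = 64;                          /* assumed cacheline size */
-size_t sz = (100 + cl - 1) &amp; ~(cl - 1);  /* round request up to 128 */
-void *p;
-if (posix_memalign(&amp;p, cl, sz) != 0)
-        abort();</pre><p>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit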
- system, the size classes in each category are as shown in <a class="xref" href="#size_classes" title="Table 1. Size classes">Table 1</a>.</p><div class="table"><a name="size_classes"></a><p class="title"><b>Table 1. Size classes</b></p><div class="table-contents"><table summary="Size classes" border="1"><colgroup><col align="left" class="c1"><col align="right" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Category</th><th align="right">Spacing</th><th align="left">Size</th></tr></thead><tbody><tr><td rowspan="7" align="left">Small</td><td align="right">lg</td><td align="left">[8]</td></tr><tr><td align="right">16</td><td align="left">[16, 32, 48, ..., 128]</td></tr><tr><td align="right">32</td><td align="left">[160, 192, 224, 256]</td></tr><tr><td align="right">64</td><td align="left">[320, 384, 448, 512]</td></tr><tr><td align="right">128</td><td align="left">[640, 768, 896, 1024]</td></tr><tr><td align="right">256</td><td align="left">[1280, 1536, 1792, 2048]</td></tr><tr><td align="right">512</td><td align="left">[2560, 3072, 3584]</td></tr><tr><td align="left">Large</td><td align="right">4 KiB</td><td align="left">[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</td></tr><tr><td align="left">Huge</td><td align="right">4 MiB</td><td align="left">[4 MiB, 8 MiB, 12 MiB, ...]</td></tr></tbody></table></div></div><br class="table-break"></div><div class="refsect1"><a name="mallctl_namespace"></a><h2>MALLCTL NAMESPACE</h2><p>The following names are defined in the namespace accessible via the
- <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions. Value types are
- specified in parentheses, their readable/writable statuses are encoded as
- <code class="literal">rw</code>, <code class="literal">r-</code>, <code class="literal">-w</code>, or
- <code class="literal">--</code>, and required build configuration flags follow, if
- any. A name element encoded as <code class="literal">&lt;i&gt;</code> or
- <code class="literal">&lt;j&gt;</code> indicates an integer component, where the
- integer varies from 0 to some upper value that must be determined via
- introspection. In the case of
- "<code class="mallctl">stats.arenas.&lt;i&gt;.*</code>"
- ,
- <code class="literal">&lt;i&gt;</code> equal to <a class="link" href="#arenas.narenas">
- "<code class="mallctl">arenas.narenas</code>"
- </a> can be
- used to access the summation of statistics from all arenas. Take special
- note of the <a class="link" href="#epoch">
- "<code class="mallctl">epoch</code>"
- </a> mallctl,
- which controls refreshing of cached dynamic statistics.</p><div class="variablelist"><dl class="variablelist"><dt><a name="version"></a><span class="term">
-
- "<code class="mallctl">version</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Return the jemalloc version string.</p></dd><dt><a name="epoch"></a><span class="term">
-
- "<code class="mallctl">epoch</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">rw</code>
- </span></dt><dd><p>If a value is passed in, refresh the data from which
- the <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions report values,
- and increment the epoch. Return the current epoch. This is useful for
- detecting whether another thread caused a refresh.</p>
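- <p>A sketch of the usual pattern (assuming a build with
-         <code class="option">--enable-stats</code>): refresh the epoch,
-         then read a cached statistic:</p><pre class="programlisting">
-#include &lt;stdint.h&gt;
-#include &lt;jemalloc/jemalloc.h&gt;
-
-uint64_t epoch = 1;
-size_t sz = sizeof(epoch);
-/* Refresh cached statistics and advance the epoch. */
-mallctl("epoch", &amp;epoch, &amp;sz, &amp;epoch, sz);
-
-size_t allocated;
-sz = sizeof(allocated);
-mallctl("stats.allocated", &amp;allocated, &amp;sz, NULL, 0);</pre></dd><dt><a name="config.debug"></a><span class="term">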
-
- "<code class="mallctl">config.debug</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-debug</code> was specified during
- build configuration.</p></dd><dt><a name="config.dss"></a><span class="term">
-
- "<code class="mallctl">config.dss</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-dss</code> was specified during
- build configuration.</p></dd><dt><a name="config.fill"></a><span class="term">
-
- "<code class="mallctl">config.fill</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-fill</code> was specified during
- build configuration.</p></dd><dt><a name="config.lazy_lock"></a><span class="term">
-
- "<code class="mallctl">config.lazy_lock</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-lazy-lock</code> was specified
- during build configuration.</p></dd><dt><a name="config.mremap"></a><span class="term">
-
- "<code class="mallctl">config.mremap</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-mremap</code> was specified during
- build configuration.</p></dd><dt><a name="config.munmap"></a><span class="term">
-
- "<code class="mallctl">config.munmap</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-munmap</code> was specified during
- build configuration.</p></dd><dt><a name="config.prof"></a><span class="term">
-
- "<code class="mallctl">config.prof</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-prof</code> was specified during
- build configuration.</p></dd><dt><a name="config.prof_libgcc"></a><span class="term">
-
- "<code class="mallctl">config.prof_libgcc</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--disable-prof-libgcc</code> was not
- specified during build configuration.</p></dd><dt><a name="config.prof_libunwind"></a><span class="term">
-
- "<code class="mallctl">config.prof_libunwind</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-prof-libunwind</code> was specified
- during build configuration.</p></dd><dt><a name="config.stats"></a><span class="term">
-
- "<code class="mallctl">config.stats</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-stats</code> was specified during
- build configuration.</p></dd><dt><a name="config.tcache"></a><span class="term">
-
- "<code class="mallctl">config.tcache</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--disable-tcache</code> was not specified
- during build configuration.</p></dd><dt><a name="config.tls"></a><span class="term">
-
- "<code class="mallctl">config.tls</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--disable-tls</code> was not specified during
- build configuration.</p></dd><dt><a name="config.utrace"></a><span class="term">
-
- "<code class="mallctl">config.utrace</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-utrace</code> was specified during
- build configuration.</p></dd><dt><a name="config.valgrind"></a><span class="term">
-
- "<code class="mallctl">config.valgrind</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-valgrind</code> was specified during
- build configuration.</p></dd><dt><a name="config.xmalloc"></a><span class="term">
-
- "<code class="mallctl">config.xmalloc</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p><code class="option">--enable-xmalloc</code> was specified during
- build configuration.</p></dd><dt><a name="opt.abort"></a><span class="term">
-
- "<code class="mallctl">opt.abort</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Abort-on-warning enabled/disabled. If true, most
- warnings are fatal. The process will call
- <span class="citerefentry"><span class="refentrytitle">abort</span>(3)</span> in these cases. This option is
- disabled by default unless <code class="option">--enable-debug</code> is
- specified during configuration, in which case it is enabled by default.
- </p></dd><dt><a name="opt.dss"></a><span class="term">
-
- "<code class="mallctl">opt.dss</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>dss (<span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>) allocation precedence as
- related to <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> allocation. The following
- settings are supported: &#8220;disabled&#8221;, &#8220;primary&#8221;,
- and &#8220;secondary&#8221;. The default is &#8220;secondary&#8221; if
- <a class="link" href="#config.dss">
- "<code class="mallctl">config.dss</code>"
- </a> is
- true, &#8220;disabled&#8221; otherwise.
- </p></dd><dt><a name="opt.lg_chunk"></a><span class="term">
-
- "<code class="mallctl">opt.lg_chunk</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Virtual memory chunk size (log base 2). If a chunk
- size outside the supported size range is specified, the size is
- silently clipped to the minimum/maximum supported size. The default
- chunk size is 4 MiB (2^22).
- </p></dd><dt><a name="opt.narenas"></a><span class="term">
-
- "<code class="mallctl">opt.narenas</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Maximum number of arenas to use for automatic
- multiplexing of threads and arenas. The default is four times the
- number of CPUs, or one if there is a single CPU.</p></dd><dt><a name="opt.lg_dirty_mult"></a><span class="term">
-
- "<code class="mallctl">opt.lg_dirty_mult</code>"
-
- (<span class="type">ssize_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Per-arena minimum ratio (log base 2) of active to dirty
- pages. Some dirty unused pages may be allowed to accumulate, within
- the limit set by the ratio (or one chunk worth of dirty pages,
- whichever is greater), before informing the kernel about some of those
- pages via <span class="citerefentry"><span class="refentrytitle">madvise</span>(2)</span> or a similar system call. This
- provides the kernel with sufficient information to recycle dirty pages
- if physical memory becomes scarce and the pages remain unused. The
- default minimum ratio is 8:1 (2^3:1); an option value of -1 will
- disable dirty page purging.</p></dd><dt><a name="opt.stats_print"></a><span class="term">
-
- "<code class="mallctl">opt.stats_print</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Enable/disable statistics printing at exit. If
- enabled, the <code class="function">malloc_stats_print</code>(<em class="parameter"><code></code></em>)
- function is called at program exit via an
- <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function. If
- <code class="option">--enable-stats</code> is specified during configuration, this
- has the potential to cause deadlock for a multi-threaded process that
- exits while one or more threads are executing in the memory allocation
- functions. Therefore, this option should only be used with care; it is
- primarily intended as a performance tuning aid during application
- development. This option is disabled by default.</p></dd><dt><a name="opt.junk"></a><span class="term">
-
- "<code class="mallctl">opt.junk</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-fill</code>]
- </span></dt><dd><p>Junk filling enabled/disabled. If enabled, each byte
- of uninitialized allocated memory will be initialized to
- <code class="literal">0xa5</code>. All deallocated memory will be initialized to
- <code class="literal">0x5a</code>. This is intended for debugging and will
- impact performance negatively. This option is disabled by default
- unless <code class="option">--enable-debug</code> is specified during
- configuration, in which case it is enabled by default unless running
- inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>.</p></dd><dt><a name="opt.quarantine"></a><span class="term">
-
- "<code class="mallctl">opt.quarantine</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-fill</code>]
- </span></dt><dd><p>Per thread quarantine size in bytes. If non-zero, each
- thread maintains a FIFO object quarantine that stores up to the
- specified number of bytes of memory. The quarantined memory is not
- freed until it is released from quarantine, though it is immediately
- junk-filled if the <a class="link" href="#opt.junk">
- "<code class="mallctl">opt.junk</code>"
- </a> option is
- enabled. This feature is of particular use in combination with <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>, which can detect attempts
- to access quarantined objects. This is intended for debugging and will
- impact performance negatively. The default quarantine size is 0 unless
- running inside Valgrind, in which case the default is 16
- MiB.</p></dd><dt><a name="opt.redzone"></a><span class="term">
-
- "<code class="mallctl">opt.redzone</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-fill</code>]
- </span></dt><dd><p>Redzones enabled/disabled. If enabled, small
- allocations have redzones before and after them. Furthermore, if the
- <a class="link" href="#opt.junk">
- "<code class="mallctl">opt.junk</code>"
- </a> option is
- enabled, the redzones are checked for corruption during deallocation.
- However, the primary intended purpose of this feature is to be used in
- combination with <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>,
- which needs redzones in order to do effective buffer overflow/underflow
- detection. This option is intended for debugging and will impact
- performance negatively. This option is disabled by
- default unless running inside Valgrind.</p></dd><dt><a name="opt.zero"></a><span class="term">
-
- "<code class="mallctl">opt.zero</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-fill</code>]
- </span></dt><dd><p>Zero filling enabled/disabled. If enabled, each byte
- of uninitialized allocated memory will be initialized to 0. Note that
- this initialization only happens once for each byte, so
- <code class="function">realloc</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocx</code>(<em class="parameter"><code></code></em>) and
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) calls do not zero memory that
- was previously allocated. This is intended for debugging and will
- impact performance negatively. This option is disabled by default.
- </p></dd><dt><a name="opt.utrace"></a><span class="term">
-
- "<code class="mallctl">opt.utrace</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-utrace</code>]
- </span></dt><dd><p>Allocation tracing based on
- <span class="citerefentry"><span class="refentrytitle">utrace</span>(2)</span> enabled/disabled. This option
- is disabled by default.</p></dd><dt><a name="opt.valgrind"></a><span class="term">
-
- "<code class="mallctl">opt.valgrind</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-valgrind</code>]
- </span></dt><dd><p><a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>
- support enabled/disabled. This option is vestigial because jemalloc
- auto-detects whether it is running inside Valgrind. This option is
- disabled by default, unless running inside Valgrind.</p></dd><dt><a name="opt.xmalloc"></a><span class="term">
-
- "<code class="mallctl">opt.xmalloc</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-xmalloc</code>]
- </span></dt><dd><p>Abort-on-out-of-memory enabled/disabled. If enabled,
- rather than returning failure for any allocation function, display a
- diagnostic message on <code class="constant">STDERR_FILENO</code> and cause the
- program to drop core (using
- <span class="citerefentry"><span class="refentrytitle">abort</span>(3)</span>). If an application is
- designed to depend on this behavior, set the option at compile time by
- including the following in the source code:
- </p><pre class="programlisting">
-malloc_conf = "xmalloc:true";</pre><p>
- This option is disabled by default.</p></dd><dt><a name="opt.tcache"></a><span class="term">
-
- "<code class="mallctl">opt.tcache</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Thread-specific caching enabled/disabled. When there
- are multiple threads, each thread uses a thread-specific cache for
- objects up to a certain size. Thread-specific caching allows many
- allocations to be satisfied without performing any thread
- synchronization, at the cost of increased memory use. See the
- <a class="link" href="#opt.lg_tcache_max">
- "<code class="mallctl">opt.lg_tcache_max</code>"
- </a>
- option for related tuning information. This option is enabled by
- default unless running inside <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>.</p></dd><dt><a name="opt.lg_tcache_max"></a><span class="term">
-
- "<code class="mallctl">opt.lg_tcache_max</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Maximum size class (log base 2) to cache in the
- thread-specific cache. At a minimum, all small size classes are
- cached, and at a maximum all large size classes are cached. The
- default maximum is 32 KiB (2^15).</p></dd><dt><a name="opt.prof"></a><span class="term">
-
- "<code class="mallctl">opt.prof</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Memory profiling enabled/disabled. If enabled, profile
- memory allocation activity. See the <a class="link" href="#opt.prof_active">
- "<code class="mallctl">opt.prof_active</code>"
- </a>
- option for on-the-fly activation/deactivation. See the <a class="link" href="#opt.lg_prof_sample">
- "<code class="mallctl">opt.lg_prof_sample</code>"
- </a>
- option for probabilistic sampling control. See the <a class="link" href="#opt.prof_accum">
- "<code class="mallctl">opt.prof_accum</code>"
- </a>
- option for control of cumulative sample reporting. See the <a class="link" href="#opt.lg_prof_interval">
- "<code class="mallctl">opt.lg_prof_interval</code>"
- </a>
- option for information on interval-triggered profile dumping, the <a class="link" href="#opt.prof_gdump">
- "<code class="mallctl">opt.prof_gdump</code>"
- </a>
- option for information on high-water-triggered profile dumping, and the
- <a class="link" href="#opt.prof_final">
- "<code class="mallctl">opt.prof_final</code>"
- </a>
- option for final profile dumping. Profile output is compatible with
- the included <span class="command"><strong>pprof</strong></span> Perl script, which originates
- from the <a class="ulink" href="http://code.google.com/p/gperftools/" target="_top">gperftools
- package</a>.</p></dd><dt><a name="opt.prof_prefix"></a><span class="term">
-
- "<code class="mallctl">opt.prof_prefix</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Filename prefix for profile dumps. If the prefix is
- set to the empty string, no automatic dumps will occur; this is
- primarily useful for disabling the automatic final heap dump (which
- also disables leak reporting, if enabled). The default prefix is
- <code class="filename">jeprof</code>.</p></dd><dt><a name="opt.prof_active"></a><span class="term">
-
- "<code class="mallctl">opt.prof_active</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">rw</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Profiling activated/deactivated. This is a secondary
- control mechanism that makes it possible to start the application with
- profiling enabled (see the <a class="link" href="#opt.prof">
- "<code class="mallctl">opt.prof</code>"
- </a> option) but
- inactive, then toggle profiling at any time during program execution
- with the <a class="link" href="#prof.active">
- "<code class="mallctl">prof.active</code>"
- </a> mallctl.
- This option is enabled by default.</p></dd><dt><a name="opt.lg_prof_sample"></a><span class="term">
-
- "<code class="mallctl">opt.lg_prof_sample</code>"
-
- (<span class="type">ssize_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Average interval (log base 2) between allocation
- samples, as measured in bytes of allocation activity. Increasing the
- sampling interval decreases profile fidelity, but also decreases the
- computational overhead. The default sample interval is 512 KiB (2^19
- B).</p></dd><dt><a name="opt.prof_accum"></a><span class="term">
-
- "<code class="mallctl">opt.prof_accum</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Reporting of cumulative object/byte counts in profile
- dumps enabled/disabled. If this option is enabled, every unique
- backtrace must be stored for the duration of execution. Depending on
- the application, this can impose a large memory overhead, and the
- cumulative counts are not always of interest. This option is disabled
- by default.</p></dd><dt><a name="opt.lg_prof_interval"></a><span class="term">
-
- "<code class="mallctl">opt.lg_prof_interval</code>"
-
- (<span class="type">ssize_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Average interval (log base 2) between memory profile
- dumps, as measured in bytes of allocation activity. The actual
- interval between dumps may be sporadic because decentralized allocation
- counters are used to avoid synchronization bottlenecks. Profiles are
- dumped to files named according to the pattern
- <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.i&lt;iseq&gt;.heap</code>,
- where <code class="literal">&lt;prefix&gt;</code> is controlled by the
- <a class="link" href="#opt.prof_prefix">
- "<code class="mallctl">opt.prof_prefix</code>"
- </a>
- option. By default, interval-triggered profile dumping is disabled
- (encoded as -1).
- </p></dd><dt><a name="opt.prof_gdump"></a><span class="term">
-
- "<code class="mallctl">opt.prof_gdump</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Trigger a memory profile dump every time the total
- virtual memory exceeds the previous maximum. Profiles are dumped to
- files named according to the pattern
- <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</code>,
- where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
- "<code class="mallctl">opt.prof_prefix</code>"
- </a>
- option. This option is disabled by default.</p></dd><dt><a name="opt.prof_final"></a><span class="term">
-
- "<code class="mallctl">opt.prof_final</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Use an
- <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function to dump final memory
- usage to a file named according to the pattern
- <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</code>,
- where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
- "<code class="mallctl">opt.prof_prefix</code>"
- </a>
- option. This option is enabled by default.</p></dd><dt><a name="opt.prof_leak"></a><span class="term">
-
- "<code class="mallctl">opt.prof_leak</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Leak reporting enabled/disabled. If enabled, use an
- <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function to report memory leaks
- detected by allocation sampling. See the
- <a class="link" href="#opt.prof">
- "<code class="mallctl">opt.prof</code>"
- </a> option for
- information on analyzing heap profile output. This option is disabled
- by default.</p></dd><dt><a name="thread.arena"></a><span class="term">
-
- "<code class="mallctl">thread.arena</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">rw</code>
- </span></dt><dd><p>Get or set the arena associated with the calling
- thread. If the specified arena was not initialized beforehand (see the
- <a class="link" href="#arenas.initialized">
- "<code class="mallctl">arenas.initialized</code>"
- </a>
- mallctl), it will be automatically initialized as a side effect of
- calling this interface.</p></dd><dt><a name="thread.allocated"></a><span class="term">
-
- "<code class="mallctl">thread.allocated</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Get the total number of bytes ever allocated by the
- calling thread. This counter has the potential to wrap around; it is
- up to the application to appropriately interpret the counter in such
- cases.</p></dd><dt><a name="thread.allocatedp"></a><span class="term">
-
- "<code class="mallctl">thread.allocatedp</code>"
-
- (<span class="type">uint64_t *</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Get a pointer to the value that is returned by the
- <a class="link" href="#thread.allocated">
- "<code class="mallctl">thread.allocated</code>"
- </a>
- mallctl. This is useful for avoiding the overhead of repeated
- <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) calls.</p></dd><dt><a name="thread.deallocated"></a><span class="term">
-
- "<code class="mallctl">thread.deallocated</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Get the total number of bytes ever deallocated by the
- calling thread. This counter has the potential to wrap around; it is
- up to the application to appropriately interpret the counter in such
- cases.</p></dd><dt><a name="thread.deallocatedp"></a><span class="term">
-
- "<code class="mallctl">thread.deallocatedp</code>"
-
- (<span class="type">uint64_t *</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Get a pointer to the value that is returned by the
- <a class="link" href="#thread.deallocated">
- "<code class="mallctl">thread.deallocated</code>"
- </a>
- mallctl. This is useful for avoiding the overhead of repeated
- <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) calls.</p></dd><dt><a name="thread.tcache.enabled"></a><span class="term">
-
- "<code class="mallctl">thread.tcache.enabled</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">rw</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Enable/disable calling thread's tcache. The tcache is
- implicitly flushed as a side effect of becoming
- disabled (see
- "<code class="mallctl">thread.tcache.flush</code>"
- ).
- </p></dd><dt><a name="thread.tcache.flush"></a><span class="term">
-
- "<code class="mallctl">thread.tcache.flush</code>"
-
- (<span class="type">void</span>)
- <code class="literal">--</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Flush calling thread's tcache. This interface releases
- all cached objects and internal data structures associated with the
- calling thread's thread-specific cache. Ordinarily, this interface
- need not be called, since automatic periodic incremental garbage
- collection occurs, and the thread cache is automatically discarded when
- a thread exits. However, garbage collection is triggered by allocation
- activity, so it is possible for a thread that stops
- allocating/deallocating to retain its cache indefinitely, in which case
- the developer may find manual flushing useful.</p>
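- <p>Because this mallctl neither reads nor writes a value, a flush is
-         a single call (sketch):</p><pre class="programlisting">
-mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);</pre></dd><dt><a name="arena.i.purge"></a><span class="term">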
-
- "<code class="mallctl">arena.&lt;i&gt;.purge</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">--</code>
- </span></dt><dd><p>Purge unused dirty pages for arena &lt;i&gt;, or for
- all arenas if &lt;i&gt; equals <a class="link" href="#arenas.narenas">
- "<code class="mallctl">arenas.narenas</code>"
- </a>.
- </p></dd><dt><a name="arena.i.dss"></a><span class="term">
-
- "<code class="mallctl">arena.&lt;i&gt;.dss</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">rw</code>
- </span></dt><dd><p>Set the precedence of dss allocation as related to mmap
- allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
- <a class="link" href="#arenas.narenas">
- "<code class="mallctl">arenas.narenas</code>"
- </a>. Note
- that even during huge allocation this setting is read from the arena
- that would be chosen for small or large allocation so that applications
- can depend on consistent dss versus mmap allocation regardless of
- allocation size. See <a class="link" href="#opt.dss">
- "<code class="mallctl">opt.dss</code>"
- </a> for supported
- settings.
- </p>
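- <p>For example, a sketch that sets arena 0 to prefer dss
-         (&#8220;primary&#8221; takes effect only if dss support was
-         configured):</p><pre class="programlisting">
-const char *dss = "primary";
-mallctl("arena.0.dss", NULL, NULL, &amp;dss, sizeof(dss));</pre></dd><dt><a name="arenas.narenas"></a><span class="term">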
-
- "<code class="mallctl">arenas.narenas</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Current limit on number of arenas.</p></dd><dt><a name="arenas.initialized"></a><span class="term">
-
- "<code class="mallctl">arenas.initialized</code>"
-
- (<span class="type">bool *</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>An array of <a class="link" href="#arenas.narenas">
- "<code class="mallctl">arenas.narenas</code>"
- </a>
- booleans. Each boolean indicates whether the corresponding arena is
- initialized.</p></dd><dt><a name="arenas.quantum"></a><span class="term">
-
- "<code class="mallctl">arenas.quantum</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Quantum size.</p></dd><dt><a name="arenas.page"></a><span class="term">
-
- "<code class="mallctl">arenas.page</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Page size.</p></dd><dt><a name="arenas.tcache_max"></a><span class="term">
-
- "<code class="mallctl">arenas.tcache_max</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Maximum thread-cached size class.</p></dd><dt><a name="arenas.nbins"></a><span class="term">
-
- "<code class="mallctl">arenas.nbins</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of bin size classes.</p></dd><dt><a name="arenas.nhbins"></a><span class="term">
-
- "<code class="mallctl">arenas.nhbins</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Total number of thread cache bin size
- classes.</p></dd><dt><a name="arenas.bin.i.size"></a><span class="term">
-
- "<code class="mallctl">arenas.bin.&lt;i&gt;.size</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Maximum size supported by size class.</p></dd><dt><a name="arenas.bin.i.nregs"></a><span class="term">
-
- "<code class="mallctl">arenas.bin.&lt;i&gt;.nregs</code>"
-
- (<span class="type">uint32_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of regions per page run.</p></dd><dt><a name="arenas.bin.i.run_size"></a><span class="term">
-
- "<code class="mallctl">arenas.bin.&lt;i&gt;.run_size</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of bytes per page run.</p></dd><dt><a name="arenas.nlruns"></a><span class="term">
-
- "<code class="mallctl">arenas.nlruns</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Total number of large size classes.</p></dd><dt><a name="arenas.lrun.i.size"></a><span class="term">
-
- "<code class="mallctl">arenas.lrun.&lt;i&gt;.size</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Maximum size supported by this large size
- class.</p></dd><dt><a name="arenas.purge"></a><span class="term">
-
- "<code class="mallctl">arenas.purge</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">-w</code>
- </span></dt><dd><p>Purge unused dirty pages for the specified arena, or
- for all arenas if none is specified.</p></dd><dt><a name="arenas.extend"></a><span class="term">
-
- "<code class="mallctl">arenas.extend</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Extend the array of arenas by appending a new arena,
- and return the new arena index.</p></dd><dt><a name="prof.active"></a><span class="term">
-
- "<code class="mallctl">prof.active</code>"
-
- (<span class="type">bool</span>)
- <code class="literal">rw</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Control whether sampling is currently active. See the
- <a class="link" href="#opt.prof_active">
- "<code class="mallctl">opt.prof_active</code>"
- </a>
- option for additional information.
- </p></dd><dt><a name="prof.dump"></a><span class="term">
-
- "<code class="mallctl">prof.dump</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">-w</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Dump a memory profile to the specified file, or if NULL
- is specified, to a file according to the pattern
- <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.m&lt;mseq&gt;.heap</code>,
- where <code class="literal">&lt;prefix&gt;</code> is controlled by the
- <a class="link" href="#opt.prof_prefix">
- "<code class="mallctl">opt.prof_prefix</code>"
- </a>
- option.</p>
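- <p>A sketch of forcing a dump to an explicit file (assuming
-         <code class="option">--enable-prof</code> and active profiling;
-         the filename is illustrative):</p><pre class="programlisting">
-const char *fname = "prof.out.heap";
-mallctl("prof.dump", NULL, NULL, &amp;fname, sizeof(fname));</pre></dd><dt><a name="prof.interval"></a><span class="term">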
-
- "<code class="mallctl">prof.interval</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-prof</code>]
- </span></dt><dd><p>Average number of bytes allocated between
- interval-based profile dumps. See the
- <a class="link" href="#opt.lg_prof_interval">
- "<code class="mallctl">opt.lg_prof_interval</code>"
- </a>
- option for additional information.</p></dd><dt><a name="stats.cactive"></a><span class="term">
-
- "<code class="mallctl">stats.cactive</code>"
-
- (<span class="type">size_t *</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Pointer to a counter that contains an approximate count
- of the current number of bytes in active pages. The estimate may be
- high, but never low, because each arena rounds up to the nearest
- multiple of the chunk size when computing its contribution to the
- counter. Note that the <a class="link" href="#epoch">
- "<code class="mallctl">epoch</code>"
- </a> mallctl has no bearing
- on this counter. Furthermore, counter consistency is maintained via
- atomic operations, so it is necessary to use an atomic operation in
- order to guarantee a consistent read when dereferencing the pointer.
- </p></dd><dt><a name="stats.allocated"></a><span class="term">
-
- "<code class="mallctl">stats.allocated</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of bytes allocated by the
- application.</p></dd><dt><a name="stats.active"></a><span class="term">
-
- "<code class="mallctl">stats.active</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of bytes in active pages allocated by the
- application. This is a multiple of the page size, and greater than or
- equal to <a class="link" href="#stats.allocated">
- "<code class="mallctl">stats.allocated</code>"
- </a>.
- This does not include <a class="link" href="#stats.arenas.i.pdirty">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.pdirty</code>"
- </a> and pages
- entirely devoted to allocator metadata.</p></dd><dt><a name="stats.mapped"></a><span class="term">
-
- "<code class="mallctl">stats.mapped</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of bytes in chunks mapped on behalf of the
- application. This is a multiple of the chunk size, and is at least as
- large as <a class="link" href="#stats.active">
- "<code class="mallctl">stats.active</code>"
- </a>. This
- does not include inactive chunks.</p></dd><dt><a name="stats.chunks.current"></a><span class="term">
-
- "<code class="mallctl">stats.chunks.current</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Total number of chunks actively mapped on behalf of the
- application. This does not include inactive chunks.
- </p></dd><dt><a name="stats.chunks.total"></a><span class="term">
-
- "<code class="mallctl">stats.chunks.total</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of chunks allocated.</p></dd><dt><a name="stats.chunks.high"></a><span class="term">
-
- "<code class="mallctl">stats.chunks.high</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Maximum number of active chunks at any time thus far.
- </p></dd><dt><a name="stats.huge.allocated"></a><span class="term">
-
- "<code class="mallctl">stats.huge.allocated</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of bytes currently allocated by huge objects.
- </p></dd><dt><a name="stats.huge.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.huge.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of huge allocation requests.
- </p></dd><dt><a name="stats.huge.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.huge.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of huge deallocation requests.
- </p></dd><dt><a name="stats.arenas.i.dss"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.dss</code>"
-
- (<span class="type">const char *</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>dss (<span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>) allocation precedence as
- related to <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> allocation. See <a class="link" href="#opt.dss">
- "<code class="mallctl">opt.dss</code>"
- </a> for details.
- </p></dd><dt><a name="stats.arenas.i.nthreads"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.nthreads</code>"
-
- (<span class="type">unsigned</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of threads currently assigned to
- arena.</p></dd><dt><a name="stats.arenas.i.pactive"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.pactive</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of pages in active runs.</p></dd><dt><a name="stats.arenas.i.pdirty"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.pdirty</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- </span></dt><dd><p>Number of pages within unused runs that are potentially
- dirty, and for which <code class="function">madvise</code>(<em class="parameter"><code>...</code></em>,
- <em class="parameter"><code><code class="constant">MADV_DONTNEED</code></code></em>) or
- similar has not been called.</p></dd><dt><a name="stats.arenas.i.mapped"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.mapped</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of mapped bytes.</p></dd><dt><a name="stats.arenas.i.npurge"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.npurge</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of dirty page purge sweeps performed.
- </p></dd><dt><a name="stats.arenas.i.nmadvise"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.nmadvise</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of <code class="function">madvise</code>(<em class="parameter"><code>...</code></em>,
- <em class="parameter"><code><code class="constant">MADV_DONTNEED</code></code></em>) or
- similar calls made to purge dirty pages.</p></dd><dt><a name="stats.arenas.i.purged"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.purged</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of pages purged.</p></dd><dt><a name="stats.arenas.i.small.allocated"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.small.allocated</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of bytes currently allocated by small objects.
- </p></dd><dt><a name="stats.arenas.i.small.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.small.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocation requests served by
- small bins.</p></dd><dt><a name="stats.arenas.i.small.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.small.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of small objects returned to bins.
- </p></dd><dt><a name="stats.arenas.i.small.nrequests"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.small.nrequests</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of small allocation requests.
- </p></dd><dt><a name="stats.arenas.i.large.allocated"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.large.allocated</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Number of bytes currently allocated by large objects.
- </p></dd><dt><a name="stats.arenas.i.large.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.large.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of large allocation requests served
- directly by the arena.</p></dd><dt><a name="stats.arenas.i.large.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.large.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of large deallocation requests served
- directly by the arena.</p></dd><dt><a name="stats.arenas.i.large.nrequests"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.large.nrequests</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of large allocation requests.
- </p></dd><dt><a name="stats.arenas.i.bins.j.allocated"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.allocated</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Current number of bytes allocated by
- bin.</p></dd><dt><a name="stats.arenas.i.bins.j.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocations served by bin.
- </p></dd><dt><a name="stats.arenas.i.bins.j.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocations returned to bin.
- </p></dd><dt><a name="stats.arenas.i.bins.j.nrequests"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nrequests</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocation
- requests.</p></dd><dt><a name="stats.arenas.i.bins.j.nfills"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nfills</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code> <code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Cumulative number of tcache fills.</p></dd><dt><a name="stats.arenas.i.bins.j.nflushes"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nflushes</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code> <code class="option">--enable-tcache</code>]
- </span></dt><dd><p>Cumulative number of tcache flushes.</p></dd><dt><a name="stats.arenas.i.bins.j.nruns"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nruns</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of runs created.</p></dd><dt><a name="stats.arenas.i.bins.j.nreruns"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nreruns</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of times the current run from which
- to allocate changed.</p></dd><dt><a name="stats.arenas.i.bins.j.curruns"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curruns</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Current number of runs.</p></dd><dt><a name="stats.arenas.i.lruns.j.nmalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nmalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocation requests for this size
- class served directly by the arena.</p></dd><dt><a name="stats.arenas.i.lruns.j.ndalloc"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.ndalloc</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of deallocation requests for this
- size class served directly by the arena.</p></dd><dt><a name="stats.arenas.i.lruns.j.nrequests"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nrequests</code>"
-
- (<span class="type">uint64_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Cumulative number of allocation requests for this size
- class.</p></dd><dt><a name="stats.arenas.i.lruns.j.curruns"></a><span class="term">
-
- "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.curruns</code>"
-
- (<span class="type">size_t</span>)
- <code class="literal">r-</code>
- [<code class="option">--enable-stats</code>]
- </span></dt><dd><p>Current number of runs for this size class.
- </p></dd></dl></div></div><div class="refsect1"><a name="debugging_malloc_problems"></a><h2>DEBUGGING MALLOC PROBLEMS</h2><p>When debugging, it is a good idea to configure/build jemalloc with
- the <code class="option">--enable-debug</code> and <code class="option">--enable-fill</code>
- options, and recompile the program with suitable options and symbols for
- debugger support. When so configured, jemalloc incorporates a wide variety
- of run-time assertions that catch application errors such as double-free,
- write-after-free, etc.</p><p>Programs often accidentally depend on &#8220;uninitialized&#8221;
- memory actually being filled with zero bytes. Junk filling
- (see the <a class="link" href="#opt.junk">
- "<code class="mallctl">opt.junk</code>"
- </a>
- option) tends to expose such bugs in the form of obviously incorrect
- results and/or coredumps. Conversely, zero
- filling (see the <a class="link" href="#opt.zero">
- "<code class="mallctl">opt.zero</code>"
- </a> option) eliminates
- the symptoms of such bugs. Between these two options, it is usually
- possible to quickly detect, diagnose, and eliminate such bugs.</p><p>This implementation does not provide much detail about the problems
- it detects, because the performance impact for storing such information
- would be prohibitive. However, jemalloc does integrate with the most
- excellent <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a> tool if the
- <code class="option">--enable-valgrind</code> configuration option is enabled.</p></div><div class="refsect1"><a name="diagnostic_messages"></a><h2>DIAGNOSTIC MESSAGES</h2><p>If any of the memory allocation/deallocation functions detect an
- error or warning condition, a message will be printed to file descriptor
- <code class="constant">STDERR_FILENO</code>. Errors will result in the process
- dumping core. If the <a class="link" href="#opt.abort">
- "<code class="mallctl">opt.abort</code>"
- </a> option is set, most
- warnings are treated as errors.</p><p>The <code class="varname">malloc_message</code> variable allows the programmer
- to override the function which emits the text strings forming the errors
- and warnings if for some reason the <code class="constant">STDERR_FILENO</code> file
- descriptor is not suitable for this.
- <code class="function">malloc_message</code>(<em class="parameter"><code></code></em>) takes the
- <em class="parameter"><code>cbopaque</code></em> pointer argument that is
- <code class="constant">NULL</code> unless overridden by the arguments in a call to
- <code class="function">malloc_stats_print</code>(<em class="parameter"><code></code></em>), followed by a string
- pointer. Please note that doing anything which tries to allocate memory in
- this function is likely to result in a crash or deadlock.</p><p>All messages are prefixed by
- &#8220;<code class="computeroutput">&lt;jemalloc&gt;: </code>&#8221;.</p></div><div class="refsect1"><a name="return_values"></a><h2>RETURN VALUES</h2><div class="refsect2"><a name="idm316388028784"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) and
- <code class="function">calloc</code>(<em class="parameter"><code></code></em>) functions return a pointer to the
- allocated memory if successful; otherwise a <code class="constant">NULL</code>
- pointer is returned and <code class="varname">errno</code> is set to
- <span class="errorname">ENOMEM</span>.</p><p>The <code class="function">posix_memalign</code>(<em class="parameter"><code></code></em>) function
- returns the value 0 if successful; otherwise it returns an error value.
- The <code class="function">posix_memalign</code>(<em class="parameter"><code></code></em>) function will fail
- if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">EINVAL</span></span></dt><dd><p>The <em class="parameter"><code>alignment</code></em> parameter is
- not a power of 2 at least as large as
- <code class="code">sizeof(<span class="type">void *</span>)</code>.
- </p></dd><dt><span class="term"><span class="errorname">ENOMEM</span></span></dt><dd><p>Memory allocation error.</p></dd></dl></div><p>
- </p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function returns
- a pointer to the allocated memory if successful; otherwise a
- <code class="constant">NULL</code> pointer is returned and
- <code class="varname">errno</code> is set. The
- <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function will fail if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">EINVAL</span></span></dt><dd><p>The <em class="parameter"><code>alignment</code></em> parameter is
- not a power of 2.
- </p></dd><dt><span class="term"><span class="errorname">ENOMEM</span></span></dt><dd><p>Memory allocation error.</p></dd></dl></div><p>
- </p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function returns a
- pointer, possibly identical to <em class="parameter"><code>ptr</code></em>, to the
- allocated memory if successful; otherwise a <code class="constant">NULL</code>
- pointer is returned, and <code class="varname">errno</code> is set to
- <span class="errorname">ENOMEM</span> if the error was the result of an
- allocation failure. The <code class="function">realloc</code>(<em class="parameter"><code></code></em>)
- function always leaves the original buffer intact when an error occurs.
- </p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function returns no
- value.</p></div><div class="refsect2"><a name="idm316388003104"></a><h3>Non-standard API</h3><p>The <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) and
- <code class="function">rallocx</code>(<em class="parameter"><code></code></em>) functions return a pointer to
- the allocated memory if successful; otherwise a <code class="constant">NULL</code>
- pointer is returned to indicate insufficient contiguous memory was
- available to service the allocation request. </p><p>The <code class="function">xallocx</code>(<em class="parameter"><code></code></em>) function returns the
- real size of the resulting resized allocation pointed to by
- <em class="parameter"><code>ptr</code></em>, which is a value less than
- <em class="parameter"><code>size</code></em> if the allocation could not be adequately
- grown in place. </p><p>The <code class="function">sallocx</code>(<em class="parameter"><code></code></em>) function returns the
- real size of the allocation pointed to by <em class="parameter"><code>ptr</code></em>.
- </p><p>The <code class="function">nallocx</code>(<em class="parameter"><code></code></em>) returns the real size
- that would result from a successful equivalent
- <code class="function">mallocx</code>(<em class="parameter"><code></code></em>) function call, or zero if
- insufficient memory is available to perform the size computation. </p><p>The <code class="function">mallctl</code>(<em class="parameter"><code></code></em>),
- <code class="function">mallctlnametomib</code>(<em class="parameter"><code></code></em>), and
- <code class="function">mallctlbymib</code>(<em class="parameter"><code></code></em>) functions return 0 on
- success; otherwise they return an error value. The functions will fail
- if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">EINVAL</span></span></dt><dd><p><em class="parameter"><code>newp</code></em> is not
- <code class="constant">NULL</code>, and <em class="parameter"><code>newlen</code></em> is too
- large or too small. Alternatively, <em class="parameter"><code>*oldlenp</code></em>
- is too large or too small; in this case as much data as possible
- are read despite the error.</p></dd><dt><span class="term"><span class="errorname">ENOENT</span></span></dt><dd><p><em class="parameter"><code>name</code></em> or
- <em class="parameter"><code>mib</code></em> specifies an unknown/invalid
- value.</p></dd><dt><span class="term"><span class="errorname">EPERM</span></span></dt><dd><p>Attempt to read or write a void value, or attempt to
- write a read-only value.</p></dd><dt><span class="term"><span class="errorname">EAGAIN</span></span></dt><dd><p>A memory allocation failure
- occurred.</p></dd><dt><span class="term"><span class="errorname">EFAULT</span></span></dt><dd><p>An interface with side effects failed in some way
- not directly related to <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>)
- read/write processing.</p></dd></dl></div><p>
- </p><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
- returns the usable size of the allocation pointed to by
- <em class="parameter"><code>ptr</code></em>. </p></div><div class="refsect2"><a name="idm316387973360"></a><h3>Experimental API</h3><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions return
- <code class="constant">ALLOCM_SUCCESS</code> on success; otherwise they return an
- error value. The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
- <code class="function">rallocm</code>(<em class="parameter"><code></code></em>), and
- <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions will fail if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">ALLOCM_ERR_OOM</span></span></dt><dd><p>Out of memory. Insufficient contiguous memory was
- available to service the allocation request. The
- <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function additionally sets
- <em class="parameter"><code>*ptr</code></em> to <code class="constant">NULL</code>, whereas
- the <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function leaves
- <code class="constant">*ptr</code> unmodified.</p></dd></dl></div><p>
- The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function will also
- fail if:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><span class="errorname">ALLOCM_ERR_NOT_MOVED</span></span></dt><dd><p><code class="constant">ALLOCM_NO_MOVE</code> was specified,
- but the reallocation request could not be serviced without moving
- the object.</p></dd></dl></div><p>
- </p></div></div><div class="refsect1"><a name="environment"></a><h2>ENVIRONMENT</h2><p>The following environment variable affects the execution of the
- allocation functions:
- </p><div class="variablelist"><dl class="variablelist"><dt><span class="term"><code class="envar">MALLOC_CONF</code></span></dt><dd><p>If the environment variable
- <code class="envar">MALLOC_CONF</code> is set, the characters it contains
- will be interpreted as options.</p></dd></dl></div><p>
- </p></div><div class="refsect1"><a name="examples"></a><h2>EXAMPLES</h2><p>To dump core whenever a problem occurs:
- </p><pre class="screen">ln -s 'abort:true' /etc/malloc.conf</pre><p>
- </p><p>To specify in the source a chunk size that is 16 MiB:
- </p><pre class="programlisting">
-malloc_conf = "lg_chunk:24";</pre></div><div class="refsect1"><a name="see_also"></a><h2>SEE ALSO</h2><p><span class="citerefentry"><span class="refentrytitle">madvise</span>(2)</span>,
- <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span>,
- <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>,
- <span class="citerefentry"><span class="refentrytitle">utrace</span>(2)</span>,
- <span class="citerefentry"><span class="refentrytitle">alloca</span>(3)</span>,
- <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span>,
- <span class="citerefentry"><span class="refentrytitle">getpagesize</span>(3)</span></p></div><div class="refsect1"><a name="standards"></a><h2>STANDARDS</h2><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>),
- <code class="function">calloc</code>(<em class="parameter"><code></code></em>),
- <code class="function">realloc</code>(<em class="parameter"><code></code></em>), and
- <code class="function">free</code>(<em class="parameter"><code></code></em>) functions conform to ISO/IEC
- 9899:1990 (&#8220;ISO C90&#8221;).</p><p>The <code class="function">posix_memalign</code>(<em class="parameter"><code></code></em>) function conforms
- to IEEE Std 1003.1-2001 (&#8220;POSIX.1&#8221;).</p></div></div></body></html>
diff --git a/deps/jemalloc/doc/jemalloc.xml.in b/deps/jemalloc/doc/jemalloc.xml.in
index d8e2e711f..1e12fd3a8 100644
--- a/deps/jemalloc/doc/jemalloc.xml.in
+++ b/deps/jemalloc/doc/jemalloc.xml.in
@@ -38,17 +38,13 @@
<refname>xallocx</refname>
<refname>sallocx</refname>
<refname>dallocx</refname>
+ <refname>sdallocx</refname>
<refname>nallocx</refname>
<refname>mallctl</refname>
<refname>mallctlnametomib</refname>
<refname>mallctlbymib</refname>
<refname>malloc_stats_print</refname>
<refname>malloc_usable_size</refname>
- <refname>allocm</refname>
- <refname>rallocm</refname>
- <refname>sallocm</refname>
- <refname>dallocm</refname>
- <refname>nallocm</refname>
-->
<refpurpose>general purpose memory allocation functions</refpurpose>
</refnamediv>
@@ -56,13 +52,12 @@
<title>LIBRARY</title>
<para>This manual describes jemalloc @jemalloc_version@. More information
can be found at the <ulink
- url="http://www.canonware.com/jemalloc/">jemalloc website</ulink>.</para>
+ url="http://jemalloc.net/">jemalloc website</ulink>.</para>
</refsect1>
<refsynopsisdiv>
<title>SYNOPSIS</title>
<funcsynopsis>
- <funcsynopsisinfo>#include &lt;<filename class="headerfile">stdlib.h</filename>&gt;
-#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
+ <funcsynopsisinfo>#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
<refsect2>
<title>Standard API</title>
<funcprototype>
@@ -126,6 +121,12 @@
<paramdef>int <parameter>flags</parameter></paramdef>
</funcprototype>
<funcprototype>
+ <funcdef>void <function>sdallocx</function></funcdef>
+ <paramdef>void *<parameter>ptr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ </funcprototype>
+ <funcprototype>
<funcdef>size_t <function>nallocx</function></funcdef>
<paramdef>size_t <parameter>size</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
@@ -172,41 +173,6 @@
</funcprototype>
<para><type>const char *</type><varname>malloc_conf</varname>;</para>
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <funcprototype>
- <funcdef>int <function>allocm</function></funcdef>
- <paramdef>void **<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>rallocm</function></funcdef>
- <paramdef>void **<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>size_t <parameter>extra</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>sallocm</function></funcdef>
- <paramdef>const void *<parameter>ptr</parameter></paramdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>dallocm</function></funcdef>
- <paramdef>void *<parameter>ptr</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- <funcprototype>
- <funcdef>int <function>nallocm</function></funcdef>
- <paramdef>size_t *<parameter>rsize</parameter></paramdef>
- <paramdef>size_t <parameter>size</parameter></paramdef>
- <paramdef>int <parameter>flags</parameter></paramdef>
- </funcprototype>
- </refsect2>
</funcsynopsis>
</refsynopsisdiv>
<refsect1 id="description">
@@ -214,36 +180,36 @@
<refsect2>
<title>Standard API</title>
- <para>The <function>malloc<parameter/></function> function allocates
+ <para>The <function>malloc()</function> function allocates
<parameter>size</parameter> bytes of uninitialized memory. The allocated
space is suitably aligned (after possible pointer coercion) for storage
of any type of object.</para>
- <para>The <function>calloc<parameter/></function> function allocates
+ <para>The <function>calloc()</function> function allocates
space for <parameter>number</parameter> objects, each
<parameter>size</parameter> bytes in length. The result is identical to
- calling <function>malloc<parameter/></function> with an argument of
+ calling <function>malloc()</function> with an argument of
<parameter>number</parameter> * <parameter>size</parameter>, with the
exception that the allocated memory is explicitly initialized to zero
bytes.</para>
- <para>The <function>posix_memalign<parameter/></function> function
+ <para>The <function>posix_memalign()</function> function
allocates <parameter>size</parameter> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<parameter>alignment</parameter>, and returns the allocation in the value
pointed to by <parameter>ptr</parameter>. The requested
- <parameter>alignment</parameter> must be a power of 2 at least as large
- as <code language="C">sizeof(<type>void *</type>)</code>.</para>
+ <parameter>alignment</parameter> must be a power of 2 at least as large as
+ <code language="C">sizeof(<type>void *</type>)</code>.</para>
- <para>The <function>aligned_alloc<parameter/></function> function
+ <para>The <function>aligned_alloc()</function> function
allocates <parameter>size</parameter> bytes of memory such that the
- allocation's base address is an even multiple of
+ allocation's base address is a multiple of
<parameter>alignment</parameter>. The requested
<parameter>alignment</parameter> must be a power of 2. Behavior is
undefined if <parameter>size</parameter> is not an integral multiple of
<parameter>alignment</parameter>.</para>
- <para>The <function>realloc<parameter/></function> function changes the
+ <para>The <function>realloc()</function> function changes the
size of the previously allocated memory referenced by
<parameter>ptr</parameter> to <parameter>size</parameter> bytes. The
contents of the memory are unchanged up to the lesser of the new and old
@@ -251,31 +217,32 @@
portion of the memory are undefined. Upon success, the memory referenced
by <parameter>ptr</parameter> is freed and a pointer to the newly
allocated memory is returned. Note that
- <function>realloc<parameter/></function> may move the memory allocation,
+ <function>realloc()</function> may move the memory allocation,
resulting in a different return value than <parameter>ptr</parameter>.
If <parameter>ptr</parameter> is <constant>NULL</constant>, the
- <function>realloc<parameter/></function> function behaves identically to
- <function>malloc<parameter/></function> for the specified size.</para>
+ <function>realloc()</function> function behaves identically to
+ <function>malloc()</function> for the specified size.</para>
- <para>The <function>free<parameter/></function> function causes the
+ <para>The <function>free()</function> function causes the
allocated memory referenced by <parameter>ptr</parameter> to be made
available for future allocations. If <parameter>ptr</parameter> is
<constant>NULL</constant>, no action occurs.</para>
</refsect2>
<refsect2>
<title>Non-standard API</title>
- <para>The <function>mallocx<parameter/></function>,
- <function>rallocx<parameter/></function>,
- <function>xallocx<parameter/></function>,
- <function>sallocx<parameter/></function>,
- <function>dallocx<parameter/></function>, and
- <function>nallocx<parameter/></function> functions all have a
+ <para>The <function>mallocx()</function>,
+ <function>rallocx()</function>,
+ <function>xallocx()</function>,
+ <function>sallocx()</function>,
+ <function>dallocx()</function>,
+ <function>sdallocx()</function>, and
+ <function>nallocx()</function> functions all have a
<parameter>flags</parameter> argument that can be used to specify
options. The functions only check the options that are contextually
relevant. Use bitwise or (<code language="C">|</code>) operations to
specify one or more of the following:
<variablelist>
- <varlistentry>
+ <varlistentry id="MALLOCX_LG_ALIGN">
<term><constant>MALLOCX_LG_ALIGN(<parameter>la</parameter>)
</constant></term>
@@ -285,7 +252,7 @@
that <parameter>la</parameter> is within the valid
range.</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_ALIGN">
<term><constant>MALLOCX_ALIGN(<parameter>a</parameter>)
</constant></term>
@@ -295,7 +262,7 @@
validate that <parameter>a</parameter> is a power of 2.
</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_ZERO">
<term><constant>MALLOCX_ZERO</constant></term>
<listitem><para>Initialize newly allocated memory to contain zero
@@ -304,35 +271,55 @@
that are initialized to contain zero bytes. If this macro is
absent, newly allocated memory is uninitialized.</para></listitem>
</varlistentry>
- <varlistentry>
+ <varlistentry id="MALLOCX_TCACHE">
+ <term><constant>MALLOCX_TCACHE(<parameter>tc</parameter>)
+ </constant></term>
+
+ <listitem><para>Use the thread-specific cache (tcache) specified by
+ the identifier <parameter>tc</parameter>, which must have been
+ acquired via the <link
+ linkend="tcache.create"><mallctl>tcache.create</mallctl></link>
+ mallctl. This macro does not validate that
+ <parameter>tc</parameter> specifies a valid
+ identifier.</para></listitem>
+ </varlistentry>
+ <varlistentry id="MALLOC_TCACHE_NONE">
+ <term><constant>MALLOCX_TCACHE_NONE</constant></term>
+
+ <listitem><para>Do not use a thread-specific cache (tcache). Unless
+ <constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant> or
+ <constant>MALLOCX_TCACHE_NONE</constant> is specified, an
+ automatically managed tcache will be used under many circumstances.
+ This macro cannot be used in the same <parameter>flags</parameter>
+ argument as
+ <constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant>.</para></listitem>
+ </varlistentry>
+ <varlistentry id="MALLOCX_ARENA">
<term><constant>MALLOCX_ARENA(<parameter>a</parameter>)
</constant></term>
<listitem><para>Use the arena specified by the index
- <parameter>a</parameter> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <parameter>a</parameter>
- specifies an arena index in the valid range.</para></listitem>
+ <parameter>a</parameter>. This macro has no effect for regions that
+ were allocated via an arena other than the one specified. This
+ macro does not validate that <parameter>a</parameter> specifies an
+ arena index in the valid range.</para></listitem>
</varlistentry>
</variablelist>
</para>
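[Editor's note: a minimal sketch, not part of the patch, showing how the flags above combine with bitwise or; the 64-byte alignment and tcache bypass are arbitrary illustration choices, assuming the standard non-prefixed jemalloc API.]

    #include <jemalloc/jemalloc.h>

    int main(void) {
        /* Combine contextually relevant options with bitwise or. */
        void *p = mallocx(1024,
            MALLOCX_ALIGN(64) | MALLOCX_ZERO | MALLOCX_TCACHE_NONE);
        if (p == NULL) return 1;
        dallocx(p, MALLOCX_TCACHE_NONE);
        return 0;
    }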
- <para>The <function>mallocx<parameter/></function> function allocates at
+ <para>The <function>mallocx()</function> function allocates at
least <parameter>size</parameter> bytes of memory, and returns a pointer
to the base address of the allocation. Behavior is undefined if
- <parameter>size</parameter> is <constant>0</constant>, or if request size
- overflows due to size class and/or alignment constraints.</para>
+ <parameter>size</parameter> is <constant>0</constant>.</para>
- <para>The <function>rallocx<parameter/></function> function resizes the
+ <para>The <function>rallocx()</function> function resizes the
allocation at <parameter>ptr</parameter> to be at least
<parameter>size</parameter> bytes, and returns a pointer to the base
address of the resulting allocation, which may or may not have moved from
its original location. Behavior is undefined if
- <parameter>size</parameter> is <constant>0</constant>, or if request size
- overflows due to size class and/or alignment constraints.</para>
+ <parameter>size</parameter> is <constant>0</constant>.</para>
- <para>The <function>xallocx<parameter/></function> function resizes the
+ <para>The <function>xallocx()</function> function resizes the
allocation at <parameter>ptr</parameter> in place to be at least
<parameter>size</parameter> bytes, and returns the real size of the
allocation. If <parameter>extra</parameter> is non-zero, an attempt is
@@ -345,23 +332,32 @@
language="C">(<parameter>size</parameter> + <parameter>extra</parameter>
&gt; <constant>SIZE_T_MAX</constant>)</code>.</para>
- <para>The <function>sallocx<parameter/></function> function returns the
+ <para>The <function>sallocx()</function> function returns the
real size of the allocation at <parameter>ptr</parameter>.</para>
- <para>The <function>dallocx<parameter/></function> function causes the
+ <para>The <function>dallocx()</function> function causes the
memory referenced by <parameter>ptr</parameter> to be made available for
future allocations.</para>
- <para>The <function>nallocx<parameter/></function> function allocates no
+ <para>The <function>sdallocx()</function> function is an
+ extension of <function>dallocx()</function> with a
+ <parameter>size</parameter> parameter to allow the caller to pass in the
+ allocation size as an optimization. The minimum valid input size is the
+ original requested size of the allocation, and the maximum valid input
+ size is the corresponding value returned by
+ <function>nallocx()</function> or
+ <function>sallocx()</function>.</para>
+
+ <para>The <function>nallocx()</function> function allocates no
memory, but it performs the same size computation as the
- <function>mallocx<parameter/></function> function, and returns the real
+ <function>mallocx()</function> function, and returns the real
size of the allocation that would result from the equivalent
- <function>mallocx<parameter/></function> function call. Behavior is
- undefined if <parameter>size</parameter> is <constant>0</constant>, or if
- request size overflows due to size class and/or alignment
- constraints.</para>
+ <function>mallocx()</function> function call, or
+ <constant>0</constant> if the inputs exceed the maximum supported size
+ class and/or alignment. Behavior is undefined if
+ <parameter>size</parameter> is <constant>0</constant>.</para>
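[Editor's note: a short sketch of the nallocx()/sdallocx() pairing described above; the 100-byte request size is arbitrary.]

    #include <jemalloc/jemalloc.h>
    #include <stdio.h>

    int main(void) {
        /* nallocx() predicts the real size without allocating. */
        size_t real = nallocx(100, 0);
        void *p = mallocx(100, 0);
        if (p == NULL) return 1;
        printf("requested 100, real size %zu\n", real);
        /* sdallocx() accepts any size between the original request (100)
         * and the real size reported by nallocx()/sallocx(). */
        sdallocx(p, 100, 0);
        return 0;
    }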
- <para>The <function>mallctl<parameter/></function> function provides a
+ <para>The <function>mallctl()</function> function provides a
general interface for introspecting the memory allocator, as well as
setting modifiable parameters and triggering actions. The
period-separated <parameter>name</parameter> argument specifies a
@@ -376,12 +372,12 @@
<parameter>newlen</parameter>; otherwise pass <constant>NULL</constant>
and <constant>0</constant>.</para>
- <para>The <function>mallctlnametomib<parameter/></function> function
+ <para>The <function>mallctlnametomib()</function> function
provides a way to avoid repeated name lookups for applications that
repeatedly query the same portion of the namespace, by translating a name
- to a &ldquo;Management Information Base&rdquo; (MIB) that can be passed
- repeatedly to <function>mallctlbymib<parameter/></function>. Upon
- successful return from <function>mallctlnametomib<parameter/></function>,
+ to a <quote>Management Information Base</quote> (MIB) that can be passed
+ repeatedly to <function>mallctlbymib()</function>. Upon
+ successful return from <function>mallctlnametomib()</function>,
<parameter>mibp</parameter> contains an array of
<parameter>*miblenp</parameter> integers, where
<parameter>*miblenp</parameter> is the lesser of the number of components
@@ -410,155 +406,50 @@ for (i = 0; i < nbins; i++) {
mib[2] = i;
len = sizeof(bin_size);
- mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
+ mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0);
/* Do something with bin_size... */
}]]></programlisting></para>
- <para>The <function>malloc_stats_print<parameter/></function> function
- writes human-readable summary statistics via the
- <parameter>write_cb</parameter> callback function pointer and
- <parameter>cbopaque</parameter> data passed to
- <parameter>write_cb</parameter>, or
- <function>malloc_message<parameter/></function> if
- <parameter>write_cb</parameter> is <constant>NULL</constant>. This
- function can be called repeatedly. General information that never
- changes during execution can be omitted by specifying "g" as a character
+ <varlistentry id="malloc_stats_print_opts">
+ </varlistentry>
+ <para>The <function>malloc_stats_print()</function> function writes
+ summary statistics via the <parameter>write_cb</parameter> callback
+ function pointer and <parameter>cbopaque</parameter> data passed to
+ <parameter>write_cb</parameter>, or <function>malloc_message()</function>
+ if <parameter>write_cb</parameter> is <constant>NULL</constant>. The
+ statistics are presented in human-readable form unless <quote>J</quote> is
+ specified as a character within the <parameter>opts</parameter> string, in
+ which case the statistics are presented in <ulink
+ url="http://www.json.org/">JSON format</ulink>. This function can be
+ called repeatedly. General information that never changes during
+ execution can be omitted by specifying <quote>g</quote> as a character
within the <parameter>opts</parameter> string. Note that
- <function>malloc_message<parameter/></function> uses the
- <function>mallctl*<parameter/></function> functions internally, so
- inconsistent statistics can be reported if multiple threads use these
- functions simultaneously. If <option>--enable-stats</option> is
- specified during configuration, &ldquo;m&rdquo; and &ldquo;a&rdquo; can
- be specified to omit merged arena and per arena statistics, respectively;
- &ldquo;b&rdquo; and &ldquo;l&rdquo; can be specified to omit per size
- class statistics for bins and large objects, respectively. Unrecognized
- characters are silently ignored. Note that thread caching may prevent
- some statistics from being completely up to date, since extra locking
- would be required to merge counters that track thread cache operations.
- </para>
-
- <para>The <function>malloc_usable_size<parameter/></function> function
+ <function>malloc_message()</function> uses the
+ <function>mallctl*()</function> functions internally, so inconsistent
+ statistics can be reported if multiple threads use these functions
+ simultaneously. If <option>--enable-stats</option> is specified during
+ configuration, <quote>m</quote>, <quote>d</quote>, and <quote>a</quote>
+ can be specified to omit merged arena, destroyed merged arena, and per
+ arena statistics, respectively; <quote>b</quote> and <quote>l</quote> can
+ be specified to omit per size class statistics for bins and large objects,
+ respectively; <quote>x</quote> can be specified to omit all mutex
+ statistics. Unrecognized characters are silently ignored. Note that
+ thread caching may prevent some statistics from being completely up to
+ date, since extra locking would be required to merge counters that track
+ thread cache operations.</para>
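[Editor's note: an illustrative call matching the opts characters documented above.]

    #include <jemalloc/jemalloc.h>

    int main(void) {
        /* Default callback writes to stderr; "g", "b", and "l" omit
         * general info and per size class bin/large stats. */
        malloc_stats_print(NULL, NULL, "gbl");
        /* "J" switches the output to JSON. */
        malloc_stats_print(NULL, NULL, "J");
        return 0;
    }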
+
+ <para>The <function>malloc_usable_size()</function> function
returns the usable size of the allocation pointed to by
<parameter>ptr</parameter>. The return value may be larger than the size
that was requested during allocation. The
- <function>malloc_usable_size<parameter/></function> function is not a
- mechanism for in-place <function>realloc<parameter/></function>; rather
+ <function>malloc_usable_size()</function> function is not a
+ mechanism for in-place <function>realloc()</function>; rather
it is provided solely as a tool for introspection purposes. Any
discrepancy between the requested allocation size and the size reported
- by <function>malloc_usable_size<parameter/></function> should not be
+ by <function>malloc_usable_size()</function> should not be
depended on, since such behavior is entirely implementation-dependent.
</para>
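[Editor's note: a sketch of the introspection-only use described above; the 9-byte request is arbitrary.]

    #include <jemalloc/jemalloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        void *p = malloc(9);
        if (p == NULL) return 1;
        /* Introspection only; the result may exceed the 9 bytes
         * requested and must not be relied on for in-place growth. */
        printf("usable size: %zu\n", malloc_usable_size(p));
        free(p);
        return 0;
    }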
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <para>The experimental API is subject to change or removal without regard
- for backward compatibility. If <option>--disable-experimental</option>
- is specified during configuration, the experimental API is
- omitted.</para>
-
- <para>The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>,
- <function>sallocm<parameter/></function>,
- <function>dallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions all have a
- <parameter>flags</parameter> argument that can be used to specify
- options. The functions only check the options that are contextually
- relevant. Use bitwise or (<code language="C">|</code>) operations to
- specify one or more of the following:
- <variablelist>
- <varlistentry>
- <term><constant>ALLOCM_LG_ALIGN(<parameter>la</parameter>)
- </constant></term>
-
- <listitem><para>Align the memory allocation to start at an address
- that is a multiple of <code language="C">(1 &lt;&lt;
- <parameter>la</parameter>)</code>. This macro does not validate
- that <parameter>la</parameter> is within the valid
- range.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ALIGN(<parameter>a</parameter>)
- </constant></term>
-
- <listitem><para>Align the memory allocation to start at an address
- that is a multiple of <parameter>a</parameter>, where
- <parameter>a</parameter> is a power of two. This macro does not
- validate that <parameter>a</parameter> is a power of 2.
- </para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ZERO</constant></term>
-
- <listitem><para>Initialize newly allocated memory to contain zero
- bytes. In the growing reallocation case, the real size prior to
- reallocation defines the boundary between untouched bytes and those
- that are initialized to contain zero bytes. If this macro is
- absent, newly allocated memory is uninitialized.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_NO_MOVE</constant></term>
-
- <listitem><para>For reallocation, fail rather than moving the
- object. This constraint can apply to both growth and
- shrinkage.</para></listitem>
- </varlistentry>
- <varlistentry>
- <term><constant>ALLOCM_ARENA(<parameter>a</parameter>)
- </constant></term>
-
- <listitem><para>Use the arena specified by the index
- <parameter>a</parameter> (and by necessity bypass the thread
- cache). This macro has no effect for huge regions, nor for regions
- that were allocated via an arena other than the one specified.
- This macro does not validate that <parameter>a</parameter>
- specifies an arena index in the valid range.</para></listitem>
- </varlistentry>
- </variablelist>
- </para>
-
- <para>The <function>allocm<parameter/></function> function allocates at
- least <parameter>size</parameter> bytes of memory, sets
- <parameter>*ptr</parameter> to the base address of the allocation, and
- sets <parameter>*rsize</parameter> to the real size of the allocation if
- <parameter>rsize</parameter> is not <constant>NULL</constant>. Behavior
- is undefined if <parameter>size</parameter> is <constant>0</constant>, or
- if request size overflows due to size class and/or alignment
- constraints.</para>
-
- <para>The <function>rallocm<parameter/></function> function resizes the
- allocation at <parameter>*ptr</parameter> to be at least
- <parameter>size</parameter> bytes, sets <parameter>*ptr</parameter> to
- the base address of the allocation if it moved, and sets
- <parameter>*rsize</parameter> to the real size of the allocation if
- <parameter>rsize</parameter> is not <constant>NULL</constant>. If
- <parameter>extra</parameter> is non-zero, an attempt is made to resize
- the allocation to be at least <code
- language="C">(<parameter>size</parameter> +
- <parameter>extra</parameter>)</code> bytes, though inability to allocate
- the extra byte(s) will not by itself result in failure. Behavior is
- undefined if <parameter>size</parameter> is <constant>0</constant>, if
- request size overflows due to size class and/or alignment constraints, or
- if <code language="C">(<parameter>size</parameter> +
- <parameter>extra</parameter> &gt;
- <constant>SIZE_T_MAX</constant>)</code>.</para>
-
- <para>The <function>sallocm<parameter/></function> function sets
- <parameter>*rsize</parameter> to the real size of the allocation.</para>
-
- <para>The <function>dallocm<parameter/></function> function causes the
- memory referenced by <parameter>ptr</parameter> to be made available for
- future allocations.</para>
-
- <para>The <function>nallocm<parameter/></function> function allocates no
- memory, but it performs the same size computation as the
- <function>allocm<parameter/></function> function, and if
- <parameter>rsize</parameter> is not <constant>NULL</constant> it sets
- <parameter>*rsize</parameter> to the real size of the allocation that
- would result from the equivalent <function>allocm<parameter/></function>
- function call. Behavior is undefined if <parameter>size</parameter> is
- <constant>0</constant>, or if request size overflows due to size class
- and/or alignment constraints.</para>
- </refsect2>
</refsect1>
<refsect1 id="tuning">
<title>TUNING</title>
@@ -566,19 +457,20 @@ for (i = 0; i < nbins; i++) {
routines, the allocator initializes its internals based in part on various
options that can be specified at compile- or run-time.</para>
- <para>The string pointed to by the global variable
- <varname>malloc_conf</varname>, the &ldquo;name&rdquo; of the file
- referenced by the symbolic link named <filename
- class="symlink">/etc/malloc.conf</filename>, and the value of the
+ <para>The string specified via <option>--with-malloc-conf</option>, the
+ string pointed to by the global variable <varname>malloc_conf</varname>, the
+ <quote>name</quote> of the file referenced by the symbolic link named
+ <filename class="symlink">/etc/malloc.conf</filename>, and the value of the
environment variable <envar>MALLOC_CONF</envar>, will be interpreted, in
that order, from left to right as options. Note that
<varname>malloc_conf</varname> may be read before
- <function>main<parameter/></function> is entered, so the declaration of
+ <function>main()</function> is entered, so the declaration of
<varname>malloc_conf</varname> should specify an initializer that contains
- the final value to be read by jemalloc. <varname>malloc_conf</varname> is
- a compile-time setting, whereas <filename
- class="symlink">/etc/malloc.conf</filename> and <envar>MALLOC_CONF</envar>
- can be safely set any time prior to program invocation.</para>
+ the final value to be read by jemalloc. <option>--with-malloc-conf</option>
+ and <varname>malloc_conf</varname> are compile-time mechanisms, whereas
+ <filename class="symlink">/etc/malloc.conf</filename> and
+ <envar>MALLOC_CONF</envar> can be safely set any time prior to program
+ invocation.</para>
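[Editor's note: a sketch of the compile-time mechanism described above; the two option names are examples drawn from this manual, not recommendations.]

    /* Read before main() is entered, so the initializer must already
     * contain the final value. */
    const char *malloc_conf = "abort_conf:true,metadata_thp:auto";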
<para>An options string is a comma-separated list of option:value pairs.
There is one key corresponding to each <link
@@ -598,8 +490,10 @@ for (i = 0; i < nbins; i++) {
<manvolnum>2</manvolnum></citerefentry> to obtain memory, which is
suboptimal for several reasons, including race conditions, increased
fragmentation, and artificial limitations on maximum usable memory. If
- <option>--enable-dss</option> is specified during configuration, this
- allocator uses both <citerefentry><refentrytitle>mmap</refentrytitle>
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system, this allocator uses both
+ <citerefentry><refentrytitle>mmap</refentrytitle>
<manvolnum>2</manvolnum></citerefentry> and
<citerefentry><refentrytitle>sbrk</refentrytitle>
<manvolnum>2</manvolnum></citerefentry>, in that order of preference;
@@ -618,46 +512,31 @@ for (i = 0; i < nbins; i++) {
sense to reduce the number of arenas if an application does not make much
use of the allocation functions.</para>
- <para>In addition to multiple arenas, unless
- <option>--disable-tcache</option> is specified during configuration, this
- allocator supports thread-specific caching for small and large objects, in
- order to make it possible to completely avoid synchronization for most
- allocation requests. Such caching allows very fast allocation in the
- common case, but it increases memory usage and fragmentation, since a
- bounded number of objects can remain allocated in each thread cache.</para>
-
- <para>Memory is conceptually broken into equal-sized chunks, where the
- chunk size is a power of two that is greater than the page size. Chunks
- are always aligned to multiples of the chunk size. This alignment makes it
- possible to find metadata for user objects very quickly.</para>
-
- <para>User objects are broken into three categories according to size:
- small, large, and huge. Small objects are smaller than one page. Large
- objects are smaller than the chunk size. Huge objects are a multiple of
- the chunk size. Small and large objects are managed by arenas; huge
- objects are managed separately in a single data structure that is shared by
- all threads. Huge objects are used by applications infrequently enough
- that this single data structure is not a scalability issue.</para>
-
- <para>Each chunk that is managed by an arena tracks its contents as runs of
- contiguous pages (unused, backing a set of small objects, or backing one
- large object). The combination of chunk alignment and chunk page maps
- makes it possible to determine all metadata regarding small and large
- allocations in constant time.</para>
-
- <para>Small objects are managed in groups by page runs. Each run maintains
- a frontier and free list to track which regions are in use. Allocation
- requests that are no more than half the quantum (8 or 16, depending on
- architecture) are rounded up to the nearest power of two that is at least
- <code language="C">sizeof(<type>double</type>)</code>. All other small
- object size classes are multiples of the quantum, spaced such that internal
- fragmentation is limited to approximately 25% for all but the smallest size
- classes. Allocation requests that are larger than the maximum small size
- class, but small enough to fit in an arena-managed chunk (see the <link
- linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), are
- rounded up to the nearest run size. Allocation requests that are too large
- to fit in an arena-managed chunk are rounded up to the nearest multiple of
- the chunk size.</para>
+ <para>In addition to multiple arenas, this allocator supports
+ thread-specific caching, in order to make it possible to completely avoid
+ synchronization for most allocation requests. Such caching allows very fast
+ allocation in the common case, but it increases memory usage and
+ fragmentation, since a bounded number of objects can remain allocated in
+ each thread cache.</para>
+
+ <para>Memory is conceptually broken into extents. Extents are always
+ aligned to multiples of the page size. This alignment makes it possible to
+ find metadata for user objects quickly. User objects are broken into two
+ categories according to size: small and large. Contiguous small objects
+ comprise a slab, which resides within a single extent, whereas large objects
+ each have their own extents backing them.</para>
+
+ <para>Small objects are managed in groups by slabs. Each slab maintains
+ a bitmap to track which regions are in use. Allocation requests that are no
+ more than half the quantum (8 or 16, depending on architecture) are rounded
+ up to the nearest power of two that is at least <code
+ language="C">sizeof(<type>double</type>)</code>. All other object size
+ classes are multiples of the quantum, spaced such that there are four size
+ classes for each doubling in size, which limits internal fragmentation to
+ approximately 20% for all but the smallest size classes. Small size classes
+ are smaller than four times the page size, and large size classes extend
+ from four times the page size up to the largest size class that does not
+ exceed <constant>PTRDIFF_MAX</constant>.</para>
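[Editor's note: a sketch showing size-class rounding via nallocx(); the expected values assume 4 KiB pages and a 16-byte quantum, per the table below.]

    #include <jemalloc/jemalloc.h>
    #include <stdio.h>

    int main(void) {
        /* Requests round up to a size class: 9 -> 16, 100 -> 112,
         * 3000 -> 3072 under the stated assumptions. */
        size_t reqs[] = {9, 100, 3000};
        for (int i = 0; i < 3; i++) {
            printf("%4zu -> %4zu\n", reqs[i], nallocx(reqs[i], 0));
        }
        return 0;
    }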
<para>Allocations are packed tightly together, which can be an issue for
multi-threaded applications. If you need to assure that allocations do not
@@ -665,9 +544,28 @@ for (i = 0; i < nbins; i++) {
nearest multiple of the cacheline size, or specify cacheline alignment when
allocating.</para>
- <para>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit
- system, the size classes in each category are as shown in <xref
- linkend="size_classes" xrefstyle="template:Table %n"/>.</para>
+ <para>The <function>realloc()</function>,
+ <function>rallocx()</function>, and
+ <function>xallocx()</function> functions may resize allocations
+ without moving them under limited circumstances. Unlike the
+ <function>*allocx()</function> API, the standard API does not
+ officially round up the usable size of an allocation to the nearest size
+ class, so technically it is necessary to call
+ <function>realloc()</function> to grow e.g. a 9-byte allocation to
+ 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage
+ trivially succeeds in place as long as the pre-size and post-size both round
+ up to the same size class. No other API guarantees are made regarding
+ in-place resizing, but the current implementation also tries to resize large
+ allocations in place, as long as the pre-size and post-size are both large.
+ For shrinkage to succeed, the extent allocator must support splitting (see
+ <link
+ linkend="arena.i.extent_hooks"><mallctl>arena.&lt;i&gt;.extent_hooks</mallctl></link>).
+ Growth only succeeds if the trailing memory is currently available, and the
+ extent allocator supports merging.</para>
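[Editor's note: a sketch of attempting in-place growth with xallocx(); the sizes are arbitrary and success depends on trailing memory availability, as explained above.]

    #include <jemalloc/jemalloc.h>

    int main(void) {
        void *p = mallocx(4096, 0);
        if (p == NULL) return 1;
        /* xallocx() never moves the allocation; it returns the resulting
         * real size, which stays below 8192 if growth failed. */
        if (xallocx(p, 8192, 0, 0) >= 8192) {
            /* grown in place */
        }
        dallocx(p, 0);
        return 0;
    }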
+
+ <para>Assuming 4 KiB pages and a 16-byte quantum on a 64-bit system, the
+ size classes in each category are as shown in <xref linkend="size_classes"
+ xrefstyle="template:Table %n"/>.</para>
<table xml:id="size_classes" frame="all">
<title>Size classes</title>
@@ -684,13 +582,13 @@ for (i = 0; i < nbins; i++) {
</thead>
<tbody>
<row>
- <entry morerows="6">Small</entry>
+ <entry morerows="8">Small</entry>
<entry>lg</entry>
<entry>[8]</entry>
</row>
<row>
<entry>16</entry>
- <entry>[16, 32, 48, ..., 128]</entry>
+ <entry>[16, 32, 48, 64, 80, 96, 112, 128]</entry>
</row>
<row>
<entry>32</entry>
@@ -710,17 +608,80 @@ for (i = 0; i < nbins; i++) {
</row>
<row>
<entry>512</entry>
- <entry>[2560, 3072, 3584]</entry>
+ <entry>[2560, 3072, 3584, 4096]</entry>
+ </row>
+ <row>
+ <entry>1 KiB</entry>
+ <entry>[5 KiB, 6 KiB, 7 KiB, 8 KiB]</entry>
+ </row>
+ <row>
+ <entry>2 KiB</entry>
+ <entry>[10 KiB, 12 KiB, 14 KiB]</entry>
+ </row>
+ <row>
+ <entry morerows="15">Large</entry>
+ <entry>2 KiB</entry>
+ <entry>[16 KiB]</entry>
</row>
<row>
- <entry>Large</entry>
<entry>4 KiB</entry>
- <entry>[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</entry>
+ <entry>[20 KiB, 24 KiB, 28 KiB, 32 KiB]</entry>
+ </row>
+ <row>
+ <entry>8 KiB</entry>
+ <entry>[40 KiB, 48 KiB, 56 KiB, 64 KiB]</entry>
+ </row>
+ <row>
+ <entry>16 KiB</entry>
+ <entry>[80 KiB, 96 KiB, 112 KiB, 128 KiB]</entry>
+ </row>
+ <row>
+ <entry>32 KiB</entry>
+ <entry>[160 KiB, 192 KiB, 224 KiB, 256 KiB]</entry>
+ </row>
+ <row>
+ <entry>64 KiB</entry>
+ <entry>[320 KiB, 384 KiB, 448 KiB, 512 KiB]</entry>
+ </row>
+ <row>
+ <entry>128 KiB</entry>
+ <entry>[640 KiB, 768 KiB, 896 KiB, 1 MiB]</entry>
+ </row>
+ <row>
+ <entry>256 KiB</entry>
+ <entry>[1280 KiB, 1536 KiB, 1792 KiB, 2 MiB]</entry>
+ </row>
+ <row>
+ <entry>512 KiB</entry>
+ <entry>[2560 KiB, 3 MiB, 3584 KiB, 4 MiB]</entry>
+ </row>
+ <row>
+ <entry>1 MiB</entry>
+ <entry>[5 MiB, 6 MiB, 7 MiB, 8 MiB]</entry>
+ </row>
+ <row>
+ <entry>2 MiB</entry>
+ <entry>[10 MiB, 12 MiB, 14 MiB, 16 MiB]</entry>
</row>
<row>
- <entry>Huge</entry>
<entry>4 MiB</entry>
- <entry>[4 MiB, 8 MiB, 12 MiB, ...]</entry>
+ <entry>[20 MiB, 24 MiB, 28 MiB, 32 MiB]</entry>
+ </row>
+ <row>
+ <entry>8 MiB</entry>
+ <entry>[40 MiB, 48 MiB, 56 MiB, 64 MiB]</entry>
+ </row>
+ <row>
+ <entry>...</entry>
+ <entry>...</entry>
+ </row>
+ <row>
+ <entry>512 PiB</entry>
+ <entry>[2560 PiB, 3 EiB, 3584 PiB, 4 EiB]</entry>
+ </row>
+ <row>
+ <entry>1 EiB</entry>
+ <entry>[5 EiB, 6 EiB, 7 EiB]</entry>
</row>
</tbody>
</tgroup>
@@ -729,19 +690,32 @@ for (i = 0; i < nbins; i++) {
<refsect1 id="mallctl_namespace">
<title>MALLCTL NAMESPACE</title>
<para>The following names are defined in the namespace accessible via the
- <function>mallctl*<parameter/></function> functions. Value types are
- specified in parentheses, their readable/writable statuses are encoded as
+ <function>mallctl*()</function> functions. Value types are specified in
+ parentheses, their readable/writable statuses are encoded as
<literal>rw</literal>, <literal>r-</literal>, <literal>-w</literal>, or
<literal>--</literal>, and required build configuration flags follow, if
any. A name element encoded as <literal>&lt;i&gt;</literal> or
<literal>&lt;j&gt;</literal> indicates an integer component, where the
integer varies from 0 to some upper value that must be determined via
- introspection. In the case of <mallctl>stats.arenas.&lt;i&gt;.*</mallctl>,
- <literal>&lt;i&gt;</literal> equal to <link
- linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link> can be
- used to access the summation of statistics from all arenas. Take special
- note of the <link linkend="epoch"><mallctl>epoch</mallctl></link> mallctl,
- which controls refreshing of cached dynamic statistics.</para>
+ introspection. In the case of <mallctl>stats.arenas.&lt;i&gt;.*</mallctl>
+ and <mallctl>arena.&lt;i&gt;.{initialized,purge,decay,dss}</mallctl>,
+ <literal>&lt;i&gt;</literal> equal to
+ <constant>MALLCTL_ARENAS_ALL</constant> can be used to operate on all arenas
+ or access the summation of statistics from all arenas; similarly
+ <literal>&lt;i&gt;</literal> equal to
+ <constant>MALLCTL_ARENAS_DESTROYED</constant> can be used to access the
+ summation of statistics from all destroyed arenas. These constants can be
+ utilized either via <function>mallctlnametomib()</function> followed by
+ <function>mallctlbymib()</function>, or via code such as the following:
+ <programlisting language="C"><![CDATA[
+#define STRINGIFY_HELPER(x) #x
+#define STRINGIFY(x) STRINGIFY_HELPER(x)
+
+mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
+ NULL, NULL, NULL, 0);]]></programlisting>
+ Take special note of the <link
+ linkend="epoch"><mallctl>epoch</mallctl></link> mallctl, which controls
+ refreshing of cached dynamic statistics.</para>
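[Editor's note: a sketch of the MIB-based route mentioned above, substituting MALLCTL_ARENAS_ALL into a translated MIB.]

    #include <jemalloc/jemalloc.h>

    int main(void) {
        /* Translate one arena's name once, then swap in
         * MALLCTL_ARENAS_ALL to operate on every arena. */
        size_t mib[3];
        size_t miblen = sizeof(mib) / sizeof(mib[0]);
        if (mallctlnametomib("arena.0.decay", mib, &miblen) != 0) return 1;
        mib[1] = MALLCTL_ARENAS_ALL;
        return mallctlbymib(mib, miblen, NULL, NULL, NULL, 0);
    }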
<variablelist>
<varlistentry id="version">
@@ -760,28 +734,62 @@ for (i = 0; i < nbins; i++) {
<literal>rw</literal>
</term>
<listitem><para>If a value is passed in, refresh the data from which
- the <function>mallctl*<parameter/></function> functions report values,
+ the <function>mallctl*()</function> functions report values,
and increment the epoch. Return the current epoch. This is useful for
detecting whether another thread caused a refresh.</para></listitem>
</varlistentry>
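[Editor's note: the usual refresh idiom for the epoch mallctl described above.]

    #include <jemalloc/jemalloc.h>
    #include <stdint.h>

    int main(void) {
        /* Write any value to refresh cached statistics, and read back
         * the incremented epoch. */
        uint64_t epoch = 1;
        size_t sz = sizeof(epoch);
        mallctl("epoch", &epoch, &sz, &epoch, sz);
        return 0;
    }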
- <varlistentry id="config.debug">
+ <varlistentry id="background_thread">
<term>
- <mallctl>config.debug</mallctl>
+ <mallctl>background_thread</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Enable/disable internal background worker threads. When
+ set to true, background threads are created on demand (the number of
+ background threads will be no more than the number of CPUs or active
+ arenas). Threads run periodically, and handle <link
+ linkend="arena.i.decay">purging</link> asynchronously. When switching
+ off, background threads are terminated synchronously. Note that after
+ <citerefentry><refentrytitle>fork</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+ function, the state in the child process will be disabled regardless
+ the state in parent process. See <link
+ linkend="stats.background_thread.num_threads"><mallctl>stats.background_thread</mallctl></link>
+ for related stats. <link
+ linkend="opt.background_thread"><mallctl>opt.background_thread</mallctl></link>
+ can be used to set the default option. This option is only available on
+ selected pthread-based platforms.</para></listitem>
+ </varlistentry>
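[Editor's note: a sketch enabling the background_thread mallctl described above; the call fails with a nonzero return on platforms where the feature is unavailable.]

    #include <jemalloc/jemalloc.h>
    #include <stdbool.h>

    int main(void) {
        /* Turn background purging threads on at run time. */
        bool enable = true;
        return mallctl("background_thread", NULL, NULL,
            &enable, sizeof(enable));
    }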
+
+ <varlistentry id="max_background_threads">
+ <term>
+ <mallctl>max_background_threads</mallctl>
+ (<type>size_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Maximum number of background worker threads that will
+ be created. This value is capped at <link
+ linkend="opt.max_background_threads"><mallctl>opt.max_background_threads</mallctl></link> at
+ startup.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="config.cache_oblivious">
+ <term>
+ <mallctl>config.cache_oblivious</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-debug</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para><option>--enable-cache-oblivious</option> was specified
+ during build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.dss">
+ <varlistentry id="config.debug">
<term>
- <mallctl>config.dss</mallctl>
+ <mallctl>config.debug</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-dss</option> was specified during
+ <listitem><para><option>--enable-debug</option> was specified during
build configuration.</para></listitem>
</varlistentry>
@@ -805,24 +813,15 @@ for (i = 0; i < nbins; i++) {
during build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.mremap">
+ <varlistentry id="config.malloc_conf">
<term>
- <mallctl>config.mremap</mallctl>
- (<type>bool</type>)
- <literal>r-</literal>
- </term>
- <listitem><para><option>--enable-mremap</option> was specified during
- build configuration.</para></listitem>
- </varlistentry>
-
- <varlistentry id="config.munmap">
- <term>
- <mallctl>config.munmap</mallctl>
- (<type>bool</type>)
+ <mallctl>config.malloc_conf</mallctl>
+ (<type>const char *</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-munmap</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para>Embedded configure-time-specified run-time options
+ string, empty unless <option>--with-malloc-conf</option> was specified
+ during build configuration.</para></listitem>
</varlistentry>
<varlistentry id="config.prof">
@@ -865,68 +864,94 @@ for (i = 0; i < nbins; i++) {
build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.tcache">
+
+ <varlistentry id="config.utrace">
<term>
- <mallctl>config.tcache</mallctl>
+ <mallctl>config.utrace</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--disable-tcache</option> was not specified
- during build configuration.</para></listitem>
+ <listitem><para><option>--enable-utrace</option> was specified during
+ build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.tls">
+ <varlistentry id="config.xmalloc">
<term>
- <mallctl>config.tls</mallctl>
+ <mallctl>config.xmalloc</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--disable-tls</option> was not specified during
+ <listitem><para><option>--enable-xmalloc</option> was specified during
build configuration.</para></listitem>
</varlistentry>
- <varlistentry id="config.utrace">
+ <varlistentry id="opt.abort">
<term>
- <mallctl>config.utrace</mallctl>
+ <mallctl>opt.abort</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-utrace</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para>Abort-on-warning enabled/disabled. If true, most
+ warnings are fatal. Note that runtime option warnings are not included
+ (see <link
+ linkend="opt.abort_conf"><mallctl>opt.abort_conf</mallctl></link> for
+ that). The process will call
+ <citerefentry><refentrytitle>abort</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry> in these cases. This option is
+ disabled by default unless <option>--enable-debug</option> is
+ specified during configuration, in which case it is enabled by default.
+ </para></listitem>
</varlistentry>
- <varlistentry id="config.valgrind">
+ <varlistentry id="opt.abort_conf">
<term>
- <mallctl>config.valgrind</mallctl>
+ <mallctl>opt.abort_conf</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-valgrind</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para>Abort-on-invalid-configuration enabled/disabled. If
+ true, invalid runtime options are fatal. The process will call
+ <citerefentry><refentrytitle>abort</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry> in these cases. This option is
+ disabled by default unless <option>--enable-debug</option> is
+ specified during configuration, in which case it is enabled by default.
+ </para></listitem>
</varlistentry>
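+
+      <para>As a non-normative sketch (the option values are illustrative,
+      not recommendations), an application could embed a compile-time
+      default that makes invalid options fatal while keeping ordinary
+      warnings non-fatal:</para>
+      <programlisting language="C"><![CDATA[
+/* Compile-time defaults; MALLOC_CONF in the environment can still
+ * override these. */
+const char *malloc_conf = "abort_conf:true,abort:false";]]></programlisting>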
- <varlistentry id="config.xmalloc">
+ <varlistentry id="opt.metadata_thp">
<term>
- <mallctl>config.xmalloc</mallctl>
- (<type>bool</type>)
+ <mallctl>opt.metadata_thp</mallctl>
+ (<type>const char *</type>)
<literal>r-</literal>
</term>
- <listitem><para><option>--enable-xmalloc</option> was specified during
- build configuration.</para></listitem>
+ <listitem><para>Controls whether to allow jemalloc to use transparent
+ huge page (THP) for internal metadata (see <link
+ linkend="stats.metadata">stats.metadata</link>). <quote>always</quote>
+ allows such usage. <quote>auto</quote> uses no THP initially, but may
+        begin to do so when metadata usage reaches a certain level.  The default
+ is <quote>disabled</quote>.</para></listitem>
</varlistentry>
- <varlistentry id="opt.abort">
+ <varlistentry id="opt.retain">
<term>
- <mallctl>opt.abort</mallctl>
+ <mallctl>opt.retain</mallctl>
(<type>bool</type>)
<literal>r-</literal>
</term>
- <listitem><para>Abort-on-warning enabled/disabled. If true, most
- warnings are fatal. The process will call
- <citerefentry><refentrytitle>abort</refentrytitle>
- <manvolnum>3</manvolnum></citerefentry> in these cases. This option is
- disabled by default unless <option>--enable-debug</option> is
- specified during configuration, in which case it is enabled by default.
+ <listitem><para>If true, retain unused virtual memory for later reuse
+ rather than discarding it by calling
+ <citerefentry><refentrytitle>munmap</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> or equivalent (see <link
+ linkend="stats.retained">stats.retained</link> for related details).
+ This option is disabled by default unless discarding virtual memory is
+ known to trigger
+        platform-specific performance problems, e.g. on 64-bit Linux, which
+ has a quirk in its virtual memory allocation algorithm that causes
+ semi-permanent VM map holes under normal jemalloc operation. Although
+ <citerefentry><refentrytitle>munmap</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> causes issues on 32-bit Linux as
+ well, retaining virtual memory for 32-bit Linux is disabled by default
+ due to the practical possibility of address space exhaustion.
</para></listitem>
</varlistentry>
@@ -940,53 +965,137 @@ for (i = 0; i < nbins; i++) {
<manvolnum>2</manvolnum></citerefentry>) allocation precedence as
related to <citerefentry><refentrytitle>mmap</refentrytitle>
<manvolnum>2</manvolnum></citerefentry> allocation. The following
- settings are supported: &ldquo;disabled&rdquo;, &ldquo;primary&rdquo;,
- and &ldquo;secondary&rdquo;. The default is &ldquo;secondary&rdquo; if
- <link linkend="config.dss"><mallctl>config.dss</mallctl></link> is
- true, &ldquo;disabled&rdquo; otherwise.
+ settings are supported if
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system: <quote>disabled</quote>, <quote>primary</quote>, and
+ <quote>secondary</quote>; otherwise only <quote>disabled</quote> is
+ supported. The default is <quote>secondary</quote> if
+ <citerefentry><refentrytitle>sbrk</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> is supported by the operating
+ system; <quote>disabled</quote> otherwise.
</para></listitem>
</varlistentry>
- <varlistentry id="opt.lg_chunk">
+ <varlistentry id="opt.narenas">
<term>
- <mallctl>opt.lg_chunk</mallctl>
- (<type>size_t</type>)
+ <mallctl>opt.narenas</mallctl>
+ (<type>unsigned</type>)
<literal>r-</literal>
</term>
- <listitem><para>Virtual memory chunk size (log base 2). If a chunk
- size outside the supported size range is specified, the size is
- silently clipped to the minimum/maximum supported size. The default
- chunk size is 4 MiB (2^22).
+ <listitem><para>Maximum number of arenas to use for automatic
+ multiplexing of threads and arenas. The default is four times the
+ number of CPUs, or one if there is a single CPU.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.percpu_arena">
+ <term>
+ <mallctl>opt.percpu_arena</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal>
+ </term>
+        <listitem><para>Per-CPU arena mode.  The <quote>percpu</quote> setting
+        enables this feature, which uses the number of CPUs to determine the
+        number of arenas, and binds each thread to an arena dynamically based
+        on the CPU the thread is currently running on.  The
+        <quote>phycpu</quote> setting uses one arena per physical CPU, meaning
+        the two hyperthreads on the same CPU share one arena.  Note that no
+        runtime check of hyperthreading availability is currently performed.
+        When set to <quote>disabled</quote>, narenas and the thread-to-arena
+        association are not affected by this option.  The default is
+        <quote>disabled</quote>.
</para></listitem>
</varlistentry>
- <varlistentry id="opt.narenas">
+ <varlistentry id="opt.background_thread">
<term>
- <mallctl>opt.narenas</mallctl>
- (<type>size_t</type>)
+ <mallctl>opt.background_thread</mallctl>
+ (<type>const bool</type>)
<literal>r-</literal>
</term>
- <listitem><para>Maximum number of arenas to use for automatic
- multiplexing of threads and arenas. The default is four times the
- number of CPUs, or one if there is a single CPU.</para></listitem>
+        <listitem><para>Internal background worker threads enabled/disabled.
+        Because of potential circular dependencies, enabling background
+        threads via this option may cause a crash or deadlock during
+        initialization.  For a reliable way to use this feature, see <link
+        linkend="background_thread">background_thread</link> for dynamic
+        control options and details.  This option is disabled by
+        default.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.max_background_threads">
+ <term>
+ <mallctl>opt.max_background_threads</mallctl>
+ (<type>const size_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Maximum number of background threads that will be created
+ if <link linkend="background_thread">background_thread</link> is set.
+        Defaults to the number of CPUs.</para></listitem>
</varlistentry>
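+
+      <para>Because of the initialization hazard noted above, a sketch of the
+      reliable pattern (the helper name is hypothetical) is to enable the
+      threads dynamically once startup is complete:</para>
+      <programlisting language="C"><![CDATA[
+#include <stdbool.h>
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+/* Enable background purging threads after initialization; returns the
+ * mallctl() error code (0 on success). */
+static int
+enable_background_threads(void) {
+    bool enable = true;
+    return mallctl("background_thread", NULL, NULL, &enable,
+        sizeof(enable));
+}]]></programlisting>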
- <varlistentry id="opt.lg_dirty_mult">
+ <varlistentry id="opt.dirty_decay_ms">
<term>
- <mallctl>opt.lg_dirty_mult</mallctl>
+ <mallctl>opt.dirty_decay_ms</mallctl>
(<type>ssize_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Per-arena minimum ratio (log base 2) of active to dirty
- pages. Some dirty unused pages may be allowed to accumulate, within
- the limit set by the ratio (or one chunk worth of dirty pages,
- whichever is greater), before informing the kernel about some of those
- pages via <citerefentry><refentrytitle>madvise</refentrytitle>
- <manvolnum>2</manvolnum></citerefentry> or a similar system call. This
- provides the kernel with sufficient information to recycle dirty pages
- if physical memory becomes scarce and the pages remain unused. The
- default minimum ratio is 8:1 (2^3:1); an option value of -1 will
- disable dirty page purging.</para></listitem>
+ <listitem><para>Approximate time in milliseconds from the creation of a
+ set of unused dirty pages until an equivalent set of unused dirty pages
+ is purged (i.e. converted to muzzy via e.g.
+ <function>madvise(<parameter>...</parameter><parameter><constant>MADV_FREE</constant></parameter>)</function>
+ if supported by the operating system, or converted to clean otherwise)
+ and/or reused. Dirty pages are defined as previously having been
+ potentially written to by the application, and therefore consuming
+ physical memory, yet having no current use. The pages are incrementally
+ purged according to a sigmoidal decay curve that starts and ends with
+ zero purge rate. A decay time of 0 causes all unused dirty pages to be
+ purged immediately upon creation. A decay time of -1 disables purging.
+ The default decay time is 10 seconds. See <link
+ linkend="arenas.dirty_decay_ms"><mallctl>arenas.dirty_decay_ms</mallctl></link>
+ and <link
+ linkend="arena.i.dirty_decay_ms"><mallctl>arena.&lt;i&gt;.dirty_decay_ms</mallctl></link>
+ for related dynamic control options. See <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
+ for a description of muzzy pages.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.muzzy_decay_ms">
+ <term>
+ <mallctl>opt.muzzy_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Approximate time in milliseconds from the creation of a
+ set of unused muzzy pages until an equivalent set of unused muzzy pages
+ is purged (i.e. converted to clean) and/or reused. Muzzy pages are
+ defined as previously having been unused dirty pages that were
+ subsequently purged in a manner that left them subject to the
+ reclamation whims of the operating system (e.g.
+ <function>madvise(<parameter>...</parameter><parameter><constant>MADV_FREE</constant></parameter>)</function>),
+ and therefore in an indeterminate state. The pages are incrementally
+ purged according to a sigmoidal decay curve that starts and ends with
+ zero purge rate. A decay time of 0 causes all unused muzzy pages to be
+ purged immediately upon creation. A decay time of -1 disables purging.
+ The default decay time is 10 seconds. See <link
+ linkend="arenas.muzzy_decay_ms"><mallctl>arenas.muzzy_decay_ms</mallctl></link>
+ and <link
+ linkend="arena.i.muzzy_decay_ms"><mallctl>arena.&lt;i&gt;.muzzy_decay_ms</mallctl></link>
+ for related dynamic control options.</para></listitem>
+ </varlistentry>
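+
+      <para>A minimal sketch of tuning decay at run time via
+      <function>mallctl()</function> (the 1000 ms value is
+      illustrative):</para>
+      <programlisting language="C"><![CDATA[
+#include <stddef.h>
+#include <sys/types.h>
+#include <jemalloc/jemalloc.h>
+
+static void
+tune_dirty_decay(void) {
+    ssize_t decay_ms = 1000;
+    /* Default for arenas created after this point. */
+    mallctl("arenas.dirty_decay_ms", NULL, NULL, &decay_ms,
+        sizeof(decay_ms));
+    /* Existing arena 0; setting this also immediately purges pages that
+     * are considered fully decayed. */
+    mallctl("arena.0.dirty_decay_ms", NULL, NULL, &decay_ms,
+        sizeof(decay_ms));
+}]]></programlisting>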
+
+ <varlistentry id="opt.lg_extent_max_active_fit">
+ <term>
+ <mallctl>opt.lg_extent_max_active_fit</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>When reusing dirty extents, this determines the (log
+ base 2 of the) maximum ratio between the size of the active extent
+ selected (to split off from) and the size of the requested allocation.
+ This prevents the splitting of large active extents for smaller
+ allocations, which can reduce fragmentation over the long run
+        (especially for non-active extents).  A lower value may reduce
+ fragmentation, at the cost of extra active extents. The default value
+ is 6, which gives a maximum ratio of 64 (2^6).</para></listitem>
</varlistentry>
<varlistentry id="opt.stats_print">
@@ -996,74 +1105,61 @@ for (i = 0; i < nbins; i++) {
<literal>r-</literal>
</term>
<listitem><para>Enable/disable statistics printing at exit. If
- enabled, the <function>malloc_stats_print<parameter/></function>
+ enabled, the <function>malloc_stats_print()</function>
function is called at program exit via an
<citerefentry><refentrytitle>atexit</refentrytitle>
- <manvolnum>3</manvolnum></citerefentry> function. If
+ <manvolnum>3</manvolnum></citerefentry> function. <link
+ linkend="opt.stats_print_opts"><mallctl>opt.stats_print_opts</mallctl></link>
+ can be combined to specify output options. If
<option>--enable-stats</option> is specified during configuration, this
has the potential to cause deadlock for a multi-threaded process that
exits while one or more threads are executing in the memory allocation
- functions. Therefore, this option should only be used with care; it is
- primarily intended as a performance tuning aid during application
+ functions. Furthermore, <function>atexit()</function> may
+ allocate memory during application initialization and then deadlock
+ internally when jemalloc in turn calls
+ <function>atexit()</function>, so this option is not
+ universally usable (though the application can register its own
+ <function>atexit()</function> function with equivalent
+ functionality). Therefore, this option should only be used with care;
+ it is primarily intended as a performance tuning aid during application
development. This option is disabled by default.</para></listitem>
</varlistentry>
- <varlistentry id="opt.junk">
- <term>
- <mallctl>opt.junk</mallctl>
- (<type>bool</type>)
- <literal>r-</literal>
- [<option>--enable-fill</option>]
- </term>
- <listitem><para>Junk filling enabled/disabled. If enabled, each byte
- of uninitialized allocated memory will be initialized to
- <literal>0xa5</literal>. All deallocated memory will be initialized to
- <literal>0x5a</literal>. This is intended for debugging and will
- impact performance negatively. This option is disabled by default
- unless <option>--enable-debug</option> is specified during
- configuration, in which case it is enabled by default unless running
- inside <ulink
- url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
- </varlistentry>
-
- <varlistentry id="opt.quarantine">
+ <varlistentry id="opt.stats_print_opts">
<term>
- <mallctl>opt.quarantine</mallctl>
- (<type>size_t</type>)
+ <mallctl>opt.stats_print_opts</mallctl>
+ (<type>const char *</type>)
<literal>r-</literal>
- [<option>--enable-fill</option>]
</term>
- <listitem><para>Per thread quarantine size in bytes. If non-zero, each
- thread maintains a FIFO object quarantine that stores up to the
- specified number of bytes of memory. The quarantined memory is not
- freed until it is released from quarantine, though it is immediately
- junk-filled if the <link
- linkend="opt.junk"><mallctl>opt.junk</mallctl></link> option is
- enabled. This feature is of particular use in combination with <ulink
- url="http://valgrind.org/">Valgrind</ulink>, which can detect attempts
- to access quarantined objects. This is intended for debugging and will
- impact performance negatively. The default quarantine size is 0 unless
- running inside Valgrind, in which case the default is 16
- MiB.</para></listitem>
+        <listitem><para>Options (the <parameter>opts</parameter> string) to pass
+        to <function>malloc_stats_print()</function> at exit (enabled
+ through <link
+ linkend="opt.stats_print"><mallctl>opt.stats_print</mallctl></link>). See
+ available options in <link
+ linkend="malloc_stats_print_opts"><function>malloc_stats_print()</function></link>.
+ Has no effect unless <link
+ linkend="opt.stats_print"><mallctl>opt.stats_print</mallctl></link> is
+ enabled. The default is <quote></quote>.</para></listitem>
</varlistentry>
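+
+      <para>For example (a sketch; the <literal>J</literal> flag selects JSON
+      output as described for <function>malloc_stats_print()</function>),
+      machine-readable statistics can be emitted at exit with:</para>
+      <programlisting language="C"><![CDATA[
+const char *malloc_conf = "stats_print:true,stats_print_opts:J";]]></programlisting>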
- <varlistentry id="opt.redzone">
+ <varlistentry id="opt.junk">
<term>
- <mallctl>opt.redzone</mallctl>
- (<type>bool</type>)
+ <mallctl>opt.junk</mallctl>
+ (<type>const char *</type>)
<literal>r-</literal>
[<option>--enable-fill</option>]
</term>
- <listitem><para>Redzones enabled/disabled. If enabled, small
- allocations have redzones before and after them. Furthermore, if the
- <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link> option is
- enabled, the redzones are checked for corruption during deallocation.
- However, the primary intended purpose of this feature is to be used in
- combination with <ulink url="http://valgrind.org/">Valgrind</ulink>,
- which needs redzones in order to do effective buffer overflow/underflow
- detection. This option is intended for debugging and will impact
- performance negatively. This option is disabled by
- default unless running inside Valgrind.</para></listitem>
+ <listitem><para>Junk filling. If set to <quote>alloc</quote>, each byte
+ of uninitialized allocated memory will be initialized to
+ <literal>0xa5</literal>. If set to <quote>free</quote>, all deallocated
+ memory will be initialized to <literal>0x5a</literal>. If set to
+ <quote>true</quote>, both allocated and deallocated memory will be
+        initialized, and if set to <quote>false</quote>, junk filling will be
+ disabled entirely. This is intended for debugging and will impact
+ performance negatively. This option is <quote>false</quote> by default
+ unless <option>--enable-debug</option> is specified during
+ configuration, in which case it is <quote>true</quote> by
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.zero">
@@ -1076,9 +1172,8 @@ for (i = 0; i < nbins; i++) {
<listitem><para>Zero filling enabled/disabled. If enabled, each byte
of uninitialized allocated memory will be initialized to 0. Note that
this initialization only happens once for each byte, so
- <function>realloc<parameter/></function>,
- <function>rallocx<parameter/></function> and
- <function>rallocm<parameter/></function> calls do not zero memory that
+ <function>realloc()</function> and
+ <function>rallocx()</function> calls do not zero memory that
was previously allocated. This is intended for debugging and will
impact performance negatively. This option is disabled by default.
</para></listitem>
@@ -1097,19 +1192,6 @@ for (i = 0; i < nbins; i++) {
is disabled by default.</para></listitem>
</varlistentry>
- <varlistentry id="opt.valgrind">
- <term>
- <mallctl>opt.valgrind</mallctl>
- (<type>bool</type>)
- <literal>r-</literal>
- [<option>--enable-valgrind</option>]
- </term>
- <listitem><para><ulink url="http://valgrind.org/">Valgrind</ulink>
- support enabled/disabled. This option is vestigal because jemalloc
- auto-detects whether it is running inside Valgrind. This option is
- disabled by default, unless running inside Valgrind.</para></listitem>
- </varlistentry>
-
<varlistentry id="opt.xmalloc">
<term>
<mallctl>opt.xmalloc</mallctl>
@@ -1135,18 +1217,15 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>opt.tcache</mallctl>
(<type>bool</type>)
<literal>r-</literal>
- [<option>--enable-tcache</option>]
</term>
- <listitem><para>Thread-specific caching enabled/disabled. When there
- are multiple threads, each thread uses a thread-specific cache for
- objects up to a certain size. Thread-specific caching allows many
- allocations to be satisfied without performing any thread
- synchronization, at the cost of increased memory use. See the
- <link
+ <listitem><para>Thread-specific caching (tcache) enabled/disabled. When
+ there are multiple threads, each thread uses a tcache for objects up to
+ a certain size. Thread-specific caching allows many allocations to be
+ satisfied without performing any thread synchronization, at the cost of
+ increased memory use. See the <link
linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
option for related tuning information. This option is enabled by
- default unless running inside <ulink
- url="http://valgrind.org/">Valgrind</ulink>.</para></listitem>
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.lg_tcache_max">
@@ -1154,14 +1233,35 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>opt.lg_tcache_max</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
- [<option>--enable-tcache</option>]
</term>
<listitem><para>Maximum size class (log base 2) to cache in the
- thread-specific cache. At a minimum, all small size classes are
- cached, and at a maximum all large size classes are cached. The
+ thread-specific cache (tcache). At a minimum, all small size classes
+ are cached, and at a maximum all large size classes are cached. The
default maximum is 32 KiB (2^15).</para></listitem>
</varlistentry>
+ <varlistentry id="opt.thp">
+ <term>
+ <mallctl>opt.thp</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal>
+ </term>
+        <listitem><para>Transparent hugepage (THP) mode.  The settings
+        <quote>always</quote>, <quote>never</quote> and <quote>default</quote>
+        are available if THP is supported by the operating system.  The
+        <quote>always</quote> setting enables transparent hugepage for all
+        user memory mappings with
+        <parameter><constant>MADV_HUGEPAGE</constant></parameter>;
+        <quote>never</quote> ensures no transparent hugepage with
+        <parameter><constant>MADV_NOHUGEPAGE</constant></parameter>; the
+        <quote>default</quote> setting makes no changes.  Note that this option does not
+ affect THP for jemalloc internal metadata (see <link
+ linkend="opt.metadata_thp"><mallctl>opt.metadata_thp</mallctl></link>);
+ in addition, for arenas with customized <link
+ linkend="arena.i.extent_hooks"><mallctl>extent_hooks</mallctl></link>,
+ this option is bypassed as it is implemented as part of the default
+ extent hooks.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.prof">
<term>
<mallctl>opt.prof</mallctl>
@@ -1183,9 +1283,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option for information on high-water-triggered profile dumping, and the
<link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl></link>
option for final profile dumping. Profile output is compatible with
- the included <command>pprof</command> Perl script, which originates
- from the <ulink url="http://code.google.com/p/gperftools/">gperftools
- package</ulink>.</para></listitem>
+ the <command>jeprof</command> command, which is based on the
+ <command>pprof</command> that is developed as part of the <ulink
+ url="http://code.google.com/p/gperftools/">gperftools
+ package</ulink>. See <link linkend="heap_profile_format">HEAP PROFILE
+ FORMAT</link> for heap profile format documentation.</para></listitem>
</varlistentry>
<varlistentry id="opt.prof_prefix">
@@ -1206,7 +1308,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<term>
<mallctl>opt.prof_active</mallctl>
(<type>bool</type>)
- <literal>rw</literal>
+ <literal>r-</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Profiling activated/deactivated. This is a secondary
@@ -1219,10 +1321,25 @@ malloc_conf = "xmalloc:true";]]></programlisting>
This option is enabled by default.</para></listitem>
</varlistentry>
+ <varlistentry id="opt.prof_thread_active_init">
+ <term>
+ <mallctl>opt.prof_thread_active_init</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Initial setting for <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ in newly created threads. The initial setting for newly created threads
+ can also be changed during execution via the <link
+ linkend="prof.thread_active_init"><mallctl>prof.thread_active_init</mallctl></link>
+ mallctl. This option is enabled by default.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.lg_prof_sample">
<term>
<mallctl>opt.lg_prof_sample</mallctl>
- (<type>ssize_t</type>)
+ (<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-prof</option>]
</term>
@@ -1276,13 +1393,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-prof</option>]
</term>
- <listitem><para>Trigger a memory profile dump every time the total
- virtual memory exceeds the previous maximum. Profiles are dumped to
- files named according to the pattern
- <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
- where <literal>&lt;prefix&gt;</literal> is controlled by the <link
- linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. This option is disabled by default.</para></listitem>
+ <listitem><para>Set the initial state of <link
+ linkend="prof.gdump"><mallctl>prof.gdump</mallctl></link>, which when
+ enabled triggers a memory profile dump every time the total virtual
+ memory exceeds the previous maximum. This option is disabled by
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.prof_final">
@@ -1299,7 +1414,13 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the <link
linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. This option is enabled by default.</para></listitem>
+ option. Note that <function>atexit()</function> may allocate
+ memory during application initialization and then deadlock internally
+ when jemalloc in turn calls <function>atexit()</function>, so
+ this option is not universally usable (though the application can
+ register its own <function>atexit()</function> function with
+ equivalent functionality). This option is disabled by
+ default.</para></listitem>
</varlistentry>
<varlistentry id="opt.prof_leak">
@@ -1327,7 +1448,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Get or set the arena associated with the calling
thread. If the specified arena was not initialized beforehand (see the
<link
- linkend="arenas.initialized"><mallctl>arenas.initialized</mallctl></link>
+ linkend="arena.i.initialized"><mallctl>arena.i.initialized</mallctl></link>
mallctl), it will be automatically initialized as a side effect of
calling this interface.</para></listitem>
</varlistentry>
@@ -1356,7 +1477,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<link
linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
mallctl. This is useful for avoiding the overhead of repeated
- <function>mallctl*<parameter/></function> calls.</para></listitem>
+ <function>mallctl*()</function> calls.</para></listitem>
</varlistentry>
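+
+      <para>A sketch of the intended usage (assuming statistics support is
+      compiled in): read the pointer once, then dereference it whenever a
+      fresh value is needed:</para>
+      <programlisting language="C"><![CDATA[
+#include <stdint.h>
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+/* Returns a pointer that is valid only for the calling thread. */
+static uint64_t *
+thread_allocated_ptr(void) {
+    uint64_t *allocatedp = NULL;
+    size_t sz = sizeof(allocatedp);
+    mallctl("thread.allocatedp", (void *)&allocatedp, &sz, NULL, 0);
+    return allocatedp;
+}]]></programlisting>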
<varlistentry id="thread.deallocated">
@@ -1383,7 +1504,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<link
linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>
mallctl. This is useful for avoiding the overhead of repeated
- <function>mallctl*<parameter/></function> calls.</para></listitem>
+ <function>mallctl*()</function> calls.</para></listitem>
</varlistentry>
<varlistentry id="thread.tcache.enabled">
@@ -1391,12 +1512,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>thread.tcache.enabled</mallctl>
(<type>bool</type>)
<literal>rw</literal>
- [<option>--enable-tcache</option>]
</term>
<listitem><para>Enable/disable calling thread's tcache. The tcache is
implicitly flushed as a side effect of becoming
disabled (see <link
- lenkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>).
+ linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>).
</para></listitem>
</varlistentry>
@@ -1405,11 +1525,10 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>thread.tcache.flush</mallctl>
(<type>void</type>)
<literal>--</literal>
- [<option>--enable-tcache</option>]
</term>
- <listitem><para>Flush calling thread's tcache. This interface releases
- all cached objects and internal data structures associated with the
- calling thread's thread-specific cache. Ordinarily, this interface
+ <listitem><para>Flush calling thread's thread-specific cache (tcache).
+ This interface releases all cached objects and internal data structures
+ associated with the calling thread's tcache. Ordinarily, this interface
need not be called, since automatic periodic incremental garbage
collection occurs, and the thread cache is automatically discarded when
a thread exits. However, garbage collection is triggered by allocation
@@ -1418,18 +1537,158 @@ malloc_conf = "xmalloc:true";]]></programlisting>
the developer may find manual flushing useful.</para></listitem>
</varlistentry>
+ <varlistentry id="thread.prof.name">
+ <term>
+ <mallctl>thread.prof.name</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal> or
+ <literal>-w</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Get/set the descriptive name associated with the calling
+ thread in memory profile dumps. An internal copy of the name string is
+ created, so the input string need not be maintained after this interface
+ completes execution. The output string of this interface should be
+ copied for non-ephemeral uses, because multiple implementation details
+ can cause asynchronous string deallocation. Furthermore, each
+ invocation of this interface can only read or write; simultaneous
+ read/write is not supported due to string lifetime limitations. The
+ name string must be nil-terminated and comprised only of characters in
+ the sets recognized
+ by <citerefentry><refentrytitle>isgraph</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry> and
+ <citerefentry><refentrytitle>isblank</refentrytitle>
+ <manvolnum>3</manvolnum></citerefentry>.</para></listitem>
+ </varlistentry>
+
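+      <para>A sketch of labeling the calling thread (the name shown is
+      arbitrary):</para>
+      <programlisting language="C"><![CDATA[
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+static void
+label_thread_for_profiling(void) {
+    const char *name = "io-worker";
+    /* Writing takes a pointer to the string pointer. */
+    mallctl("thread.prof.name", NULL, NULL, (void *)&name, sizeof(name));
+}]]></programlisting>
+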
+ <varlistentry id="thread.prof.active">
+ <term>
+ <mallctl>thread.prof.active</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Control whether sampling is currently active for the
+ calling thread. This is an activation mechanism in addition to <link
+ linkend="prof.active"><mallctl>prof.active</mallctl></link>; both must
+ be active for the calling thread to sample. This flag is enabled by
+ default.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.create">
+ <term>
+ <mallctl>tcache.create</mallctl>
+ (<type>unsigned</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Create an explicit thread-specific cache (tcache) and
+ return an identifier that can be passed to the <link
+ linkend="MALLOCX_TCACHE"><constant>MALLOCX_TCACHE(<parameter>tc</parameter>)</constant></link>
+ macro to explicitly use the specified cache rather than the
+ automatically managed one that is used by default. Each explicit cache
+        can be used by only one thread at a time; the application must ensure
+ that this constraint holds.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.flush">
+ <term>
+ <mallctl>tcache.flush</mallctl>
+ (<type>unsigned</type>)
+ <literal>-w</literal>
+ </term>
+ <listitem><para>Flush the specified thread-specific cache (tcache). The
+ same considerations apply to this interface as to <link
+ linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>,
+ except that the tcache will never be automatically discarded.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="tcache.destroy">
+ <term>
+ <mallctl>tcache.destroy</mallctl>
+ (<type>unsigned</type>)
+ <literal>-w</literal>
+ </term>
+ <listitem><para>Flush the specified thread-specific cache (tcache) and
+ make the identifier available for use during a future tcache creation.
+ </para></listitem>
+ </varlistentry>
+
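+      <para>A sketch of the full lifecycle of an explicit tcache, combining
+      the three interfaces above (the demo function is hypothetical):</para>
+      <programlisting language="C"><![CDATA[
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+static void
+explicit_tcache_demo(void) {
+    unsigned tc;
+    size_t sz = sizeof(tc);
+    if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0)
+        return;
+    void *p = mallocx(4096, MALLOCX_TCACHE(tc));
+    if (p != NULL)
+        dallocx(p, MALLOCX_TCACHE(tc));
+    mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
+}]]></programlisting>
+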
+ <varlistentry id="arena.i.initialized">
+ <term>
+ <mallctl>arena.&lt;i&gt;.initialized</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Get whether the specified arena's statistics are
+ initialized (i.e. the arena was initialized prior to the current epoch).
+ This interface can also be nominally used to query whether the merged
+ statistics corresponding to <constant>MALLCTL_ARENAS_ALL</constant> are
+ initialized (always true).</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.decay">
+ <term>
+ <mallctl>arena.&lt;i&gt;.decay</mallctl>
+ (<type>void</type>)
+ <literal>--</literal>
+ </term>
+ <listitem><para>Trigger decay-based purging of unused dirty/muzzy pages
+ for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
+ <constant>MALLCTL_ARENAS_ALL</constant>. The proportion of unused
+ dirty/muzzy pages to be purged depends on the current time; see <link
+ linkend="opt.dirty_decay_ms"><mallctl>opt.dirty_decay_ms</mallctl></link>
+ and <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzy_decay_ms</mallctl></link>
+ for details.</para></listitem>
+ </varlistentry>
+
<varlistentry id="arena.i.purge">
<term>
<mallctl>arena.&lt;i&gt;.purge</mallctl>
- (<type>unsigned</type>)
+ (<type>void</type>)
<literal>--</literal>
</term>
- <listitem><para>Purge unused dirty pages for arena &lt;i&gt;, or for
- all arenas if &lt;i&gt; equals <link
- linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>.
+ <listitem><para>Purge all unused dirty pages for arena &lt;i&gt;, or for
+ all arenas if &lt;i&gt; equals <constant>MALLCTL_ARENAS_ALL</constant>.
</para></listitem>
</varlistentry>
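+
+      <para>A sketch of purging every arena at once; the mallctl name is
+      built from the reserved <constant>MALLCTL_ARENAS_ALL</constant>
+      index:</para>
+      <programlisting language="C"><![CDATA[
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+static void
+purge_all_arenas(void) {
+    char cmd[64];
+    snprintf(cmd, sizeof(cmd), "arena.%u.purge",
+        (unsigned)MALLCTL_ARENAS_ALL);
+    mallctl(cmd, NULL, NULL, NULL, 0);
+}]]></programlisting>
+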
+ <varlistentry id="arena.i.reset">
+ <term>
+ <mallctl>arena.&lt;i&gt;.reset</mallctl>
+ (<type>void</type>)
+ <literal>--</literal>
+ </term>
+ <listitem><para>Discard all of the arena's extant allocations. This
+ interface can only be used with arenas explicitly created via <link
+ linkend="arenas.create"><mallctl>arenas.create</mallctl></link>. None
+        of the arena's discarded/cached allocations may be accessed afterward.  As
+ part of this requirement, all thread caches which were used to
+ allocate/deallocate in conjunction with the arena must be flushed
+ beforehand.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.destroy">
+ <term>
+ <mallctl>arena.&lt;i&gt;.destroy</mallctl>
+ (<type>void</type>)
+ <literal>--</literal>
+ </term>
+ <listitem><para>Destroy the arena. Discard all of the arena's extant
+ allocations using the same mechanism as for <link
+ linkend="arena.i.reset"><mallctl>arena.&lt;i&gt;.reset</mallctl></link>
+ (with all the same constraints and side effects), merge the arena stats
+ into those accessible at arena index
+ <constant>MALLCTL_ARENAS_DESTROYED</constant>, and then completely
+ discard all metadata associated with the arena. Future calls to <link
+ linkend="arenas.create"><mallctl>arenas.create</mallctl></link> may
+ recycle the arena index. Destruction will fail if any threads are
+ currently associated with the arena as a result of calls to <link
+ linkend="thread.arena"><mallctl>thread.arena</mallctl></link>.</para></listitem>
+ </varlistentry>
+
<varlistentry id="arena.i.dss">
<term>
<mallctl>arena.&lt;i&gt;.dss</mallctl>
@@ -1438,15 +1697,293 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</term>
<listitem><para>Set the precedence of dss allocation as related to mmap
allocation for arena &lt;i&gt;, or for all arenas if &lt;i&gt; equals
- <link
- linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>. Note
- that even during huge allocation this setting is read from the arena
- that would be chosen for small or large allocation so that applications
- can depend on consistent dss versus mmap allocation regardless of
- allocation size. See <link
+ <constant>MALLCTL_ARENAS_ALL</constant>. See <link
linkend="opt.dss"><mallctl>opt.dss</mallctl></link> for supported
- settings.
- </para></listitem>
+ settings.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.dirty_decay_ms">
+ <term>
+ <mallctl>arena.&lt;i&gt;.dirty_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Current per-arena approximate time in milliseconds from
+ the creation of a set of unused dirty pages until an equivalent set of
+ unused dirty pages is purged and/or reused. Each time this interface is
+ set, all currently unused dirty pages are considered to have fully
+ decayed, which causes immediate purging of all unused dirty pages unless
+ the decay time is set to -1 (i.e. purging disabled). See <link
+ linkend="opt.dirty_decay_ms"><mallctl>opt.dirty_decay_ms</mallctl></link>
+ for additional information.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.muzzy_decay_ms">
+ <term>
+ <mallctl>arena.&lt;i&gt;.muzzy_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Current per-arena approximate time in milliseconds from
+ the creation of a set of unused muzzy pages until an equivalent set of
+ unused muzzy pages is purged and/or reused. Each time this interface is
+ set, all currently unused muzzy pages are considered to have fully
+ decayed, which causes immediate purging of all unused muzzy pages unless
+ the decay time is set to -1 (i.e. purging disabled). See <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
+ for additional information.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.retain_grow_limit">
+ <term>
+ <mallctl>arena.&lt;i&gt;.retain_grow_limit</mallctl>
+ (<type>size_t</type>)
+ <literal>rw</literal>
+ </term>
+        <listitem><para>Maximum size to grow the retained region (only
+        relevant when <link
+        linkend="opt.retain"><mallctl>opt.retain</mallctl></link> is enabled).
+        This controls the maximum increment by which to expand virtual
+        memory, or allocation through <link
+        linkend="arena.i.extent_hooks"><mallctl>arena.&lt;i&gt;.extent_hooks</mallctl></link>.
+        In particular, if customized extent hooks reserve physical memory
+        (e.g. 1G huge pages), this is useful to control the allocation hook's
+        input size.  The default is no limit.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arena.i.extent_hooks">
+ <term>
+ <mallctl>arena.&lt;i&gt;.extent_hooks</mallctl>
+ (<type>extent_hooks_t *</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Get or set the extent management hook functions for
+ arena &lt;i&gt;. The functions must be capable of operating on all
+ extant extents associated with arena &lt;i&gt;, usually by passing
+ unknown extents to the replaced functions. In practice, it is feasible
+ to control allocation for arenas explicitly created via <link
+ linkend="arenas.create"><mallctl>arenas.create</mallctl></link> such
+ that all extents originate from an application-supplied extent allocator
+ (by specifying the custom extent hook functions during arena creation),
+ but the automatically created arenas will have already created extents
+ prior to the application having an opportunity to take over extent
+ allocation.</para>
+
+ <programlisting language="C"><![CDATA[
+typedef struct extent_hooks_s extent_hooks_t;
+struct extent_hooks_s {
+ extent_alloc_t *alloc;
+ extent_dalloc_t *dalloc;
+ extent_destroy_t *destroy;
+ extent_commit_t *commit;
+ extent_decommit_t *decommit;
+ extent_purge_t *purge_lazy;
+ extent_purge_t *purge_forced;
+ extent_split_t *split;
+ extent_merge_t *merge;
+};]]></programlisting>
+ <para>The <type>extent_hooks_t</type> structure comprises function
+ pointers which are described individually below. jemalloc uses these
+ functions to manage extent lifetime, which starts off with allocation of
+ mapped committed memory, in the simplest case followed by deallocation.
+ However, there are performance and platform reasons to retain extents
+ for later reuse. Cleanup attempts cascade from deallocation to decommit
+ to forced purging to lazy purging, which gives the extent management
+ functions opportunities to reject the most permanent cleanup operations
+ in favor of less permanent (and often less costly) operations. All
+ operations except allocation can be universally opted out of by setting
+ the hook pointers to <constant>NULL</constant>, or selectively opted out
+ of by returning failure. Note that once the extent hook is set, the
+ structure is accessed directly by the associated arenas, so it must
+ remain valid for the entire lifetime of the arenas.</para>
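+
+        <para>As a sketch of the delegation pattern described above (names
+        are illustrative and error handling is omitted), an application can
+        capture the default hooks from an existing arena and install a thin
+        wrapper that logs allocations while forwarding to the
+        originals:</para>
+        <programlisting language="C"><![CDATA[
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+static extent_hooks_t *default_hooks; /* captured from arena 0 */
+static extent_hooks_t logging_hooks;  /* must outlive the arena */
+
+static void *
+log_alloc(extent_hooks_t *eh, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
+    fprintf(stderr, "extent alloc: %zu bytes for arena %u\n", size,
+        arena_ind);
+    return default_hooks->alloc(default_hooks, new_addr, size,
+        alignment, zero, commit, arena_ind);
+}
+
+static unsigned
+create_logging_arena(void) {
+    size_t sz = sizeof(default_hooks);
+    unsigned arena_ind;
+    extent_hooks_t *hooks = &logging_hooks;
+
+    /* Capture the defaults so the wrapper can delegate to them. */
+    mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz,
+        NULL, 0);
+    logging_hooks = *default_hooks;
+    logging_hooks.alloc = log_alloc;
+
+    sz = sizeof(arena_ind);
+    mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&hooks,
+        sizeof(hooks));
+    return arena_ind;
+}]]></programlisting>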
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef void *<function>(extent_alloc_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>new_addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>alignment</parameter></paramdef>
+ <paramdef>bool *<parameter>zero</parameter></paramdef>
+ <paramdef>bool *<parameter>commit</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent allocation function conforms to the
+ <type>extent_alloc_t</type> type and upon success returns a pointer to
+ <parameter>size</parameter> bytes of mapped memory on behalf of arena
+ <parameter>arena_ind</parameter> such that the extent's base address is
+ a multiple of <parameter>alignment</parameter>, as well as setting
+ <parameter>*zero</parameter> to indicate whether the extent is zeroed
+ and <parameter>*commit</parameter> to indicate whether the extent is
+ committed. Upon error the function returns <constant>NULL</constant>
+ and leaves <parameter>*zero</parameter> and
+ <parameter>*commit</parameter> unmodified. The
+ <parameter>size</parameter> parameter is always a multiple of the page
+ size. The <parameter>alignment</parameter> parameter is always a power
+ of two at least as large as the page size. Zeroing is mandatory if
+ <parameter>*zero</parameter> is true upon function entry. Committing is
+ mandatory if <parameter>*commit</parameter> is true upon function entry.
+ If <parameter>new_addr</parameter> is not <constant>NULL</constant>, the
+ returned pointer must be <parameter>new_addr</parameter> on success or
+ <constant>NULL</constant> on error. Committed memory may be committed
+ in absolute terms as on a system that does not overcommit, or in
+ implicit terms as on a system that overcommits and satisfies physical
+ memory needs on demand via soft page faults. Note that replacing the
+ default extent allocation function makes the arena's <link
+ linkend="arena.i.dss"><mallctl>arena.&lt;i&gt;.dss</mallctl></link>
+ setting irrelevant.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_dalloc_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>
+ An extent deallocation function conforms to the
+ <type>extent_dalloc_t</type> type and deallocates an extent at given
+ <parameter>addr</parameter> and <parameter>size</parameter> with
+        <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates opt-out from
+ deallocation; the virtual memory mapping associated with the extent
+ remains mapped, in the same commit state, and available for future use,
+ in which case it will be automatically retained for later reuse.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef void <function>(extent_destroy_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>
+ An extent destruction function conforms to the
+ <type>extent_destroy_t</type> type and unconditionally destroys an
+ extent at given <parameter>addr</parameter> and
+ <parameter>size</parameter> with
+        <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>. This function may be
+ called to destroy retained extents during arena destruction (see <link
+ linkend="arena.i.destroy"><mallctl>arena.&lt;i&gt;.destroy</mallctl></link>).</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_commit_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent commit function conforms to the
+ <type>extent_commit_t</type> type and commits zeroed physical memory to
+ back pages within an extent at given <parameter>addr</parameter> and
+ <parameter>size</parameter> at <parameter>offset</parameter> bytes,
+ extending for <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>, returning false upon success.
+ Committed memory may be committed in absolute terms as on a system that
+ does not overcommit, or in implicit terms as on a system that
+ overcommits and satisfies physical memory needs on demand via soft page
+ faults. If the function returns true, this indicates insufficient
+ physical memory to satisfy the request.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_decommit_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent decommit function conforms to the
+ <type>extent_decommit_t</type> type and decommits any physical memory
+ that is backing pages within an extent at given
+ <parameter>addr</parameter> and <parameter>size</parameter> at
+ <parameter>offset</parameter> bytes, extending for
+ <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>, returning false upon success, in which
+ case the pages will be committed via the extent commit function before
+ being reused. If the function returns true, this indicates opt-out from
+ decommit; the memory remains committed and available for future use, in
+ which case it will be automatically retained for later reuse.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_purge_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>offset</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent purge function conforms to the
+ <type>extent_purge_t</type> type and discards physical pages
+ within the virtual memory mapping associated with an extent at given
+ <parameter>addr</parameter> and <parameter>size</parameter> at
+ <parameter>offset</parameter> bytes, extending for
+ <parameter>length</parameter> on behalf of arena
+ <parameter>arena_ind</parameter>. A lazy extent purge function (e.g.
+ implemented via
+ <function>madvise(<parameter>...</parameter><parameter><constant>MADV_FREE</constant></parameter>)</function>)
+ can delay purging indefinitely and leave the pages within the purged
+        virtual memory range in an indeterminate state, whereas a forced extent
+ purge function immediately purges, and the pages within the virtual
+ memory range will be zero-filled the next time they are accessed. If
+ the function returns true, this indicates failure to purge.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_split_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr</parameter></paramdef>
+ <paramdef>size_t <parameter>size</parameter></paramdef>
+ <paramdef>size_t <parameter>size_a</parameter></paramdef>
+ <paramdef>size_t <parameter>size_b</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent split function conforms to the
+ <type>extent_split_t</type> type and optionally splits an extent at
+ given <parameter>addr</parameter> and <parameter>size</parameter> into
+ two adjacent extents, the first of <parameter>size_a</parameter> bytes,
+ and the second of <parameter>size_b</parameter> bytes, operating on
+ <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates that the extent
+ remains unsplit and therefore should continue to be operated on as a
+ whole.</para>
+
+ <funcsynopsis><funcprototype>
+ <funcdef>typedef bool <function>(extent_merge_t)</function></funcdef>
+ <paramdef>extent_hooks_t *<parameter>extent_hooks</parameter></paramdef>
+ <paramdef>void *<parameter>addr_a</parameter></paramdef>
+ <paramdef>size_t <parameter>size_a</parameter></paramdef>
+ <paramdef>void *<parameter>addr_b</parameter></paramdef>
+ <paramdef>size_t <parameter>size_b</parameter></paramdef>
+ <paramdef>bool <parameter>committed</parameter></paramdef>
+ <paramdef>unsigned <parameter>arena_ind</parameter></paramdef>
+ </funcprototype></funcsynopsis>
+ <literallayout></literallayout>
+ <para>An extent merge function conforms to the
+ <type>extent_merge_t</type> type and optionally merges adjacent extents,
+ at given <parameter>addr_a</parameter> and <parameter>size_a</parameter>
+ with given <parameter>addr_b</parameter> and
+ <parameter>size_b</parameter> into one contiguous extent, operating on
+ <parameter>committed</parameter>/decommitted memory as indicated, on
+ behalf of arena <parameter>arena_ind</parameter>, returning false upon
+ success. If the function returns true, this indicates that the extents
+ remain distinct mappings and therefore should continue to be operated on
+ independently.</para>
+ </listitem>
</varlistentry>
<varlistentry id="arenas.narenas">
@@ -1458,16 +1995,36 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Current limit on number of arenas.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.initialized">
+ <varlistentry id="arenas.dirty_decay_ms">
<term>
- <mallctl>arenas.initialized</mallctl>
- (<type>bool *</type>)
- <literal>r-</literal>
+ <mallctl>arenas.dirty_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
+ </term>
+ <listitem><para>Current default per-arena approximate time in
+ milliseconds from the creation of a set of unused dirty pages until an
+ equivalent set of unused dirty pages is purged and/or reused, used to
+ initialize <link
+ linkend="arena.i.dirty_decay_ms"><mallctl>arena.&lt;i&gt;.dirty_decay_ms</mallctl></link>
+ during arena creation. See <link
+ linkend="opt.dirty_decay_ms"><mallctl>opt.dirty_decay_ms</mallctl></link>
+ for additional information.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="arenas.muzzy_decay_ms">
+ <term>
+ <mallctl>arenas.muzzy_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>rw</literal>
</term>
- <listitem><para>An array of <link
- linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>
- booleans. Each boolean indicates whether the corresponding arena is
- initialized.</para></listitem>
+ <listitem><para>Current default per-arena approximate time in
+ milliseconds from the creation of a set of unused muzzy pages until an
+ equivalent set of unused muzzy pages is purged and/or reused, used to
+ initialize <link
+ linkend="arena.i.muzzy_decay_ms"><mallctl>arena.&lt;i&gt;.muzzy_decay_ms</mallctl></link>
+ during arena creation. See <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
+ for additional information.</para></listitem>
</varlistentry>
<varlistentry id="arenas.quantum">
@@ -1493,7 +2050,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>arenas.tcache_max</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
- [<option>--enable-tcache</option>]
</term>
<listitem><para>Maximum thread-cached size class.</para></listitem>
</varlistentry>
@@ -1512,7 +2068,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>arenas.nhbins</mallctl>
(<type>unsigned</type>)
<literal>r-</literal>
- [<option>--enable-tcache</option>]
</term>
<listitem><para>Total number of thread cache bin size
classes.</para></listitem>
@@ -1533,30 +2088,30 @@ malloc_conf = "xmalloc:true";]]></programlisting>
(<type>uint32_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Number of regions per page run.</para></listitem>
+ <listitem><para>Number of regions per slab.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.bin.i.run_size">
+ <varlistentry id="arenas.bin.i.slab_size">
<term>
- <mallctl>arenas.bin.&lt;i&gt;.run_size</mallctl>
+ <mallctl>arenas.bin.&lt;i&gt;.slab_size</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Number of bytes per page run.</para></listitem>
+ <listitem><para>Number of bytes per slab.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.nlruns">
+ <varlistentry id="arenas.nlextents">
<term>
- <mallctl>arenas.nlruns</mallctl>
- (<type>size_t</type>)
+ <mallctl>arenas.nlextents</mallctl>
+ (<type>unsigned</type>)
<literal>r-</literal>
</term>
<listitem><para>Total number of large size classes.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.lrun.i.size">
+ <varlistentry id="arenas.lextent.i.size">
<term>
- <mallctl>arenas.lrun.&lt;i&gt;.size</mallctl>
+ <mallctl>arenas.lextent.&lt;i&gt;.size</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
</term>
@@ -1564,24 +2119,38 @@ malloc_conf = "xmalloc:true";]]></programlisting>
class.</para></listitem>
</varlistentry>
- <varlistentry id="arenas.purge">
+ <varlistentry id="arenas.create">
<term>
- <mallctl>arenas.purge</mallctl>
- (<type>unsigned</type>)
- <literal>-w</literal>
+ <mallctl>arenas.create</mallctl>
+ (<type>unsigned</type>, <type>extent_hooks_t *</type>)
+ <literal>rw</literal>
</term>
- <listitem><para>Purge unused dirty pages for the specified arena, or
- for all arenas if none is specified.</para></listitem>
+ <listitem><para>Explicitly create a new arena outside the range of
+ automatically managed arenas, with optionally specified extent hooks,
+ and return the new arena index.</para></listitem>
</varlistentry>
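+
+      <para>A sketch of steering allocations to a manually created arena (the
+      helper name is hypothetical; error handling is abbreviated):</para>
+      <programlisting language="C"><![CDATA[
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+static void *
+alloc_in_private_arena(size_t size) {
+    unsigned arena_ind;
+    size_t sz = sizeof(arena_ind);
+    if (mallctl("arenas.create", &arena_ind, &sz, NULL, 0) != 0)
+        return NULL;
+    /* Bypass thread caches so placement is deterministic. */
+    return mallocx(size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+}]]></programlisting>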
- <varlistentry id="arenas.extend">
+ <varlistentry id="arenas.lookup">
<term>
- <mallctl>arenas.extend</mallctl>
- (<type>unsigned</type>)
- <literal>r-</literal>
+ <mallctl>arenas.lookup</mallctl>
+        (<type>unsigned</type>, <type>void *</type>)
+        <literal>rw</literal>
+      </term>
+        <listitem><para>Index of the arena to which an allocation belongs.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="prof.thread_active_init">
+ <term>
+ <mallctl>prof.thread_active_init</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
</term>
- <listitem><para>Extend the array of arenas by appending a new arena,
- and returning the new arena index.</para></listitem>
+ <listitem><para>Control the initial setting for <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ in newly created threads. See the <link
+ linkend="opt.prof_thread_active_init"><mallctl>opt.prof_thread_active_init</mallctl></link>
+ option for additional information.</para></listitem>
</varlistentry>
<varlistentry id="prof.active">
@@ -1594,8 +2163,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Control whether sampling is currently active. See the
<link
linkend="opt.prof_active"><mallctl>opt.prof_active</mallctl></link>
- option for additional information.
- </para></listitem>
+ option for additional information, as well as the interrelated <link
+ linkend="thread.prof.active"><mallctl>thread.prof.active</mallctl></link>
+ mallctl.</para></listitem>
</varlistentry>
<varlistentry id="prof.dump">
@@ -1614,6 +2184,49 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option.</para></listitem>
</varlistentry>
+ <varlistentry id="prof.gdump">
+ <term>
+ <mallctl>prof.gdump</mallctl>
+ (<type>bool</type>)
+ <literal>rw</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>When enabled, trigger a memory profile dump every time
+ the total virtual memory exceeds the previous maximum. Profiles are
+ dumped to files named according to the pattern
+ <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
+ where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+ option.</para></listitem>
+ </varlistentry>
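+
+      <para>For example (a sketch, assuming profiling is compiled in and
+      active), high-water dumps can be toggled at run time:</para>
+      <programlisting language="C"><![CDATA[
+#include <stdbool.h>
+#include <stddef.h>
+#include <jemalloc/jemalloc.h>
+
+static void
+set_gdump(bool enable) {
+    mallctl("prof.gdump", NULL, NULL, &enable, sizeof(enable));
+}]]></programlisting>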
+
+ <varlistentry id="prof.reset">
+ <term>
+ <mallctl>prof.reset</mallctl>
+ (<type>size_t</type>)
+ <literal>-w</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Reset all memory profile statistics, and optionally
+ update the sample rate (see <link
+ linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>
+ and <link
+ linkend="prof.lg_sample"><mallctl>prof.lg_sample</mallctl></link>).
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="prof.lg_sample">
+ <term>
+ <mallctl>prof.lg_sample</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Get the current sample rate (see <link
+ linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
+ </para></listitem>
+ </varlistentry>
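+
+ <para>As a sketch of how these controls combine (assuming jemalloc
+ built with <option>--enable-prof</option> and run with
+ <envar>MALLOC_CONF</envar>=<quote>prof:true</quote>), a program can
+ bracket a workload with activation, a dump, and a reset:
+ <programlisting language="C"><![CDATA[
+#include <stdbool.h>
+#include <jemalloc/jemalloc.h>
+
+void profile_workload(void (*workload)(void)) {
+	bool active = true;
+	/* Start sampling. */
+	mallctl("prof.active", NULL, NULL, &active, sizeof(active));
+
+	workload();
+
+	/* Dump a profile named per the opt.prof_prefix pattern. */
+	mallctl("prof.dump", NULL, NULL, NULL, 0);
+
+	/* Stop sampling and reset the profile statistics. */
+	active = false;
+	mallctl("prof.active", NULL, NULL, &active, sizeof(active));
+	mallctl("prof.reset", NULL, NULL, NULL, 0);
+}]]></programlisting></para>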
+
<varlistentry id="prof.interval">
<term>
<mallctl>prof.interval</mallctl>
@@ -1622,31 +2235,12 @@ malloc_conf = "xmalloc:true";]]></programlisting>
[<option>--enable-prof</option>]
</term>
<listitem><para>Average number of bytes allocated between
- inverval-based profile dumps. See the
+ interval-based profile dumps. See the
<link
linkend="opt.lg_prof_interval"><mallctl>opt.lg_prof_interval</mallctl></link>
option for additional information.</para></listitem>
</varlistentry>
- <varlistentry id="stats.cactive">
- <term>
- <mallctl>stats.cactive</mallctl>
- (<type>size_t *</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Pointer to a counter that contains an approximate count
- of the current number of bytes in active pages. The estimate may be
- high, but never low, because each arena rounds up to the nearest
- multiple of the chunk size when computing its contribution to the
- counter. Note that the <link
- linkend="epoch"><mallctl>epoch</mallctl></link> mallctl has no bearing
- on this counter. Furthermore, counter consistency is maintained via
- atomic operations, so it is necessary to use an atomic operation in
- order to guarantee a consistent read when dereferencing the pointer.
- </para></listitem>
- </varlistentry>
-
<varlistentry id="stats.allocated">
<term>
<mallctl>stats.allocated</mallctl>
@@ -1670,88 +2264,215 @@ malloc_conf = "xmalloc:true";]]></programlisting>
equal to <link
linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link>.
This does not include <link linkend="stats.arenas.i.pdirty">
- <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link> and pages
+ <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl></link>,
+ <link linkend="stats.arenas.i.pmuzzy">
+ <mallctl>stats.arenas.&lt;i&gt;.pmuzzy</mallctl></link>, nor pages
entirely devoted to allocator metadata.</para></listitem>
</varlistentry>
- <varlistentry id="stats.mapped">
+ <varlistentry id="stats.metadata">
<term>
- <mallctl>stats.mapped</mallctl>
+ <mallctl>stats.metadata</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Total number of bytes in chunks mapped on behalf of the
- application. This is a multiple of the chunk size, and is at least as
- large as <link
- linkend="stats.active"><mallctl>stats.active</mallctl></link>. This
- does not include inactive chunks.</para></listitem>
+ <listitem><para>Total number of bytes dedicated to metadata, which
+ comprise base allocations used for bootstrap-sensitive allocator
+ metadata structures (see <link
+ linkend="stats.arenas.i.base"><mallctl>stats.arenas.&lt;i&gt;.base</mallctl></link>)
+ and internal allocations (see <link
+ linkend="stats.arenas.i.internal"><mallctl>stats.arenas.&lt;i&gt;.internal</mallctl></link>).
+ Transparent huge page (enabled with <link
+ linkend="opt.metadata_thp">opt.metadata_thp</link>) usage is not
+ considered.</para></listitem>
</varlistentry>
- <varlistentry id="stats.chunks.current">
+ <varlistentry id="stats.metadata_thp">
<term>
- <mallctl>stats.chunks.current</mallctl>
+ <mallctl>stats.metadata_thp</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Total number of chunks actively mapped on behalf of the
- application. This does not include inactive chunks.
- </para></listitem>
+ <listitem><para>Number of transparent huge pages (THP) used for
+ metadata. See <link
+ linkend="stats.metadata"><mallctl>stats.metadata</mallctl></link> and
+ <link linkend="opt.metadata_thp">opt.metadata_thp</link>) for
+ details.</para></listitem>
</varlistentry>
- <varlistentry id="stats.chunks.total">
+ <varlistentry id="stats.resident">
<term>
- <mallctl>stats.chunks.total</mallctl>
- (<type>uint64_t</type>)
+ <mallctl>stats.resident</mallctl>
+ (<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of chunks allocated.</para></listitem>
+ <listitem><para>Maximum number of bytes in physically resident data
+ pages mapped by the allocator, comprising all pages dedicated to
+ allocator metadata, pages backing active allocations, and unused dirty
+ pages. This is a maximum rather than a precise value because pages may not
+ actually be physically resident if they correspond to demand-zeroed
+ virtual memory that has not yet been touched. This is a multiple of the
+ page size, and is larger than <link
+ linkend="stats.active"><mallctl>stats.active</mallctl></link>.</para></listitem>
</varlistentry>
- <varlistentry id="stats.chunks.high">
+ <varlistentry id="stats.mapped">
<term>
- <mallctl>stats.chunks.high</mallctl>
+ <mallctl>stats.mapped</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Maximum number of active chunks at any time thus far.
- </para></listitem>
+ <listitem><para>Total number of bytes in active extents mapped by the
+ allocator. This is larger than <link
+ linkend="stats.active"><mallctl>stats.active</mallctl></link>. This
+ does not include inactive extents, even those that contain unused dirty
+ pages, which means that there is no strict ordering between this and
+ <link
+ linkend="stats.resident"><mallctl>stats.resident</mallctl></link>.</para></listitem>
</varlistentry>
- <varlistentry id="stats.huge.allocated">
+ <varlistentry id="stats.retained">
<term>
- <mallctl>stats.huge.allocated</mallctl>
+ <mallctl>stats.retained</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Number of bytes currently allocated by huge objects.
+ <listitem><para>Total number of bytes in virtual memory mappings that
+ were retained rather than being returned to the operating system via
+ e.g. <citerefentry><refentrytitle>munmap</refentrytitle>
+ <manvolnum>2</manvolnum></citerefentry> or similar. Retained virtual
+ memory is typically untouched, decommitted, or purged, so it has no
+ strongly associated physical memory (see <link
+ linkend="arena.i.extent_hooks">extent hooks</link> for details).
+ Retained memory is excluded from mapped memory statistics, e.g. <link
+ linkend="stats.mapped"><mallctl>stats.mapped</mallctl></link>.
</para></listitem>
</varlistentry>
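+
+ <para>These counters are refreshed only when the <link
+ linkend="epoch"><mallctl>epoch</mallctl></link> mallctl is advanced.
+ A minimal sketch (assuming <option>--enable-stats</option>) that
+ refreshes and reads the global counters:
+ <programlisting language="C"><![CDATA[
+#include <stdint.h>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+void print_global_stats(void) {
+	/* Advance the epoch so the statistics below are refreshed. */
+	uint64_t epoch = 1;
+	size_t esz = sizeof(epoch);
+	mallctl("epoch", &epoch, &esz, &epoch, esz);
+
+	const char *names[] = {"stats.allocated", "stats.active",
+	    "stats.metadata", "stats.resident", "stats.mapped",
+	    "stats.retained"};
+	for (int i = 0; i < 6; i++) {
+		size_t v, vsz = sizeof(v);
+		if (mallctl(names[i], &v, &vsz, NULL, 0) == 0)
+			printf("%s: %zu\n", names[i], v);
+	}
+}]]></programlisting></para>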
- <varlistentry id="stats.huge.nmalloc">
+ <varlistentry id="stats.background_thread.num_threads">
+ <term>
+ <mallctl>stats.background_thread.num_threads</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of <link linkend="background_thread">background
+ threads</link> currently running.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.background_thread.num_runs">
<term>
- <mallctl>stats.huge.nmalloc</mallctl>
+ <mallctl>stats.background_thread.num_runs</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of huge allocation requests.
- </para></listitem>
+ <listitem><para> Total number of runs from all <link
+ linkend="background_thread">background threads</link>.</para></listitem>
</varlistentry>
- <varlistentry id="stats.huge.ndalloc">
+ <varlistentry id="stats.background_thread.run_interval">
<term>
- <mallctl>stats.huge.ndalloc</mallctl>
+ <mallctl>stats.background_thread.run_interval</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of huge deallocation requests.
- </para></listitem>
+ <listitem><para> Average run interval in nanoseconds of <link
+ linkend="background_thread">background threads</link>.</para></listitem>
+ </varlistentry>
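+
+ <para>A sketch (assuming <option>--enable-stats</option>) that enables
+ background threads via the <link
+ linkend="background_thread"><mallctl>background_thread</mallctl></link>
+ mallctl and reads the resulting thread count:
+ <programlisting language="C"><![CDATA[
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+void inspect_background_threads(void) {
+	/* Enable internal background threads. */
+	bool enable = true;
+	if (mallctl("background_thread", NULL, NULL, &enable,
+	    sizeof(enable)) != 0)
+		return;
+
+	/* Refresh statistics, then read the thread count. */
+	uint64_t epoch = 1;
+	size_t esz = sizeof(epoch);
+	mallctl("epoch", &epoch, &esz, &epoch, esz);
+
+	size_t nthreads, sz = sizeof(nthreads);
+	mallctl("stats.background_thread.num_threads", &nthreads, &sz,
+	    NULL, 0);
+	printf("background threads: %zu\n", nthreads);
+}]]></programlisting></para>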
+
+ <varlistentry id="stats.mutexes.ctl">
+ <term>
+ <mallctl>stats.mutexes.ctl.{counter}</mallctl>
+ (<type>counter specific type</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>ctl</varname> mutex (global
+ scope; mallctl related). <mallctl>{counter}</mallctl> is one of the
+ counters below:</para>
+ <varlistentry id="mutex_counters">
+ <listitem><para><varname>num_ops</varname> (<type>uint64_t</type>):
+ Total number of lock acquisition operations on this mutex.</para>
+
+ <para><varname>num_spin_acq</varname> (<type>uint64_t</type>): Number
+ of times the mutex was spin-acquired. When the mutex is already
+ locked and cannot be acquired immediately, jemalloc performs a short
+ period of spin-retry. Acquisition through spin generally means the
+ contention was lightweight and did not cause context
+ switches.</para>
+
+ <para><varname>num_wait</varname> (<type>uint64_t</type>): Number of
+ times the mutex was wait-acquired, which means the mutex contention
+ was not resolved by spin-retry, and a blocking operation was likely
+ required to acquire the mutex. This event generally implies higher
+ cost and longer delay, and should be investigated if it happens
+ often.</para>
+
+ <para><varname>max_wait_time</varname> (<type>uint64_t</type>):
+ Maximum length of time in nanoseconds spent on a single wait-acquired
+ lock operation. Note that to avoid profiling overhead on the common
+ path, this does not consider spin-acquired cases.</para>
+
+ <para><varname>total_wait_time</varname> (<type>uint64_t</type>):
+ Cumulative time in nanoseconds spent on wait-acquired lock operations.
+ Similarly, spin-acquired cases are not considered.</para>
+
+ <para><varname>max_num_thds</varname> (<type>uint32_t</type>): Maximum
+ number of threads waiting on this mutex simultaneously. Similarly,
+ spin-acquired cases are not considered.</para>
+
+ <para><varname>num_owner_switch</varname> (<type>uint64_t</type>):
+ Number of times the current mutex owner is different from the previous
+ one. This event does not generally imply an issue; rather it is an
+ indicator of how often the protected data are accessed by different
+ threads.
+ </para>
+ </listitem>
+ </varlistentry>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.mutexes.background_thread">
+ <term>
+ <mallctl>stats.mutexes.background_thread.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>background_thread</varname> mutex
+ (global scope; <link
+ linkend="background_thread"><mallctl>background_thread</mallctl></link>
+ related). <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.mutexes.prof">
+ <term>
+ <mallctl>stats.mutexes.prof.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>prof</varname> mutex (global
+ scope; profiling related). <mallctl>{counter}</mallctl> is one of the
+ counters in <link linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.mutexes.reset">
+ <term>
+ <mallctl>stats.mutexes.reset</mallctl>
+ (<type>void</type>) <literal>--</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Reset all mutex profile statistics, including global
+ mutexes, arena mutexes and bin mutexes.</para></listitem>
</varlistentry>
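+
+ <para>A sketch of reading the mutex profiling counters (assuming
+ <option>--enable-stats</option>; like other statistics, these are
+ refreshed via the <link
+ linkend="epoch"><mallctl>epoch</mallctl></link> mallctl):
+ <programlisting language="C"><![CDATA[
+#include <stdint.h>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+void inspect_ctl_mutex(void) {
+	uint64_t epoch = 1;
+	size_t esz = sizeof(epoch);
+	mallctl("epoch", &epoch, &esz, &epoch, esz);
+
+	uint64_t num_ops, wait_ns;
+	size_t sz = sizeof(uint64_t);
+	mallctl("stats.mutexes.ctl.num_ops", &num_ops, &sz, NULL, 0);
+	mallctl("stats.mutexes.ctl.total_wait_time", &wait_ns, &sz,
+	    NULL, 0);
+	printf("ctl mutex: %llu ops, %llu ns waited\n",
+	    (unsigned long long)num_ops, (unsigned long long)wait_ns);
+
+	/* Zero all mutex profile statistics. */
+	mallctl("stats.mutexes.reset", NULL, NULL, NULL, 0);
+}]]></programlisting></para>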
<varlistentry id="stats.arenas.i.dss">
@@ -1768,6 +2489,32 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</para></listitem>
</varlistentry>
+ <varlistentry id="stats.arenas.i.dirty_decay_ms">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.dirty_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Approximate time in milliseconds from the creation of a
+ set of unused dirty pages until an equivalent set of unused dirty pages
+ is purged and/or reused. See <link
+ linkend="opt.dirty_decay_ms"><mallctl>opt.dirty_decay_ms</mallctl></link>
+ for details.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.muzzy_decay_ms">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.muzzy_decay_ms</mallctl>
+ (<type>ssize_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Approximate time in milliseconds from the creation of a
+ set of unused muzzy pages until an equivalent set of unused muzzy pages
+ is purged and/or reused. See <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
+ for details.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.arenas.i.nthreads">
<term>
<mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
@@ -1778,13 +2525,25 @@ malloc_conf = "xmalloc:true";]]></programlisting>
arena.</para></listitem>
</varlistentry>
+ <varlistentry id="stats.arenas.i.uptime">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.uptime</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Time elapsed (in nanoseconds) since the arena was
+ created. If &lt;i&gt; equals <constant>0</constant> or
+ <constant>MALLCTL_ARENAS_ALL</constant>, this is the uptime since malloc
+ initialization.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.arenas.i.pactive">
<term>
<mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Number of pages in active runs.</para></listitem>
+ <listitem><para>Number of pages in active extents.</para></listitem>
</varlistentry>
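+
+ <para>Per-arena statistics are conveniently read through
+ <function>mallctlnametomib()</function> and
+ <function>mallctlbymib()</function>, substituting the desired arena
+ index (or <constant>MALLCTL_ARENAS_ALL</constant>) into the MIB; a
+ sketch:
+ <programlisting language="C"><![CDATA[
+#include <jemalloc/jemalloc.h>
+
+size_t arena_pactive(unsigned arena_ind) {
+	/* Name components: stats(0).arenas(1).<i>(2).pactive(3). */
+	size_t mib[4];
+	size_t miblen = sizeof(mib) / sizeof(mib[0]);
+	if (mallctlnametomib("stats.arenas.0.pactive", mib, &miblen) != 0)
+		return 0;
+	mib[2] = (size_t)arena_ind;	/* Patch in the arena index. */
+
+	size_t pactive, sz = sizeof(pactive);
+	if (mallctlbymib(mib, miblen, &pactive, &sz, NULL, 0) != 0)
+		return 0;
+	return pactive;
+}]]></programlisting></para>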
<varlistentry id="stats.arenas.i.pdirty">
@@ -1793,10 +2552,23 @@ malloc_conf = "xmalloc:true";]]></programlisting>
(<type>size_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Number of pages within unused runs that are potentially
- dirty, and for which <function>madvise<parameter>...</parameter>
- <parameter><constant>MADV_DONTNEED</constant></parameter></function> or
- similar has not been called.</para></listitem>
+ <listitem><para>Number of pages within unused extents that are
+ potentially dirty, and for which <function>madvise()</function> or
+ similar has not been called. See <link
+ linkend="opt.dirty_decay_ms"><mallctl>opt.dirty_decay_ms</mallctl></link>
+ for a description of dirty pages.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.pmuzzy">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.pmuzzy</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Number of pages within unused extents that are muzzy.
+ See <link
+ linkend="opt.muzzy_decay_ms"><mallctl>opt.muzzy_decay_ms</mallctl></link>
+ for a description of muzzy pages.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.mapped">
@@ -1809,9 +2581,74 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<listitem><para>Number of mapped bytes.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.npurge">
+ <varlistentry id="stats.arenas.i.retained">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.retained</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of retained bytes. See <link
+ linkend="stats.retained"><mallctl>stats.retained</mallctl></link> for
+ details.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.base">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.base</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>
+ Number of bytes dedicated to bootstrap-sensitive allocator metadata
+ structures.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.internal">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.internal</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of bytes dedicated to internal allocations.
+ Internal allocations differ from application-originated allocations in
+ that they are for internal use, and that they are omitted from heap
+ profiles.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.metadata_thp">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.npurge</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.metadata_thp</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of transparent huge pages (THP) used for
+ metadata. See <link linkend="opt.metadata_thp">opt.metadata_thp</link>
+ for details.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.resident">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.resident</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Maximum number of bytes in physically resident data
+ pages mapped by the arena, comprising all pages dedicated to allocator
+ metadata, pages backing active allocations, and unused dirty pages.
+ This is a maximum rather than a precise value because pages may not
+ physically resident if they correspond to demand-zeroed virtual memory
+ that has not yet been touched. This is a multiple of the page
+ size.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.dirty_npurge">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.dirty_npurge</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
@@ -1820,26 +2657,57 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.nmadvise">
+ <varlistentry id="stats.arenas.i.dirty_nmadvise">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.nmadvise</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.dirty_nmadvise</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Number of <function>madvise<parameter>...</parameter>
- <parameter><constant>MADV_DONTNEED</constant></parameter></function> or
- similar calls made to purge dirty pages.</para></listitem>
+ <listitem><para>Number of <function>madvise()</function> or similar
+ calls made to purge dirty pages.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.purged">
+ <varlistentry id="stats.arenas.i.dirty_purged">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.purged</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.dirty_purged</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Number of pages purged.</para></listitem>
+ <listitem><para>Number of dirty pages purged.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.muzzy_npurge">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.muzzy_npurge</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of muzzy page purge sweeps performed.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.muzzy_nmadvise">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.muzzy_nmadvise</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of <function>madvise()</function> or similar
+ calls made to purge muzzy pages.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.muzzy_purged">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.muzzy_purged</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of muzzy pages purged.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.small.allocated">
@@ -1860,8 +2728,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocation requests served by
- small bins.</para></listitem>
+ <listitem><para>Cumulative number of times a small allocation was
+ requested from the arena's bins, whether to fill the relevant tcache if
+ <link linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is
+ enabled, or to directly satisfy an allocation request
+ otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.small.ndalloc">
@@ -1871,8 +2742,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of small objects returned to bins.
- </para></listitem>
+ <listitem><para>Cumulative number of times a small allocation was
+ returned to the arena's bins, whether to flush the relevant tcache if
+ <link linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is
+ enabled, or to directly deallocate an allocation
+ otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.small.nrequests">
@@ -1882,8 +2756,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of small allocation requests.
- </para></listitem>
+ <listitem><para>Cumulative number of allocation requests satisfied by
+ all bin size classes.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.large.allocated">
@@ -1904,8 +2778,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of large allocation requests served
- directly by the arena.</para></listitem>
+ <listitem><para>Cumulative number of times a large extent was allocated
+ from the arena, whether to fill the relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled and
+ the size class is within the range being cached, or to directly satisfy
+ an allocation request otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.large.ndalloc">
@@ -1915,8 +2792,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of large deallocation requests served
- directly by the arena.</para></listitem>
+ <listitem><para>Cumulative number of times a large extent was returned
+ to the arena, whether to flush the relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled and
+ the size class is within the range being cached, or to directly
+ deallocate an allocation otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.large.nrequests">
@@ -1926,19 +2806,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of large allocation requests.
- </para></listitem>
- </varlistentry>
-
- <varlistentry id="stats.arenas.i.bins.j.allocated">
- <term>
- <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.allocated</mallctl>
- (<type>size_t</type>)
- <literal>r-</literal>
- [<option>--enable-stats</option>]
- </term>
- <listitem><para>Current number of bytes allocated by
- bin.</para></listitem>
+ <listitem><para>Cumulative number of allocation requests satisfied by
+ all large size classes.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.bins.j.nmalloc">
@@ -1948,8 +2817,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocations served by bin.
- </para></listitem>
+ <listitem><para>Cumulative number of times a bin region of the
+ corresponding size class was allocated from the arena, whether to fill
+ the relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled, or
+ to directly satisfy an allocation request otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.bins.j.ndalloc">
@@ -1959,8 +2831,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocations returned to bin.
- </para></listitem>
+ <listitem><para>Cumulative number of times a bin region of the
+ corresponding size class was returned to the arena, whether to flush the
+ relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled, or
+ to directly deallocate an allocation otherwise.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.bins.j.nrequests">
@@ -1970,8 +2845,19 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocation
- requests.</para></listitem>
+ <listitem><para>Cumulative number of allocation requests satisfied by
+ bin regions of the corresponding size class.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.bins.j.curregs">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curregs</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Current number of regions for this size
+ class.</para></listitem>
</varlistentry>
<varlistentry id="stats.arenas.i.bins.j.nfills">
@@ -1979,7 +2865,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nfills</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
- [<option>--enable-stats</option> <option>--enable-tcache</option>]
</term>
<listitem><para>Cumulative number of tcache fills.</para></listitem>
</varlistentry>
@@ -1989,87 +2874,273 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nflushes</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
- [<option>--enable-stats</option> <option>--enable-tcache</option>]
</term>
<listitem><para>Cumulative number of tcache flushes.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.bins.j.nruns">
+ <varlistentry id="stats.arenas.i.bins.j.nslabs">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nruns</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nslabs</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of runs created.</para></listitem>
+ <listitem><para>Cumulative number of slabs created.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.bins.j.nreruns">
+ <varlistentry id="stats.arenas.i.bins.j.nreslabs">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nreruns</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nreslabs</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of times the current run from which
+ <listitem><para>Cumulative number of times the current slab from which
to allocate changed.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.bins.j.curruns">
+ <varlistentry id="stats.arenas.i.bins.j.curslabs">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curruns</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curslabs</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Current number of runs.</para></listitem>
+ <listitem><para>Current number of slabs.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.bins.mutex">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.mutex.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on
+ <varname>arena.&lt;i&gt;.bins.&lt;j&gt;</varname> mutex (arena bin
+ scope; bin operation related). <mallctl>{counter}</mallctl> is one of
+ the counters in <link linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.lruns.j.nmalloc">
+ <varlistentry id="stats.arenas.i.lextents.j.nmalloc">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nmalloc</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.lextents.&lt;j&gt;.nmalloc</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocation requests for this size
- class served directly by the arena.</para></listitem>
+ <listitem><para>Cumulative number of times a large extent of the
+ corresponding size class was allocated from the arena, whether to fill
+ the relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled and
+ the size class is within the range being cached, or to directly satisfy
+ an allocation request otherwise.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.lruns.j.ndalloc">
+ <varlistentry id="stats.arenas.i.lextents.j.ndalloc">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.ndalloc</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.lextents.&lt;j&gt;.ndalloc</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of deallocation requests for this
- size class served directly by the arena.</para></listitem>
+ <listitem><para>Cumulative number of times a large extent of the
+ corresponding size class was returned to the arena, whether to flush the
+ relevant tcache if <link
+ linkend="opt.tcache"><mallctl>opt.tcache</mallctl></link> is enabled and
+ the size class is within the range being cached, or to directly
+ deallocate an allocation otherwise.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.lruns.j.nrequests">
+ <varlistentry id="stats.arenas.i.lextents.j.nrequests">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nrequests</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.lextents.&lt;j&gt;.nrequests</mallctl>
(<type>uint64_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Cumulative number of allocation requests for this size
- class.</para></listitem>
+ <listitem><para>Cumulative number of allocation requests satisfied by
+ large extents of the corresponding size class.</para></listitem>
</varlistentry>
- <varlistentry id="stats.arenas.i.lruns.j.curruns">
+ <varlistentry id="stats.arenas.i.lextents.j.curlextents">
<term>
- <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.curruns</mallctl>
+ <mallctl>stats.arenas.&lt;i&gt;.lextents.&lt;j&gt;.curlextents</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-stats</option>]
</term>
- <listitem><para>Current number of runs for this size class.
+ <listitem><para>Current number of large allocations for this size class.
</para></listitem>
</varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.large">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.large.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.large</varname>
+ mutex (arena scope; large allocation related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.extent_avail">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.extent_avail.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.extent_avail
+ </varname> mutex (arena scope; extent availability related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.extents_dirty">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.extents_dirty.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.extents_dirty
+ </varname> mutex (arena scope; dirty extents related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.extents_muzzy">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.extents_muzzy.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.extents_muzzy
+ </varname> mutex (arena scope; muzzy extents related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.extents_retained">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.extents_retained.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.extents_retained
+ </varname> mutex (arena scope; retained extents related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.decay_dirty">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.decay_dirty.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.decay_dirty
+ </varname> mutex (arena scope; decay for dirty pages related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.decay_muzzy">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.decay_muzzy.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.decay_muzzy
+ </varname> mutex (arena scope; decay for muzzy pages related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.base">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.base.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>arena.&lt;i&gt;.base</varname>
+ mutex (arena scope; base allocator related).
+ <mallctl>{counter}</mallctl> is one of the counters in <link
+ linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.arenas.i.mutexes.tcache_list">
+ <term>
+ <mallctl>stats.arenas.&lt;i&gt;.mutexes.tcache_list.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on
+ <varname>arena.&lt;i&gt;.tcache_list</varname> mutex (arena scope;
+ tcache to arena association related). This mutex is expected to be
+ accessed relatively infrequently. <mallctl>{counter}</mallctl> is one of the
+ counters in <link linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
</variablelist>
</refsect1>
+ <refsect1 id="heap_profile_format">
+ <title>HEAP PROFILE FORMAT</title>
+ <para>Although the heap profiling functionality was originally designed to
+ be compatible with the
+ <command>pprof</command> command that is developed as part of the <ulink
+ url="http://code.google.com/p/gperftools/">gperftools
+ package</ulink>, the addition of per thread heap profiling functionality
+ required a different heap profile format. The <command>jeprof</command>
+ command is derived from <command>pprof</command>, with enhancements to
+ support the heap profile format described here.</para>
+
+ <para>In the following hypothetical heap profile, <constant>[...]</constant>
+ indicates elision for the sake of compactness. <programlisting><![CDATA[
+heap_v2/524288
+ t*: 28106: 56637512 [0: 0]
+ [...]
+ t3: 352: 16777344 [0: 0]
+ [...]
+ t99: 17754: 29341640 [0: 0]
+ [...]
+@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
+ t*: 13: 6688 [0: 0]
+ t3: 12: 6496 [0: 0]
+ t99: 1: 192 [0: 0]
+[...]
+
+MAPPED_LIBRARIES:
+[...]]]></programlisting> The following matches the above heap profile, but most
+tokens are replaced with <constant>&lt;description&gt;</constant> to indicate
+descriptions of the corresponding fields. <programlisting><![CDATA[
+<heap_profile_format_version>/<mean_sample_interval>
+ <aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+ [...]
+ <thread_3_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+ [...]
+ <thread_99_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+ [...]
+@ <top_frame> <frame> [...] <frame> <frame> <frame> [...]
+ <backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+ <backtrace_thread_3>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+ <backtrace_thread_99>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+[...]
+
+MAPPED_LIBRARIES:
+</proc/<pid>/maps>]]></programlisting></para>
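+
+ <para>Profiles in this format are typically post-processed with
+ <command>jeprof</command>; a hypothetical invocation (the binary and
+ dump names are placeholders, with the dump named per the <link
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+ pattern):
+ <screen>jeprof --text myprog jeprof.out.12345.0.u0.heap</screen></para>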
+ </refsect1>
+
<refsect1 id="debugging_malloc_problems">
<title>DEBUGGING MALLOC PROBLEMS</title>
<para>When debugging, it is a good idea to configure/build jemalloc with
@@ -2079,7 +3150,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
of run-time assertions that catch application errors such as double-free,
write-after-free, etc.</para>
- <para>Programs often accidentally depend on &ldquo;uninitialized&rdquo;
+ <para>Programs often accidentally depend on <quote>uninitialized</quote>
memory actually being filled with zero bytes. Junk filling
(see the <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link>
option) tends to expose such bugs in the form of obviously incorrect
@@ -2091,9 +3162,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<para>This implementation does not provide much detail about the problems
it detects, because the performance impact for storing such information
- would be prohibitive. However, jemalloc does integrate with the most
- excellent <ulink url="http://valgrind.org/">Valgrind</ulink> tool if the
- <option>--enable-valgrind</option> configuration option is enabled.</para>
+ would be prohibitive.</para>
</refsect1>
<refsect1 id="diagnostic_messages">
<title>DIAGNOSTIC MESSAGES</title>
@@ -2108,29 +3177,29 @@ malloc_conf = "xmalloc:true";]]></programlisting>
to override the function which emits the text strings forming the errors
and warnings if for some reason the <constant>STDERR_FILENO</constant> file
descriptor is not suitable for this.
- <function>malloc_message<parameter/></function> takes the
+ <function>malloc_message()</function> takes the
<parameter>cbopaque</parameter> pointer argument that is
<constant>NULL</constant> unless overridden by the arguments in a call to
- <function>malloc_stats_print<parameter/></function>, followed by a string
+ <function>malloc_stats_print()</function>, followed by a string
pointer. Please note that doing anything which tries to allocate memory in
this function is likely to result in a crash or deadlock.</para>
<para>All messages are prefixed by
- &ldquo;<computeroutput>&lt;jemalloc&gt;: </computeroutput>&rdquo;.</para>
+ <quote><computeroutput>&lt;jemalloc&gt;: </computeroutput></quote>.</para>
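+
+ <para>As a sketch of overriding the message writer (the callback must
+ not allocate; <varname>log_fd</varname> is a hypothetical descriptor
+ opened by the application at startup):
+ <programlisting language="C"><![CDATA[
+#include <string.h>
+#include <unistd.h>
+#include <jemalloc/jemalloc.h>
+
+static int log_fd = 2;	/* Hypothetical; set during startup. */
+
+static void
+write_to_log(void *cbopaque, const char *s) {
+	(void)cbopaque;
+	/* No allocation here: write() directly to the descriptor. */
+	(void)write(log_fd, s, strlen(s));
+}
+
+void install_message_hook(void) {
+	malloc_message = write_to_log;
+}]]></programlisting></para>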
</refsect1>
<refsect1 id="return_values">
<title>RETURN VALUES</title>
<refsect2>
<title>Standard API</title>
- <para>The <function>malloc<parameter/></function> and
- <function>calloc<parameter/></function> functions return a pointer to the
+ <para>The <function>malloc()</function> and
+ <function>calloc()</function> functions return a pointer to the
allocated memory if successful; otherwise a <constant>NULL</constant>
pointer is returned and <varname>errno</varname> is set to
<errorname>ENOMEM</errorname>.</para>
- <para>The <function>posix_memalign<parameter/></function> function
+ <para>The <function>posix_memalign()</function> function
returns the value 0 if successful; otherwise it returns an error value.
- The <function>posix_memalign<parameter/></function> function will fail
+ The <function>posix_memalign()</function> function will fail
if:
<variablelist>
<varlistentry>
@@ -2149,11 +3218,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</variablelist>
</para>
- <para>The <function>aligned_alloc<parameter/></function> function returns
+ <para>The <function>aligned_alloc()</function> function returns
a pointer to the allocated memory if successful; otherwise a
<constant>NULL</constant> pointer is returned and
<varname>errno</varname> is set. The
- <function>aligned_alloc<parameter/></function> function will fail if:
+ <function>aligned_alloc()</function> function will fail if:
<variablelist>
<varlistentry>
<term><errorname>EINVAL</errorname></term>
@@ -2170,44 +3239,44 @@ malloc_conf = "xmalloc:true";]]></programlisting>
</variablelist>
</para>
- <para>The <function>realloc<parameter/></function> function returns a
+ <para>The <function>realloc()</function> function returns a
pointer, possibly identical to <parameter>ptr</parameter>, to the
allocated memory if successful; otherwise a <constant>NULL</constant>
pointer is returned, and <varname>errno</varname> is set to
<errorname>ENOMEM</errorname> if the error was the result of an
- allocation failure. The <function>realloc<parameter/></function>
+ allocation failure. The <function>realloc()</function>
function always leaves the original buffer intact when an error occurs.
</para>
- <para>The <function>free<parameter/></function> function returns no
+ <para>The <function>free()</function> function returns no
value.</para>
</refsect2>
<refsect2>
<title>Non-standard API</title>
- <para>The <function>mallocx<parameter/></function> and
- <function>rallocx<parameter/></function> functions return a pointer to
+ <para>The <function>mallocx()</function> and
+ <function>rallocx()</function> functions return a pointer to
the allocated memory if successful; otherwise a <constant>NULL</constant>
pointer is returned to indicate insufficient contiguous memory was
available to service the allocation request. </para>
- <para>The <function>xallocx<parameter/></function> function returns the
+ <para>The <function>xallocx()</function> function returns the
real size of the resulting resized allocation pointed to by
<parameter>ptr</parameter>, which is a value less than
<parameter>size</parameter> if the allocation could not be adequately
grown in place. </para>
- <para>The <function>sallocx<parameter/></function> function returns the
+ <para>The <function>sallocx()</function> function returns the
real size of the allocation pointed to by <parameter>ptr</parameter>.
</para>
- <para>The <function>nallocx<parameter/></function> returns the real size
+ <para>The <function>nallocx()</function> returns the real size
that would result from a successful equivalent
- <function>mallocx<parameter/></function> function call, or zero if
+ <function>mallocx()</function> function call, or zero if
insufficient memory is available to perform the size computation. </para>
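+
+ <para>A sketch of using <function>nallocx()</function> to size a
+ buffer to its real allocation size up front, so the usable slack is
+ known to the caller:
+ <programlisting language="C"><![CDATA[
+#include <jemalloc/jemalloc.h>
+
+void *alloc_full_size_class(size_t request, size_t *real_out) {
+	/* Real size the request would map to, without allocating. */
+	size_t real = nallocx(request, 0);
+	if (real == 0)
+		return NULL;
+	void *p = mallocx(real, 0);
+	if (p != NULL && real_out != NULL)
+		*real_out = sallocx(p, 0);	/* Same as real. */
+	return p;
+}]]></programlisting></para>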
- <para>The <function>mallctl<parameter/></function>,
- <function>mallctlnametomib<parameter/></function>, and
- <function>mallctlbymib<parameter/></function> functions return 0 on
+ <para>The <function>mallctl()</function>,
+ <function>mallctlnametomib()</function>, and
+ <function>mallctlbymib()</function> functions return 0 on
success; otherwise they return an error value. The functions will fail
if:
<variablelist>
@@ -2243,52 +3312,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<term><errorname>EFAULT</errorname></term>
<listitem><para>An interface with side effects failed in some way
- not directly related to <function>mallctl*<parameter/></function>
+ not directly related to <function>mallctl*()</function>
read/write processing.</para></listitem>
</varlistentry>
</variablelist>
</para>
- <para>The <function>malloc_usable_size<parameter/></function> function
+ <para>The <function>malloc_usable_size()</function> function
returns the usable size of the allocation pointed to by
<parameter>ptr</parameter>. </para>
</refsect2>
- <refsect2>
- <title>Experimental API</title>
- <para>The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>,
- <function>sallocm<parameter/></function>,
- <function>dallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions return
- <constant>ALLOCM_SUCCESS</constant> on success; otherwise they return an
- error value. The <function>allocm<parameter/></function>,
- <function>rallocm<parameter/></function>, and
- <function>nallocm<parameter/></function> functions will fail if:
- <variablelist>
- <varlistentry>
- <term><errorname>ALLOCM_ERR_OOM</errorname></term>
-
- <listitem><para>Out of memory. Insufficient contiguous memory was
- available to service the allocation request. The
- <function>allocm<parameter/></function> function additionally sets
- <parameter>*ptr</parameter> to <constant>NULL</constant>, whereas
- the <function>rallocm<parameter/></function> function leaves
- <constant>*ptr</constant> unmodified.</para></listitem>
- </varlistentry>
- </variablelist>
- The <function>rallocm<parameter/></function> function will also
- fail if:
- <variablelist>
- <varlistentry>
- <term><errorname>ALLOCM_ERR_NOT_MOVED</errorname></term>
-
- <listitem><para><constant>ALLOCM_NO_MOVE</constant> was specified,
- but the reallocation request could not be serviced without moving
- the object.</para></listitem>
- </varlistentry>
- </variablelist>
- </para>
- </refsect2>
</refsect1>
<refsect1 id="environment">
<title>ENVIRONMENT</title>
@@ -2310,9 +3343,10 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<para>To dump core whenever a problem occurs:
<screen>ln -s 'abort:true' /etc/malloc.conf</screen>
</para>
- <para>To specify in the source a chunk size that is 16 MiB:
+ <para>To specify in the source that only one arena should be automatically
+ created:
<programlisting language="C"><![CDATA[
-malloc_conf = "lg_chunk:24";]]></programlisting></para>
+malloc_conf = "narenas:1";]]></programlisting></para>
</refsect1>
<refsect1 id="see_also">
<title>SEE ALSO</title>
@@ -2333,13 +3367,13 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
</refsect1>
<refsect1 id="standards">
<title>STANDARDS</title>
- <para>The <function>malloc<parameter/></function>,
- <function>calloc<parameter/></function>,
- <function>realloc<parameter/></function>, and
- <function>free<parameter/></function> functions conform to ISO/IEC
- 9899:1990 (&ldquo;ISO C90&rdquo;).</para>
-
- <para>The <function>posix_memalign<parameter/></function> function conforms
- to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
+ <para>The <function>malloc()</function>,
+ <function>calloc()</function>,
+ <function>realloc()</function>, and
+ <function>free()</function> functions conform to ISO/IEC
+ 9899:1990 (<quote>ISO C90</quote>).</para>
+
+ <para>The <function>posix_memalign()</function> function conforms
+ to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
</refsect1>
</refentry>
diff --git a/deps/jemalloc/doc/stylesheet.xsl b/deps/jemalloc/doc/stylesheet.xsl
index 4e334a86f..619365d82 100644
--- a/deps/jemalloc/doc/stylesheet.xsl
+++ b/deps/jemalloc/doc/stylesheet.xsl
@@ -1,7 +1,10 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:param name="funcsynopsis.style">ansi</xsl:param>
- <xsl:param name="function.parens" select="1"/>
+ <xsl:param name="function.parens" select="0"/>
+ <xsl:template match="function">
+ <xsl:call-template name="inline.monoseq"/>
+ </xsl:template>
<xsl:template match="mallctl">
- "<xsl:call-template name="inline.monoseq"/>"
+ <quote><xsl:call-template name="inline.monoseq"/></quote>
</xsl:template>
</xsl:stylesheet>
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h
deleted file mode 100644
index 9d000c03d..000000000
--- a/deps/jemalloc/include/jemalloc/internal/arena.h
+++ /dev/null
@@ -1,1063 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-/*
- * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
- * as small as possible such that this setting is still honored, without
- * violating other constraints. The goal is to make runs as small as possible
- * without exceeding a per run external fragmentation threshold.
- *
- * We use binary fixed point math for overhead computations, where the binary
- * point is implicitly RUN_BFP bits to the left.
- *
- * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
- * honored for some/all object sizes, since when heap profiling is enabled
- * there is one pointer of header overhead per object (plus a constant). This
- * constraint is relaxed (ignored) for runs that are so small that the
- * per-region overhead is greater than:
- *
- * (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP))
- */
-#define RUN_BFP 12
-/* \/ Implicit binary fixed point. */
-#define RUN_MAX_OVRHD 0x0000003dU
-#define RUN_MAX_OVRHD_RELAX 0x00001800U
-
-/* Maximum number of regions in one run. */
-#define LG_RUN_MAXREGS 11
-#define RUN_MAXREGS (1U << LG_RUN_MAXREGS)
-
-/*
- * Minimum redzone size. Redzones may be larger than this if necessary to
- * preserve region alignment.
- */
-#define REDZONE_MINSIZE 16
-
-/*
- * The minimum ratio of active:dirty pages per arena is computed as:
- *
- * (nactive >> opt_lg_dirty_mult) >= ndirty
- *
- * So, supposing that opt_lg_dirty_mult is 3, there can be no less than 8 times
- * as many active pages as dirty pages.
- */
-#define LG_DIRTY_MULT_DEFAULT 3
-
-typedef struct arena_chunk_map_s arena_chunk_map_t;
-typedef struct arena_chunk_s arena_chunk_t;
-typedef struct arena_run_s arena_run_t;
-typedef struct arena_bin_info_s arena_bin_info_t;
-typedef struct arena_bin_s arena_bin_t;
-typedef struct arena_s arena_t;
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-/* Each element of the chunk map corresponds to one page within the chunk. */
-struct arena_chunk_map_s {
-#ifndef JEMALLOC_PROF
- /*
- * Overlay prof_ctx in order to allow it to be referenced by dead code.
- * Such antics aren't warranted for per arena data structures, but
- * chunk map overhead accounts for a percentage of memory, rather than
- * being just a fixed cost.
- */
- union {
-#endif
- union {
- /*
- * Linkage for run trees. There are two disjoint uses:
- *
- * 1) arena_t's runs_avail tree.
- * 2) arena_run_t conceptually uses this linkage for in-use
- * non-full runs, rather than directly embedding linkage.
- */
- rb_node(arena_chunk_map_t) rb_link;
- /*
- * List of runs currently in purgatory. arena_chunk_purge()
- * temporarily allocates runs that contain dirty pages while
- * purging, so that other threads cannot use the runs while the
- * purging thread is operating without the arena lock held.
- */
- ql_elm(arena_chunk_map_t) ql_link;
- } u;
-
- /* Profile counters, used for large object runs. */
- prof_ctx_t *prof_ctx;
-#ifndef JEMALLOC_PROF
- }; /* union { ... }; */
-#endif
-
- /*
- * Run address (or size) and various flags are stored together. The bit
- * layout looks like (assuming 32-bit system):
- *
- * ???????? ???????? ????nnnn nnnndula
- *
- * ? : Unallocated: Run address for first/last pages, unset for internal
- * pages.
- * Small: Run page offset.
- * Large: Run size for first page, unset for trailing pages.
- * n : binind for small size class, BININD_INVALID for large size class.
- * d : dirty?
- * u : unzeroed?
- * l : large?
- * a : allocated?
- *
- * Following are example bit patterns for the three types of runs.
- *
- * p : run page offset
- * s : run size
- * n : binind for size class; large objects set these to BININD_INVALID
- * except for promoted allocations (see prof_promote)
- * x : don't care
- * - : 0
- * + : 1
- * [DULA] : bit set
- * [dula] : bit unset
- *
- * Unallocated (clean):
- * ssssssss ssssssss ssss++++ ++++du-a
- * xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
- * ssssssss ssssssss ssss++++ ++++dU-a
- *
- * Unallocated (dirty):
- * ssssssss ssssssss ssss++++ ++++D--a
- * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * ssssssss ssssssss ssss++++ ++++D--a
- *
- * Small:
- * pppppppp pppppppp ppppnnnn nnnnd--A
- * pppppppp pppppppp ppppnnnn nnnn---A
- * pppppppp pppppppp ppppnnnn nnnnd--A
- *
- * Large:
- * ssssssss ssssssss ssss++++ ++++D-LA
- * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
- * -------- -------- ----++++ ++++D-LA
- *
- * Large (sampled, size <= PAGE):
- * ssssssss ssssssss ssssnnnn nnnnD-LA
- *
- * Large (not sampled, size == PAGE):
- * ssssssss ssssssss ssss++++ ++++D-LA
- */
- size_t bits;
-#define CHUNK_MAP_BININD_SHIFT 4
-#define BININD_INVALID ((size_t)0xffU)
-/* CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
-#define CHUNK_MAP_BININD_MASK ((size_t)0xff0U)
-#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
-#define CHUNK_MAP_FLAGS_MASK ((size_t)0xcU)
-#define CHUNK_MAP_DIRTY ((size_t)0x8U)
-#define CHUNK_MAP_UNZEROED ((size_t)0x4U)
-#define CHUNK_MAP_LARGE ((size_t)0x2U)
-#define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
-#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED
-};
-typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
-typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
-typedef ql_head(arena_chunk_map_t) arena_chunk_mapelms_t;
-
-/* Arena chunk header. */
-struct arena_chunk_s {
- /* Arena that owns the chunk. */
- arena_t *arena;
-
- /* Linkage for tree of arena chunks that contain dirty runs. */
- rb_node(arena_chunk_t) dirty_link;
-
- /* Number of dirty pages. */
- size_t ndirty;
-
- /* Number of available runs. */
- size_t nruns_avail;
-
- /*
- * Number of available run adjacencies that purging could coalesce.
- * Clean and dirty available runs are not coalesced, which causes
- * virtual memory fragmentation. The ratio of
- * (nruns_avail-nruns_adjac):nruns_adjac is used for tracking this
- * fragmentation.
- */
- size_t nruns_adjac;
-
- /*
- * Map of pages within chunk that keeps track of free/large/small. The
- * first map_bias entries are omitted, since the chunk header does not
- * need to be tracked in the map. This omission saves a header page
- * for common chunk sizes (e.g. 4 MiB).
- */
- arena_chunk_map_t map[1]; /* Dynamically sized. */
-};
-typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
-
-struct arena_run_s {
- /* Bin this run is associated with. */
- arena_bin_t *bin;
-
- /* Index of next region that has never been allocated, or nregs. */
- uint32_t nextind;
-
- /* Number of free regions in run. */
- unsigned nfree;
-};
-
-/*
- * Read-only information associated with each element of arena_t's bins array
- * is stored separately, partly to reduce memory usage (only one copy, rather
- * than one per arena), but mainly to avoid false cacheline sharing.
- *
- * Each run has the following layout:
- *
- * /--------------------\
- * | arena_run_t header |
- * | ... |
- * bitmap_offset | bitmap |
- * | ... |
- * ctx0_offset | ctx map |
- * | ... |
- * |--------------------|
- * | redzone |
- * reg0_offset | region 0 |
- * | redzone |
- * |--------------------| \
- * | redzone | |
- * | region 1 | > reg_interval
- * | redzone | /
- * |--------------------|
- * | ... |
- * | ... |
- * | ... |
- * |--------------------|
- * | redzone |
- * | region nregs-1 |
- * | redzone |
- * |--------------------|
- * | alignment pad? |
- * \--------------------/
- *
- * reg_interval has at least the same minimum alignment as reg_size; this
- * preserves the alignment constraint that sa2u() depends on. Alignment pad is
- * either 0 or redzone_size; it is present only if needed to align reg0_offset.
- */
-struct arena_bin_info_s {
- /* Size of regions in a run for this bin's size class. */
- size_t reg_size;
-
- /* Redzone size. */
- size_t redzone_size;
-
- /* Interval between regions (reg_size + (redzone_size << 1)). */
- size_t reg_interval;
-
- /* Total size of a run for this bin's size class. */
- size_t run_size;
-
- /* Total number of regions in a run for this bin's size class. */
- uint32_t nregs;
-
- /*
- * Offset of first bitmap_t element in a run header for this bin's size
- * class.
- */
- uint32_t bitmap_offset;
-
- /*
- * Metadata used to manipulate bitmaps for runs associated with this
- * bin.
- */
- bitmap_info_t bitmap_info;
-
- /*
- * Offset of first (prof_ctx_t *) in a run header for this bin's size
- * class, or 0 if (config_prof == false || opt_prof == false).
- */
- uint32_t ctx0_offset;
-
- /* Offset of first region in a run for this bin's size class. */
- uint32_t reg0_offset;
-};
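A minimal sketch of how these fields locate a region within a run, per the layout diagram above (the helper name is hypothetical; it is simply the inverse of arena_run_regind() defined later in this header):

JEMALLOC_ALWAYS_INLINE void *
arena_run_reg_addr(arena_run_t *run, arena_bin_info_t *bin_info,
    unsigned regind)
{

	assert(regind < bin_info->nregs);
	/* Region regind starts reg0_offset + regind * reg_interval bytes in. */
	return ((void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
	    (uintptr_t)regind * bin_info->reg_interval));
}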
-
-struct arena_bin_s {
- /*
- * All operations on runcur, runs, and stats require that lock be
- * locked. Run allocation/deallocation are protected by the arena lock,
- * which may be acquired while holding one or more bin locks, but not
- * vice versa.
- */
- malloc_mutex_t lock;
-
- /*
- * Current run being used to service allocations of this bin's size
- * class.
- */
- arena_run_t *runcur;
-
- /*
- * Tree of non-full runs. This tree is used when looking for an
- * existing run when runcur is no longer usable. We choose the
- * non-full run that is lowest in memory; this policy tends to keep
- * objects packed well, and it can also help reduce the number of
- * almost-empty chunks.
- */
- arena_run_tree_t runs;
-
- /* Bin statistics. */
- malloc_bin_stats_t stats;
-};
-
-struct arena_s {
- /* This arena's index within the arenas array. */
- unsigned ind;
-
- /*
- * Number of threads currently assigned to this arena. This field is
- * protected by arenas_lock.
- */
- unsigned nthreads;
-
- /*
- * There are three classes of arena operations from a locking
- * perspective:
- * 1) Thread assignment (modifies nthreads) is protected by
- * arenas_lock.
- * 2) Bin-related operations are protected by bin locks.
- * 3) Chunk- and run-related operations are protected by this mutex.
- */
- malloc_mutex_t lock;
-
- arena_stats_t stats;
- /*
- * List of tcaches for extant threads associated with this arena.
- * Stats from these are merged incrementally, and at exit.
- */
- ql_head(tcache_t) tcache_ql;
-
- uint64_t prof_accumbytes;
-
- dss_prec_t dss_prec;
-
- /* Tree of dirty-page-containing chunks this arena manages. */
- arena_chunk_tree_t chunks_dirty;
-
- /*
- * In order to avoid rapid chunk allocation/deallocation when an arena
- * oscillates right on the cusp of needing a new chunk, cache the most
- * recently freed chunk. The spare is left in the arena's chunk trees
- * until it is deleted.
- *
- * There is one spare chunk per arena, rather than one spare total, in
- * order to avoid interactions between multiple threads that could make
- * a single spare inadequate.
- */
- arena_chunk_t *spare;
-
- /* Number of pages in active runs. */
- size_t nactive;
-
- /*
- * Current count of pages within unused runs that are potentially
- * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
- * By tracking this, we can institute a limit on how much dirty unused
- * memory is mapped for each arena.
- */
- size_t ndirty;
-
- /*
- * Approximate number of pages being purged. It is possible for
- * multiple threads to purge dirty pages concurrently, and they use
- * npurgatory to indicate the total number of pages all threads are
- * attempting to purge.
- */
- size_t npurgatory;
-
- /*
- * Size/address-ordered trees of this arena's available runs. The trees
- * are used for first-best-fit run allocation.
- */
- arena_avail_tree_t runs_avail;
-
- /* bins is used to store trees of free regions. */
- arena_bin_t bins[NBINS];
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern ssize_t opt_lg_dirty_mult;
-/*
- * small_size2bin is a compact lookup table that rounds request sizes up to
- * size classes. In order to reduce cache footprint, the table is compressed,
- * and all accesses are via the SMALL_SIZE2BIN macro.
- */
-extern uint8_t const small_size2bin[];
-#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN])
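To illustrate the compression, a standalone sketch follows (LG_TINY_MIN == 3, i.e. an 8-byte minimum size class, is an assumption here, and the index macro is a local stand-in for the table subscript above):

#include <assert.h>

#define LG_TINY_MIN 3	/* assumed minimum size class of 8 bytes */
#define SMALL_SIZE2BIN_INDEX(s) (((s)-1) >> LG_TINY_MIN)

int
main(void)
{

	/*
	 * Request sizes 1..8 share table entry 0 and 9..16 share entry 1,
	 * so one table byte covers eight possible request sizes.
	 */
	assert(SMALL_SIZE2BIN_INDEX(1) == 0 && SMALL_SIZE2BIN_INDEX(8) == 0);
	assert(SMALL_SIZE2BIN_INDEX(9) == 1 && SMALL_SIZE2BIN_INDEX(16) == 1);
	return (0);
}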
-
-extern arena_bin_info_t arena_bin_info[NBINS];
-
-/* Number of large size classes. */
-#define nlclasses (chunk_npages - map_bias)
-
-void arena_purge_all(arena_t *arena);
-void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
- size_t binind, uint64_t prof_accumbytes);
-void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
- bool zero);
-#ifdef JEMALLOC_JET
-typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t,
- uint8_t);
-extern arena_redzone_corruption_t *arena_redzone_corruption;
-typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *);
-extern arena_dalloc_junk_small_t *arena_dalloc_junk_small;
-#else
-void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
-#endif
-void arena_quarantine_junk_small(void *ptr, size_t usize);
-void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
-void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
-void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
-void arena_prof_promoted(const void *ptr, size_t size);
-void arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- arena_chunk_map_t *mapelm);
-void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind, arena_chunk_map_t *mapelm);
-void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind);
-#ifdef JEMALLOC_JET
-typedef void (arena_dalloc_junk_large_t)(void *, size_t);
-extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
-#endif
-void arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
- void *ptr);
-void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
-#ifdef JEMALLOC_JET
-typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
-extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
-#endif
-bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
- size_t extra, bool zero);
-void *arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
- bool try_tcache_dalloc);
-dss_prec_t arena_dss_prec_get(arena_t *arena);
-void arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
-void arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
- size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
- malloc_large_stats_t *lstats);
-bool arena_new(arena_t *arena, unsigned ind);
-void arena_boot(void);
-void arena_prefork(arena_t *arena);
-void arena_postfork_parent(arena_t *arena);
-void arena_postfork_child(arena_t *arena);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-arena_chunk_map_t *arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
-size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbitsp_read(size_t *mapbitsp);
-size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
- size_t pageind);
-size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
-size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
-void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits);
-void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
- size_t size, size_t flags);
-void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
- size_t size);
-void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
- size_t size, size_t flags);
-void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind);
-void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
- size_t runind, size_t binind, size_t flags);
-void arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed);
-bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
-bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
-bool arena_prof_accum(arena_t *arena, uint64_t accumbytes);
-size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
-size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
-unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
- const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
-size_t arena_salloc(const void *ptr, bool demote);
-void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- bool try_tcache);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
-# ifdef JEMALLOC_ARENA_INLINE_A
-JEMALLOC_ALWAYS_INLINE arena_chunk_map_t *
-arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
-{
-
- assert(pageind >= map_bias);
- assert(pageind < chunk_npages);
-
- return (&chunk->map[pageind-map_bias]);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t *
-arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
-{
-
- return (&arena_mapp_get(chunk, pageind)->bits);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbitsp_read(size_t *mapbitsp)
-{
-
- return (*mapbitsp);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
-{
-
- return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind)));
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- return (mapbits & ~PAGE_MASK);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
- (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
- return (mapbits & ~PAGE_MASK);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
- CHUNK_MAP_ALLOCATED);
- return (mapbits >> LG_PAGE);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
- size_t binind;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
- assert(binind < NBINS || binind == BININD_INVALID);
- return (binind);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- return (mapbits & CHUNK_MAP_DIRTY);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- return (mapbits & CHUNK_MAP_UNZEROED);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- return (mapbits & CHUNK_MAP_LARGE);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
-{
- size_t mapbits;
-
- mapbits = arena_mapbits_get(chunk, pageind);
- return (mapbits & CHUNK_MAP_ALLOCATED);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits)
-{
-
- *mapbitsp = mapbits;
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
- size_t flags)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
-
- assert((size & PAGE_MASK) == 0);
- assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
- assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
- size_t size)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
-
- assert((size & PAGE_MASK) == 0);
- assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
- arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK));
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
- size_t flags)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
-
- assert((size & PAGE_MASK) == 0);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags
- | unzeroed | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
- size_t binind)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
-
- assert(binind <= BININD_INVALID);
- assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
- arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
- (binind << CHUNK_MAP_BININD_SHIFT));
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
- size_t binind, size_t flags)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
- size_t unzeroed;
-
- assert(binind < BININD_INVALID);
- assert(pageind - runind >= map_bias);
- assert((flags & CHUNK_MAP_DIRTY) == flags);
- unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
- arena_mapbitsp_write(mapbitsp, (runind << LG_PAGE) | (binind <<
- CHUNK_MAP_BININD_SHIFT) | flags | unzeroed | CHUNK_MAP_ALLOCATED);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
- size_t unzeroed)
-{
- size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- size_t mapbits = arena_mapbitsp_read(mapbitsp);
-
- arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_UNZEROED) |
- unzeroed);
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes)
-{
-
- cassert(config_prof);
- assert(prof_interval != 0);
-
- arena->prof_accumbytes += accumbytes;
- if (arena->prof_accumbytes >= prof_interval) {
- arena->prof_accumbytes -= prof_interval;
- return (true);
- }
- return (false);
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes)
-{
-
- cassert(config_prof);
-
- if (prof_interval == 0)
- return (false);
- return (arena_prof_accum_impl(arena, accumbytes));
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum(arena_t *arena, uint64_t accumbytes)
-{
-
- cassert(config_prof);
-
- if (prof_interval == 0)
- return (false);
-
- {
- bool ret;
-
- malloc_mutex_lock(&arena->lock);
- ret = arena_prof_accum_impl(arena, accumbytes);
- malloc_mutex_unlock(&arena->lock);
- return (ret);
- }
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
-{
- size_t binind;
-
- binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
-
- if (config_debug) {
- arena_chunk_t *chunk;
- arena_t *arena;
- size_t pageind;
- size_t actual_mapbits;
- arena_run_t *run;
- arena_bin_t *bin;
- size_t actual_binind;
- arena_bin_info_t *bin_info;
-
- assert(binind != BININD_INVALID);
- assert(binind < NBINS);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- arena = chunk->arena;
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- actual_mapbits = arena_mapbits_get(chunk, pageind);
- assert(mapbits == actual_mapbits);
- assert(arena_mapbits_large_get(chunk, pageind) == 0);
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- (actual_mapbits >> LG_PAGE)) << LG_PAGE));
- bin = run->bin;
- actual_binind = bin - arena->bins;
- assert(binind == actual_binind);
- bin_info = &arena_bin_info[actual_binind];
- assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
- == 0);
- }
-
- return (binind);
-}
-# endif /* JEMALLOC_ARENA_INLINE_A */
-
-# ifdef JEMALLOC_ARENA_INLINE_B
-JEMALLOC_INLINE size_t
-arena_bin_index(arena_t *arena, arena_bin_t *bin)
-{
- size_t binind = bin - arena->bins;
- assert(binind < NBINS);
- return (binind);
-}
-
-JEMALLOC_INLINE unsigned
-arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
-{
- unsigned shift, diff, regind;
- size_t interval;
-
- /*
- * Freeing a pointer lower than region zero can cause assertion
- * failure.
- */
- assert((uintptr_t)ptr >= (uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset);
-
- /*
- * Avoid doing division with a variable divisor if possible. Using
- * actual division here can reduce allocator throughput by over 20%!
- */
- diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
- bin_info->reg0_offset);
-
- /* Rescale (factor powers of 2 out of the numerator and denominator). */
- interval = bin_info->reg_interval;
- shift = ffs(interval) - 1;
- diff >>= shift;
- interval >>= shift;
-
- if (interval == 1) {
- /* The divisor was a power of 2. */
- regind = diff;
- } else {
- /*
- * To divide by a number D that is not a power of two we
- * multiply by (2^21 / D) and then right shift by 21 positions.
- *
- * X / D
- *
- * becomes
- *
- * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
- *
- * We can omit the first three elements, because we never
- * divide by 0, and 1 and 2 are both powers of two, which are
- * handled above.
- */
-#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
- static const unsigned interval_invs[] = {
- SIZE_INV(3),
- SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
- SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
- SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
- SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
- SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
- SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
- SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
- };
-
- if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
- 2)) {
- regind = (diff * interval_invs[interval - 3]) >>
- SIZE_INV_SHIFT;
- } else
- regind = diff / interval;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
- }
- assert(diff == regind * interval);
- assert(regind < bin_info->nregs);
-
- return (regind);
-}
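A standalone check of the multiply-by-inverse trick used above (SIZE_INV_SHIFT == 21 assumes a 32-bit unsigned and LG_RUN_MAXREGS == 11, so region indices stay below 2048 and the products cannot overflow):

#include <assert.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
	unsigned regind;

	/*
	 * (2^21 / 3) + 1 == 699051, so 3 * SIZE_INV(3) == 2^21 + 1 and the
	 * shift recovers diff / 3 exactly for every feasible region index.
	 */
	for (regind = 0; regind < 2048; regind++) {
		unsigned diff = regind * 3; /* interval == 3 */

		assert((diff * SIZE_INV(3)) >> SIZE_INV_SHIFT == regind);
	}
	return (0);
}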
-
-JEMALLOC_INLINE prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
- size_t pageind, mapbits;
-
- cassert(config_prof);
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- if (prof_promote)
- ret = (prof_ctx_t *)(uintptr_t)1U;
- else {
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind = arena_ptr_small_binind_get(ptr,
- mapbits);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- unsigned regind;
-
- regind = arena_run_regind(run, bin_info, ptr);
- ret = *(prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind *
- sizeof(prof_ctx_t *)));
- }
- } else
- ret = arena_mapp_get(chunk, pageind)->prof_ctx;
-
- return (ret);
-}
-
-JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
-{
- arena_chunk_t *chunk;
- size_t pageind;
-
- cassert(config_prof);
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
-
- if (usize > SMALL_MAXCLASS || (prof_promote &&
- ((uintptr_t)ctx != (uintptr_t)1U || arena_mapbits_large_get(chunk,
- pageind) != 0))) {
- assert(arena_mapbits_large_get(chunk, pageind) != 0);
- arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
- } else {
- assert(arena_mapbits_large_get(chunk, pageind) == 0);
- if (prof_promote == false) {
- size_t mapbits = arena_mapbits_get(chunk, pageind);
- arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
- (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
- LG_PAGE));
- size_t binind;
- arena_bin_info_t *bin_info;
- unsigned regind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- bin_info = &arena_bin_info[binind];
- regind = arena_run_regind(run, bin_info, ptr);
-
- *((prof_ctx_t **)((uintptr_t)run +
- bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
- *)))) = ctx;
- }
- }
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
-{
- tcache_t *tcache;
-
- assert(size != 0);
- assert(size <= arena_maxclass);
-
- if (size <= SMALL_MAXCLASS) {
- if (try_tcache && (tcache = tcache_get(true)) != NULL)
- return (tcache_alloc_small(tcache, size, zero));
- else {
- return (arena_malloc_small(choose_arena(arena), size,
- zero));
- }
- } else {
- /*
- * Initialize tcache after checking size in order to avoid
- * infinite recursion during tcache initialization.
- */
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(true)) != NULL)
- return (tcache_alloc_large(tcache, size, zero));
- else {
- return (arena_malloc_large(choose_arena(arena), size,
- zero));
- }
- }
-}
-
-/* Return the size of the allocation pointed to by ptr. */
-JEMALLOC_ALWAYS_INLINE size_t
-arena_salloc(const void *ptr, bool demote)
-{
- size_t ret;
- arena_chunk_t *chunk;
- size_t pageind, binind;
-
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- binind = arena_mapbits_binind_get(chunk, pageind);
- if (binind == BININD_INVALID || (config_prof && demote == false &&
- prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
- /*
- * Large allocation. In the common case (demote == true), and
- * as this is an inline function, most callers will only end up
- * looking at binind to determine that ptr is a small
- * allocation.
- */
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
- ret = arena_mapbits_large_size_get(chunk, pageind);
- assert(ret != 0);
- assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
- assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
- pageind+(ret>>LG_PAGE)-1) == 0);
- assert(binind == arena_mapbits_binind_get(chunk,
- pageind+(ret>>LG_PAGE)-1));
- assert(arena_mapbits_dirty_get(chunk, pageind) ==
- arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
- } else {
- /*
- * Small allocation (possibly promoted to a large object due to
- * prof_promote).
- */
- assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
- arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
- pageind)) == binind);
- ret = arena_bin_info[binind].reg_size;
- }
-
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
-{
- size_t pageind, mapbits;
- tcache_t *tcache;
-
- assert(arena != NULL);
- assert(chunk->arena == arena);
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- mapbits = arena_mapbits_get(chunk, pageind);
- assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
- /* Small allocation. */
- if (try_tcache && (tcache = tcache_get(false)) != NULL) {
- size_t binind;
-
- binind = arena_ptr_small_binind_get(ptr, mapbits);
- tcache_dalloc_small(tcache, ptr, binind);
- } else
- arena_dalloc_small(arena, chunk, ptr, pageind);
- } else {
- size_t size = arena_mapbits_large_size_get(chunk, pageind);
-
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-
- if (try_tcache && size <= tcache_maxclass && (tcache =
- tcache_get(false)) != NULL) {
- tcache_dalloc_large(tcache, ptr, size);
- } else
- arena_dalloc_large(arena, chunk, ptr);
- }
-}
-# endif /* JEMALLOC_ARENA_INLINE_B */
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_externs.h b/deps/jemalloc/include/jemalloc/internal/arena_externs.h
new file mode 100644
index 000000000..4b3732b41
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_externs.h
@@ -0,0 +1,94 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
+#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
+
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/stats.h"
+
+extern ssize_t opt_dirty_decay_ms;
+extern ssize_t opt_muzzy_decay_ms;
+
+extern percpu_arena_mode_t opt_percpu_arena;
+extern const char *percpu_arena_mode_names[];
+
+extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
+extern malloc_mutex_t arenas_lock;
+
+void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
+ unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
+ ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
+void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+ const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
+ size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
+ bin_stats_t *bstats, arena_stats_large_t *lstats);
+void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent);
+#ifdef JEMALLOC_JET
+size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr);
+#endif
+extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
+ size_t usize, size_t alignment, bool *zero);
+void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
+ extent_t *extent);
+void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena,
+ extent_t *extent, size_t oldsize);
+void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena,
+ extent_t *extent, size_t oldsize);
+ssize_t arena_dirty_decay_ms_get(arena_t *arena);
+bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
+ssize_t arena_muzzy_decay_ms_get(arena_t *arena);
+bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
+void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
+ bool all);
+void arena_reset(tsd_t *tsd, arena_t *arena);
+void arena_destroy(tsd_t *tsd, arena_t *arena);
+void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+ cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
+ bool zero);
+
+typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *);
+extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
+
+void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
+ szind_t ind, bool zero);
+void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache);
+void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
+void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+ bool slow_path);
+void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+ extent_t *extent, void *ptr);
+void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
+bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+ size_t extra, bool zero);
+void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
+ size_t size, size_t alignment, bool zero, tcache_t *tcache);
+dss_prec_t arena_dss_prec_get(arena_t *arena);
+bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+ssize_t arena_dirty_decay_ms_default_get(void);
+bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
+ssize_t arena_muzzy_decay_ms_default_get(void);
+bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
+bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
+ size_t *old_limit, size_t *new_limit);
+unsigned arena_nthreads_get(arena_t *arena, bool internal);
+void arena_nthreads_inc(arena_t *arena, bool internal);
+void arena_nthreads_dec(arena_t *arena, bool internal);
+size_t arena_extent_sn_next(arena_t *arena);
+arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+void arena_boot(void);
+void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
+void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
+
+#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h
new file mode 100644
index 000000000..9abf7f6ac
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h
@@ -0,0 +1,57 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H
+#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H
+
+static inline unsigned
+arena_ind_get(const arena_t *arena) {
+ return base_ind_get(arena->base);
+}
+
+static inline void
+arena_internal_add(arena_t *arena, size_t size) {
+ atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED);
+}
+
+static inline void
+arena_internal_sub(arena_t *arena, size_t size) {
+ atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED);
+}
+
+static inline size_t
+arena_internal_get(arena_t *arena) {
+ return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED);
+}
+
+static inline bool
+arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
+ cassert(config_prof);
+
+ if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
+ return false;
+ }
+
+ return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
+}
+
+static inline void
+percpu_arena_update(tsd_t *tsd, unsigned cpu) {
+ assert(have_percpu_arena);
+ arena_t *oldarena = tsd_arena_get(tsd);
+ assert(oldarena != NULL);
+ unsigned oldind = arena_ind_get(oldarena);
+
+ if (oldind != cpu) {
+ unsigned newind = cpu;
+ arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true);
+ assert(newarena != NULL);
+
+ /* Set new arena/tcache associations. */
+ arena_migrate(tsd, oldind, newind);
+ tcache_t *tcache = tcache_get(tsd);
+ if (tcache != NULL) {
+ tcache_arena_reassociate(tsd_tsdn(tsd), tcache,
+ newarena);
+ }
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
new file mode 100644
index 000000000..2b7e77e72
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
@@ -0,0 +1,354 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
+#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/ticker.h"
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ /* Static check. */
+ if (alloc_ctx == NULL) {
+ const extent_t *extent = iealloc(tsdn, ptr);
+ if (unlikely(!extent_slab_get(extent))) {
+ return large_prof_tctx_get(tsdn, extent);
+ }
+ } else {
+ if (unlikely(!alloc_ctx->slab)) {
+ return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr));
+ }
+ }
+ return (prof_tctx_t *)(uintptr_t)1U;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize,
+ alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ /* Static check. */
+ if (alloc_ctx == NULL) {
+ extent_t *extent = iealloc(tsdn, ptr);
+ if (unlikely(!extent_slab_get(extent))) {
+ large_prof_tctx_set(tsdn, extent, tctx);
+ }
+ } else {
+ if (unlikely(!alloc_ctx->slab)) {
+ large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx);
+ }
+ }
+}
+
+static inline void
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ extent_t *extent = iealloc(tsdn, ptr);
+ assert(!extent_slab_get(extent));
+
+ large_prof_tctx_reset(tsdn, extent);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
+ tsd_t *tsd;
+ ticker_t *decay_ticker;
+
+ if (unlikely(tsdn_null(tsdn))) {
+ return;
+ }
+ tsd = tsdn_tsd(tsdn);
+ decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena));
+ if (unlikely(decay_ticker == NULL)) {
+ return;
+ }
+ if (unlikely(ticker_ticks(decay_ticker, nticks))) {
+ arena_decay(tsdn, arena, false, false);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx);
+ malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx);
+
+ arena_decay_ticks(tsdn, arena, 1);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
+ tcache_t *tcache, bool slow_path) {
+ assert(!tsdn_null(tsdn) || tcache == NULL);
+ assert(size != 0);
+
+ if (likely(tcache != NULL)) {
+ if (likely(size <= SMALL_MAXCLASS)) {
+ return tcache_alloc_small(tsdn_tsd(tsdn), arena,
+ tcache, size, ind, zero, slow_path);
+ }
+ if (likely(size <= tcache_maxclass)) {
+ return tcache_alloc_large(tsdn_tsd(tsdn), arena,
+ tcache, size, ind, zero, slow_path);
+ }
+ /* (size > tcache_maxclass) case falls through. */
+ assert(size > tcache_maxclass);
+ }
+
+ return arena_malloc_hard(tsdn, arena, size, ind, zero);
+}
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+arena_aalloc(tsdn_t *tsdn, const void *ptr) {
+ return extent_arena_get(iealloc(tsdn, ptr));
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_salloc(tsdn_t *tsdn, const void *ptr) {
+ assert(ptr != NULL);
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true);
+ assert(szind != NSIZES);
+
+ return sz_index2size(szind);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
+ /*
+ * Return 0 if ptr is not within an extent managed by jemalloc. This
+ * function has two extra costs relative to isalloc():
+ * - The rtree calls cannot claim to be dependent lookups, which induces
+ * rtree lookup load dependencies.
+ * - The lookup may fail, so there is an extra branch to check for
+ * failure.
+ */
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ extent_t *extent;
+ szind_t szind;
+ if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, false, &extent, &szind)) {
+ return 0;
+ }
+
+ if (extent == NULL) {
+ return 0;
+ }
+ assert(extent_state_get(extent) == extent_state_active);
+ /* Only slab members should be looked up via interior pointers. */
+ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent));
+
+ assert(szind != NSIZES);
+
+ return sz_index2size(szind);
+}
+
+static inline void
+arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
+ assert(ptr != NULL);
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ szind_t szind;
+ bool slab;
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
+ true, &szind, &slab);
+
+ if (config_debug) {
+ extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
+ rtree_ctx, (uintptr_t)ptr, true);
+ assert(szind == extent_szind_get(extent));
+ assert(szind < NSIZES);
+ assert(slab == extent_slab_get(extent));
+ }
+
+ if (likely(slab)) {
+ /* Small allocation. */
+ arena_dalloc_small(tsdn, ptr);
+ } else {
+ extent_t *extent = iealloc(tsdn, ptr);
+ large_dalloc(tsdn, extent);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+ alloc_ctx_t *alloc_ctx, bool slow_path) {
+ assert(!tsdn_null(tsdn) || tcache == NULL);
+ assert(ptr != NULL);
+
+ if (unlikely(tcache == NULL)) {
+ arena_dalloc_no_tcache(tsdn, ptr);
+ return;
+ }
+
+ szind_t szind;
+ bool slab;
+ rtree_ctx_t *rtree_ctx;
+ if (alloc_ctx != NULL) {
+ szind = alloc_ctx->szind;
+ slab = alloc_ctx->slab;
+ assert(szind != NSIZES);
+ } else {
+ rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &szind, &slab);
+ }
+
+ if (config_debug) {
+ rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+ extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
+ rtree_ctx, (uintptr_t)ptr, true);
+ assert(szind == extent_szind_get(extent));
+ assert(szind < NSIZES);
+ assert(slab == extent_slab_get(extent));
+ }
+
+ if (likely(slab)) {
+ /* Small allocation. */
+ tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+ slow_path);
+ } else {
+ if (szind < nhbins) {
+ if (config_prof && unlikely(szind < NBINS)) {
+ arena_dalloc_promoted(tsdn, ptr, tcache,
+ slow_path);
+ } else {
+ tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+ szind, slow_path);
+ }
+ } else {
+ extent_t *extent = iealloc(tsdn, ptr);
+ large_dalloc(tsdn, extent);
+ }
+ }
+}
+
+static inline void
+arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
+ assert(ptr != NULL);
+ assert(size <= LARGE_MAXCLASS);
+
+ szind_t szind;
+ bool slab;
+ if (!config_prof || !opt_prof) {
+ /*
+ * There is no risk of being confused by a promoted sampled
+ * object, so base szind and slab on the given size.
+ */
+ szind = sz_size2index(size);
+ slab = (szind < NBINS);
+ }
+
+ if ((config_prof && opt_prof) || config_debug) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+ &rtree_ctx_fallback);
+
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &szind, &slab);
+
+ assert(szind == sz_size2index(size));
+ assert((config_prof && opt_prof) || slab == (szind < NBINS));
+
+ if (config_debug) {
+ extent_t *extent = rtree_extent_read(tsdn,
+ &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+ assert(szind == extent_szind_get(extent));
+ assert(slab == extent_slab_get(extent));
+ }
+ }
+
+ if (likely(slab)) {
+ /* Small allocation. */
+ arena_dalloc_small(tsdn, ptr);
+ } else {
+ extent_t *extent = iealloc(tsdn, ptr);
+ large_dalloc(tsdn, extent);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+ alloc_ctx_t *alloc_ctx, bool slow_path) {
+ assert(!tsdn_null(tsdn) || tcache == NULL);
+ assert(ptr != NULL);
+ assert(size <= LARGE_MAXCLASS);
+
+ if (unlikely(tcache == NULL)) {
+ arena_sdalloc_no_tcache(tsdn, ptr, size);
+ return;
+ }
+
+ szind_t szind;
+ bool slab;
+ UNUSED alloc_ctx_t local_ctx;
+ if (config_prof && opt_prof) {
+ if (alloc_ctx == NULL) {
+ /* Uncommon case and should be a static check. */
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+ &rtree_ctx_fallback);
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &local_ctx.szind,
+ &local_ctx.slab);
+ assert(local_ctx.szind == sz_size2index(size));
+ alloc_ctx = &local_ctx;
+ }
+ slab = alloc_ctx->slab;
+ szind = alloc_ctx->szind;
+ } else {
+ /*
+ * There is no risk of being confused by a promoted sampled
+ * object, so base szind and slab on the given size.
+ */
+ szind = sz_size2index(size);
+ slab = (szind < NBINS);
+ }
+
+ if (config_debug) {
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &szind, &slab);
+ extent_t *extent = rtree_extent_read(tsdn,
+ &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+ assert(szind == extent_szind_get(extent));
+ assert(slab == extent_slab_get(extent));
+ }
+
+ if (likely(slab)) {
+ /* Small allocation. */
+ tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+ slow_path);
+ } else {
+ if (szind < nhbins) {
+ if (config_prof && unlikely(szind < NBINS)) {
+ arena_dalloc_promoted(tsdn, ptr, tcache,
+ slow_path);
+ } else {
+ tcache_dalloc_large(tsdn_tsd(tsdn),
+ tcache, ptr, szind, slow_path);
+ }
+ } else {
+ extent_t *extent = iealloc(tsdn, ptr);
+ large_dalloc(tsdn, extent);
+ }
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_stats.h b/deps/jemalloc/include/jemalloc/internal/arena_stats.h
new file mode 100644
index 000000000..5f3dca8b1
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_stats.h
@@ -0,0 +1,237 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H
+#define JEMALLOC_INTERNAL_ARENA_STATS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/size_classes.h"
+
+/*
+ * On architectures that support 64-bit atomics, we use atomic updates for
+ * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
+ * externally.
+ */
+#ifdef JEMALLOC_ATOMIC_U64
+typedef atomic_u64_t arena_stats_u64_t;
+#else
+/* Must hold the arena stats mutex while reading or writing these. */
+typedef uint64_t arena_stats_u64_t;
+#endif
+
+typedef struct arena_stats_large_s arena_stats_large_t;
+struct arena_stats_large_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the arena.
+ */
+ arena_stats_u64_t nmalloc;
+ arena_stats_u64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to this size class.
+ * This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ arena_stats_u64_t nrequests; /* Partially derived. */
+
+ /* Current number of allocations of this size class. */
+ size_t curlextents; /* Derived. */
+};
+
+typedef struct arena_stats_decay_s arena_stats_decay_t;
+struct arena_stats_decay_s {
+ /* Total number of purge sweeps. */
+ arena_stats_u64_t npurge;
+ /* Total number of madvise calls made. */
+ arena_stats_u64_t nmadvise;
+ /* Total number of pages purged. */
+ arena_stats_u64_t purged;
+};
+
+/*
+ * Arena stats. Note that fields marked "derived" are not directly maintained
+ * within the arena code; rather their values are derived during stats merge
+ * requests.
+ */
+typedef struct arena_stats_s arena_stats_t;
+struct arena_stats_s {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_t mtx;
+#endif
+
+ /* Number of bytes currently mapped, excluding retained memory. */
+ atomic_zu_t mapped; /* Partially derived. */
+
+ /*
+ * Number of unused virtual memory bytes currently retained. Retained
+ * bytes are technically mapped (though always decommitted or purged),
+ * but they are excluded from the mapped statistic (above).
+ */
+ atomic_zu_t retained; /* Derived. */
+
+ arena_stats_decay_t decay_dirty;
+ arena_stats_decay_t decay_muzzy;
+
+ atomic_zu_t base; /* Derived. */
+ atomic_zu_t internal;
+ atomic_zu_t resident; /* Derived. */
+ atomic_zu_t metadata_thp;
+
+ atomic_zu_t allocated_large; /* Derived. */
+ arena_stats_u64_t nmalloc_large; /* Derived. */
+ arena_stats_u64_t ndalloc_large; /* Derived. */
+ arena_stats_u64_t nrequests_large; /* Derived. */
+
+ /* Number of bytes cached in tcache associated with this arena. */
+ atomic_zu_t tcache_bytes; /* Derived. */
+
+ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
+
+ /* One element for each large size class. */
+ arena_stats_large_t lstats[NSIZES - NBINS];
+
+ /* Arena uptime. */
+ nstime_t uptime;
+};
+
+static inline bool
+arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) {
+ if (config_debug) {
+ for (size_t i = 0; i < sizeof(arena_stats_t); i++) {
+ assert(((char *)arena_stats)[i] == 0);
+ }
+ }
+#ifndef JEMALLOC_ATOMIC_U64
+ if (malloc_mutex_init(&arena_stats->mtx, "arena_stats",
+ WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+#endif
+ /* Memory is zeroed, so there is no need to clear stats. */
+ return false;
+}
+
+static inline void
+arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_lock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline void
+arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_unlock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline uint64_t
+arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_u64(p, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ return *p;
+#endif
+}
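A sketch of a caller honoring that contract (the wrapper name is hypothetical): on platforms without 64-bit atomics the lock/unlock pair takes the stats mutex, elsewhere it compiles away.

static inline uint64_t
arena_stats_nmalloc_large_read(tsdn_t *tsdn, arena_stats_t *arena_stats) {
	uint64_t nmalloc;

	arena_stats_lock(tsdn, arena_stats);
	nmalloc = arena_stats_read_u64(tsdn, arena_stats,
	    &arena_stats->nmalloc_large);
	arena_stats_unlock(tsdn, arena_stats);
	return nmalloc;
}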
+
+static inline void
+arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ *p += x;
+#endif
+}
+
+UNUSED static inline void
+arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ *p -= x;
+ assert(*p + x >= *p);
+#endif
+}
+
+/*
+ * Non-atomically sets *dst += src. *dst needs external synchronization.
+ * This lets us avoid the cost of a fetch_add when it's unnecessary (note that
+ * the types here are atomic).
+ */
+static inline void
+arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
+#ifdef JEMALLOC_ATOMIC_U64
+ uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
+ atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED);
+#else
+ *dst += src;
+#endif
+}
+
+static inline size_t
+arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_zu(p, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ return atomic_load_zu(p, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+ size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+ atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+ size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+ size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+ atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
+#endif
+}
+
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static inline void
+arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
+ size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+ atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
+}
+
+static inline void
+arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+ szind_t szind, uint64_t nrequests) {
+ arena_stats_lock(tsdn, arena_stats);
+ arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind -
+ NBINS].nrequests, nrequests);
+ arena_stats_unlock(tsdn, arena_stats);
+}
+
+static inline void
+arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
+ arena_stats_lock(tsdn, arena_stats);
+ arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
+ arena_stats_unlock(tsdn, arena_stats);
+}
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h
new file mode 100644
index 000000000..46aa77c88
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h
@@ -0,0 +1,11 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
+#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
+
+#include "jemalloc/internal/bitmap.h"
+
+struct arena_slab_data_s {
+ /* Per region allocated/deallocated bitmap. */
+ bitmap_t bitmap[BITMAP_GROUPS_MAX];
+};
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
new file mode 100644
index 000000000..38bc95962
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
@@ -0,0 +1,229 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
+#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
+
+#include "jemalloc/internal/arena_stats.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
+#include "jemalloc/internal/ticker.h"
+
+struct arena_decay_s {
+ /* Synchronizes all non-atomic fields. */
+ malloc_mutex_t mtx;
+ /*
+ * True if a thread is currently purging the extents associated with
+ * this decay structure.
+ */
+ bool purging;
+ /*
+ * Approximate time in milliseconds from the creation of a set of unused
+ * dirty pages until an equivalent set of unused dirty pages is purged
+ * and/or reused.
+ */
+ atomic_zd_t time_ms;
+ /* time / SMOOTHSTEP_NSTEPS. */
+ nstime_t interval;
+ /*
+ * Time at which the current decay interval logically started. We do
+ * not actually advance to a new epoch until sometime after it starts
+ * because of scheduling and computation delays, and it is even possible
+ * to completely skip epochs. In all cases, during epoch advancement we
+ * merge all relevant activity into the most recently recorded epoch.
+ */
+ nstime_t epoch;
+ /* Deadline randomness generator. */
+ uint64_t jitter_state;
+ /*
+ * Deadline for the current epoch. This is the sum of interval and a
+ * per-epoch jitter, a uniform random variable in [0..interval).
+ * Epochs always advance by precise multiples of interval, but we
+ * randomize the deadline to reduce the likelihood of arenas purging in
+ * lockstep.
+ */
+ nstime_t deadline;
+ /*
+ * Number of unpurged pages at beginning of current epoch. During epoch
+ * advancement we use the delta between arena->decay_*.nunpurged and
+ * extents_npages_get(&arena->extents_*) to determine how many dirty
+ * pages, if any, were generated.
+ */
+ size_t nunpurged;
+ /*
+ * Trailing log of how many unused dirty pages were generated during
+ * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+ * element is the most recent epoch. Corresponding epoch times are
+ * relative to epoch.
+ */
+ size_t backlog[SMOOTHSTEP_NSTEPS];
+
+ /*
+ * Pointer to associated stats. These stats are embedded directly in
+ * the arena's stats due to how stats structures are shared between the
+ * arena and ctl code.
+ *
+ * Synchronization: Same as associated arena's stats field.
+ */
+ arena_stats_decay_t *stats;
+ /* Peak number of pages in associated extents. Used for debug only. */
+ uint64_t ceil_npages;
+};
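As a worked example of the timing fields above (assuming the default dirty decay time of 10 s, per DIRTY_DECAY_MS_DEFAULT below, and SMOOTHSTEP_NSTEPS == 200): interval == 10 000 ms / 200 == 50 ms, the backlog therefore spans the full 10 s decay window, and each deadline is epoch + 50 ms plus a jitter drawn uniformly from [0..50 ms).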
+
+struct arena_s {
+ /*
+ * Number of threads currently assigned to this arena. Each thread has
+ * two distinct assignments, one for application-serving allocation, and
+ * the other for internal metadata allocation. Internal metadata must
+ * not be allocated from arenas explicitly created via the arenas.create
+ * mallctl, because the arena.<i>.reset mallctl indiscriminately
+ * discards all allocations for the affected arena.
+ *
+ * 0: Application allocation.
+ * 1: Internal metadata allocation.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_u_t nthreads[2];
+
+ /*
+ * When percpu_arena is enabled, to amortize the cost of reading /
+ * updating the current CPU id, track the most recent thread accessing
+ * this arena, and only re-read the CPU id when the thread changes.
+ */
+ tsdn_t *last_thd;
+
+ /* Synchronization: internal. */
+ arena_stats_t stats;
+
+ /*
+ * Lists of tcaches and cache_bin_array_descriptors for extant threads
+ * associated with this arena. Stats from these are merged
+ * incrementally, and at exit if opt_stats_print is enabled.
+ *
+ * Synchronization: tcache_ql_mtx.
+ */
+ ql_head(tcache_t) tcache_ql;
+ ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
+ malloc_mutex_t tcache_ql_mtx;
+
+ /* Synchronization: internal. */
+ prof_accum_t prof_accum;
+ uint64_t prof_accumbytes;
+
+ /*
+ * PRNG state for cache index randomization of large allocation base
+ * pointers.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_zu_t offset_state;
+
+ /*
+ * Extent serial number generator state.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_zu_t extent_sn_next;
+
+ /*
+ * Represents a dss_prec_t, but atomically.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_u_t dss_prec;
+
+ /*
+ * Number of pages in active extents.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_zu_t nactive;
+
+ /*
+ * Extant large allocations.
+ *
+ * Synchronization: large_mtx.
+ */
+ extent_list_t large;
+ /* Synchronizes all large allocation/update/deallocation. */
+ malloc_mutex_t large_mtx;
+
+ /*
+ * Collections of extents that were previously allocated. These are
+ * used when allocating extents, in an attempt to re-use address space.
+ *
+ * Synchronization: internal.
+ */
+ extents_t extents_dirty;
+ extents_t extents_muzzy;
+ extents_t extents_retained;
+
+ /*
+ * Decay-based purging state, responsible for scheduling extent state
+ * transitions.
+ *
+ * Synchronization: internal.
+ */
+ arena_decay_t decay_dirty; /* dirty --> muzzy */
+ arena_decay_t decay_muzzy; /* muzzy --> retained */
+
+ /*
+ * Next extent size class in a growing series to use when satisfying a
+ * request via the extent hooks (only if opt_retain). This limits the
+ * number of disjoint virtual memory ranges so that extent merging can
+ * be effective even if multiple arenas' extent allocation requests are
+ * highly interleaved.
+ *
+ * retain_grow_limit is the max allowed size index to expand to (unless
+ * the required size is greater). Default is no limit, and controlled
+ * through mallctl only.
+ *
+ * Synchronization: extent_grow_mtx.
+ */
+ pszind_t extent_grow_next;
+ pszind_t retain_grow_limit;
+ malloc_mutex_t extent_grow_mtx;
+
+ /*
+ * Available extent structures that were allocated via
+ * base_alloc_extent().
+ *
+ * Synchronization: extent_avail_mtx.
+ */
+ extent_tree_t extent_avail;
+ malloc_mutex_t extent_avail_mtx;
+
+ /*
+ * bins is used to store heaps of free regions.
+ *
+ * Synchronization: internal.
+ */
+ bin_t bins[NBINS];
+
+ /*
+ * Base allocator, from which arena metadata are allocated.
+ *
+ * Synchronization: internal.
+ */
+ base_t *base;
+ /* Used to determine uptime. Read-only after initialization. */
+ nstime_t create_time;
+};
+
+/* Used in conjunction with tsd for fast arena-related context lookup. */
+struct arena_tdata_s {
+ ticker_t decay_ticker;
+};
+
+/* Used to pass rtree lookup context down the path. */
+struct alloc_ctx_s {
+ szind_t szind;
+ bool slab;
+};
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/arena_types.h b/deps/jemalloc/include/jemalloc/internal/arena_types.h
new file mode 100644
index 000000000..70001b5f1
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/arena_types.h
@@ -0,0 +1,43 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H
+#define JEMALLOC_INTERNAL_ARENA_TYPES_H
+
+/* Maximum number of regions in one slab. */
+#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN)
+#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS)
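For example, assuming LG_PAGE == 12 (4 KiB pages) and LG_TINY_MIN == 3 (an 8-byte minimum size class), LG_SLAB_MAXREGS == 12 - 3 == 9, so a slab holds at most SLAB_MAXREGS == 512 regions.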
+
+/* Default decay times in milliseconds. */
+#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000)
+#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000)
+/* Number of event ticks between time checks. */
+#define DECAY_NTICKS_PER_UPDATE 1000
+
+typedef struct arena_slab_data_s arena_slab_data_t;
+typedef struct arena_decay_s arena_decay_t;
+typedef struct arena_s arena_t;
+typedef struct arena_tdata_s arena_tdata_t;
+typedef struct alloc_ctx_s alloc_ctx_t;
+
+typedef enum {
+ percpu_arena_mode_names_base = 0, /* Used for options processing. */
+
+ /*
+	 * *_uninit are used only during bootstrapping; each must equal its
+	 * initialized variant minus percpu_arena_mode_enabled_base.
+ */
+ percpu_arena_uninit = 0,
+ per_phycpu_arena_uninit = 1,
+
+ /* All non-disabled modes must come after percpu_arena_disabled. */
+ percpu_arena_disabled = 2,
+
+ percpu_arena_mode_names_limit = 3, /* Used for options processing. */
+ percpu_arena_mode_enabled_base = 3,
+
+ percpu_arena = 3,
+ per_phycpu_arena = 4 /* Hyper threads share arena. */
+} percpu_arena_mode_t;
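+
+/*
+ * E.g. percpu_arena_uninit (0) + percpu_arena_mode_enabled_base (3) ==
+ * percpu_arena (3), and per_phycpu_arena_uninit (1) + 3 == per_phycpu_arena
+ * (4).
+ */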
+
+#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base)
+#define PERCPU_ARENA_DEFAULT percpu_arena_disabled
+
+#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/assert.h b/deps/jemalloc/include/jemalloc/internal/assert.h
new file mode 100644
index 000000000..be4d45b32
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/assert.h
@@ -0,0 +1,56 @@
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/util.h"
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifndef assert
+#define assert(e) do { \
+ if (unlikely(config_debug && !(e))) { \
+ malloc_printf( \
+ "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
+ __FILE__, __LINE__, #e); \
+ abort(); \
+ } \
+} while (0)
+#endif
+
+#ifndef not_reached
+#define not_reached() do { \
+ if (config_debug) { \
+ malloc_printf( \
+ "<jemalloc>: %s:%d: Unreachable code reached\n", \
+ __FILE__, __LINE__); \
+ abort(); \
+ } \
+ unreachable(); \
+} while (0)
+#endif
+
+#ifndef not_implemented
+#define not_implemented() do { \
+ if (config_debug) { \
+ malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \
+ __FILE__, __LINE__); \
+ abort(); \
+ } \
+} while (0)
+#endif
+
+#ifndef assert_not_implemented
+#define assert_not_implemented(e) do { \
+ if (unlikely(config_debug && !(e))) { \
+ not_implemented(); \
+ } \
+} while (0)
+#endif
+
+/* Use to assert a particular configuration, e.g., cassert(config_debug). */
+#ifndef cassert
+#define cassert(c) do { \
+ if (unlikely(!(c))) { \
+ not_reached(); \
+ } \
+} while (0)
+#endif
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h
index 11a7b47fe..adadb1a3a 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic.h
@@ -1,304 +1,77 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-#define atomic_read_uint64(p) atomic_add_uint64(p, 0)
-#define atomic_read_uint32(p) atomic_add_uint32(p, 0)
-#define atomic_read_z(p) atomic_add_z(p, 0)
-#define atomic_read_u(p) atomic_add_u(p, 0)
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-uint64_t atomic_add_uint64(uint64_t *p, uint64_t x);
-uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x);
-uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
-uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
-size_t atomic_add_z(size_t *p, size_t x);
-size_t atomic_sub_z(size_t *p, size_t x);
-unsigned atomic_add_u(unsigned *p, unsigned x);
-unsigned atomic_sub_u(unsigned *p, unsigned x);
+#ifndef JEMALLOC_INTERNAL_ATOMIC_H
+#define JEMALLOC_INTERNAL_ATOMIC_H
+
+#define ATOMIC_INLINE static inline
+
+#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
+# include "jemalloc/internal/atomic_gcc_atomic.h"
+#elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
+# include "jemalloc/internal/atomic_gcc_sync.h"
+#elif defined(_MSC_VER)
+# include "jemalloc/internal/atomic_msvc.h"
+#elif defined(JEMALLOC_C11_ATOMICS)
+# include "jemalloc/internal/atomic_c11.h"
+#else
+# error "Don't have atomics implemented on this platform."
#endif
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
-/******************************************************************************/
-/* 64-bit operations. */
+/*
+ * This header gives more or less a backport of C11 atomics. The user can write
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate
+ * counterparts of the C11 atomic functions for type, like so:
+ * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3);
+ * and then write things like:
+ * int *some_ptr;
+ * atomic_pi_t atomic_ptr_to_int;
+ * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED);
+ * int *prev_value = atomic_exchange_pi(&atomic_ptr_to_int, NULL,
+ *     ATOMIC_ACQ_REL);
+ * assert(some_ptr == prev_value);
+ * and expect things to work in the obvious way.
+ *
+ * Also included (with naming differences to avoid conflicts with the standard
+ * library):
+ * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence).
+ * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT).
+ */
+
+/*
+ * Pure convenience, so that we don't have to type "atomic_memory_order_"
+ * quite so often.
+ */
+#define ATOMIC_RELAXED atomic_memory_order_relaxed
+#define ATOMIC_ACQUIRE atomic_memory_order_acquire
+#define ATOMIC_RELEASE atomic_memory_order_release
+#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel
+#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst
+
+/*
+ * Not all platforms have 64-bit atomics. If we do, this #define exposes that
+ * fact.
+ */
#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-# ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- return (__sync_add_and_fetch(p, x));
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- return (__sync_sub_and_fetch(p, x));
-}
-#elif (defined(_MSC_VER))
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- return (InterlockedExchangeAdd64(p, x));
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- return (InterlockedExchangeAdd64(p, -((int64_t)x)));
-}
-#elif (defined(JEMALLOC_OSATOMIC))
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
-}
-# elif (defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- x = (uint64_t)(-(int64_t)x);
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
-}
-# elif (defined(JEMALLOC_ATOMIC9))
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- /*
- * atomic_fetchadd_64() doesn't exist, but we only ever use this
- * function on LP64 systems, so atomic_fetchadd_long() will do.
- */
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return (atomic_fetchadd_long(p, (unsigned long)x) + x);
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
-}
-# elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
-JEMALLOC_INLINE uint64_t
-atomic_add_uint64(uint64_t *p, uint64_t x)
-{
-
- return (__sync_add_and_fetch(p, x));
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_uint64(uint64_t *p, uint64_t x)
-{
-
- return (__sync_sub_and_fetch(p, x));
-}
-# else
-# error "Missing implementation for 64-bit atomic operations"
-# endif
+# define JEMALLOC_ATOMIC_U64
#endif
-/******************************************************************************/
-/* 32-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
+JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
- return (__sync_add_and_fetch(p, x));
-}
+/*
+ * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only
+ * platform that actually needs to know the size, MSVC.
+ */
+JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
+JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
- return (__sync_sub_and_fetch(p, x));
-}
-#elif (defined(_MSC_VER))
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
+JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
- return (InterlockedExchangeAdd(p, x));
-}
+JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
+JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
- return (InterlockedExchangeAdd(p, -((int32_t)x)));
-}
-#elif (defined(JEMALLOC_OSATOMIC))
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-
- return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-
- return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
-}
-#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-
- x = (uint32_t)(-(int32_t)x);
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (x), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return (x);
-}
-#elif (defined(JEMALLOC_ATOMIC9))
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-
- return (atomic_fetchadd_32(p, x) + x);
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-
- return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
-}
-#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
-JEMALLOC_INLINE uint32_t
-atomic_add_uint32(uint32_t *p, uint32_t x)
-{
-
- return (__sync_add_and_fetch(p, x));
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_uint32(uint32_t *p, uint32_t x)
-{
-
- return (__sync_sub_and_fetch(p, x));
-}
-#else
-# error "Missing implementation for 32-bit atomic operations"
-#endif
-
-/******************************************************************************/
-/* size_t operations. */
-JEMALLOC_INLINE size_t
-atomic_add_z(size_t *p, size_t x)
-{
-
-#if (LG_SIZEOF_PTR == 3)
- return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
-#elif (LG_SIZEOF_PTR == 2)
- return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
-#endif
-}
-
-JEMALLOC_INLINE size_t
-atomic_sub_z(size_t *p, size_t x)
-{
-
-#if (LG_SIZEOF_PTR == 3)
- return ((size_t)atomic_add_uint64((uint64_t *)p,
- (uint64_t)-((int64_t)x)));
-#elif (LG_SIZEOF_PTR == 2)
- return ((size_t)atomic_add_uint32((uint32_t *)p,
- (uint32_t)-((int32_t)x)));
+#ifdef JEMALLOC_ATOMIC_U64
+JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
#endif
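+
+/*
+ * Illustrative usage sketch (hypothetical caller code, not part of this
+ * header).  Counters such as arena_t's nactive are declared atomic_zu_t and
+ * manipulated through the generated functions:
+ *
+ *   atomic_zu_t nactive = ATOMIC_INIT(0);
+ *   atomic_fetch_add_zu(&nactive, npages, ATOMIC_RELAXED);
+ *   size_t cur = atomic_load_zu(&nactive, ATOMIC_RELAXED);
+ *
+ * 64-bit (u64) counters must additionally be guarded on JEMALLOC_ATOMIC_U64.
+ */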
-}
-/******************************************************************************/
-/* unsigned operations. */
-JEMALLOC_INLINE unsigned
-atomic_add_u(unsigned *p, unsigned x)
-{
-
-#if (LG_SIZEOF_INT == 3)
- return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
-#elif (LG_SIZEOF_INT == 2)
- return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
-#endif
-}
-
-JEMALLOC_INLINE unsigned
-atomic_sub_u(unsigned *p, unsigned x)
-{
-
-#if (LG_SIZEOF_INT == 3)
- return ((unsigned)atomic_add_uint64((uint64_t *)p,
- (uint64_t)-((int64_t)x)));
-#elif (LG_SIZEOF_INT == 2)
- return ((unsigned)atomic_add_uint32((uint32_t *)p,
- (uint32_t)-((int32_t)x)));
-#endif
-}
-/******************************************************************************/
-#endif
+#undef ATOMIC_INLINE
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_ATOMIC_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_c11.h b/deps/jemalloc/include/jemalloc/internal/atomic_c11.h
new file mode 100644
index 000000000..a5f9313a6
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_c11.h
@@ -0,0 +1,97 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H
+#define JEMALLOC_INTERNAL_ATOMIC_C11_H
+
+#include <stdatomic.h>
+
+#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__)
+
+#define atomic_memory_order_t memory_order
+#define atomic_memory_order_relaxed memory_order_relaxed
+#define atomic_memory_order_acquire memory_order_acquire
+#define atomic_memory_order_release memory_order_release
+#define atomic_memory_order_acq_rel memory_order_acq_rel
+#define atomic_memory_order_seq_cst memory_order_seq_cst
+
+#define atomic_fence atomic_thread_fence
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef _Atomic(type) atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ /* \
+ * A strict interpretation of the C standard prevents \
+ * atomic_load from taking a const argument, but it's \
+ * convenient for our purposes. This cast is a workaround. \
+ */ \
+ atomic_##short_type##_t* a_nonconst = \
+ (atomic_##short_type##_t*)a; \
+ return atomic_load_explicit(a_nonconst, mo); \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ atomic_store_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return atomic_exchange_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return atomic_compare_exchange_weak_explicit(a, expected, \
+ desired, success_mo, failure_mo); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return atomic_compare_exchange_strong_explicit(a, expected, \
+ desired, success_mo, failure_mo); \
+}
+
+/*
+ * Integral types have some special operations available that non-integral ones
+ * lack.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_add_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_sub_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_and_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_or_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_xor_explicit(a, val, mo); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
new file mode 100644
index 000000000..6b73a14f8
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -0,0 +1,127 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+
+#include "jemalloc/internal/assert.h"
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+typedef enum {
+ atomic_memory_order_relaxed,
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+ATOMIC_INLINE int
+atomic_enum_to_builtin(atomic_memory_order_t mo) {
+ switch (mo) {
+ case atomic_memory_order_relaxed:
+ return __ATOMIC_RELAXED;
+ case atomic_memory_order_acquire:
+ return __ATOMIC_ACQUIRE;
+ case atomic_memory_order_release:
+ return __ATOMIC_RELEASE;
+ case atomic_memory_order_acq_rel:
+ return __ATOMIC_ACQ_REL;
+ case atomic_memory_order_seq_cst:
+ return __ATOMIC_SEQ_CST;
+ }
+ /* Can't happen; the switch is exhaustive. */
+ not_reached();
+}
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+ __atomic_thread_fence(atomic_enum_to_builtin(mo));
+}
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef struct { \
+ type repr; \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ type result; \
+ __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \
+ return result; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ type result; \
+ __atomic_exchange(&a->repr, &val, &result, \
+ atomic_enum_to_builtin(mo)); \
+ return result; \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return __atomic_compare_exchange(&a->repr, expected, &desired, \
+ true, atomic_enum_to_builtin(success_mo), \
+ atomic_enum_to_builtin(failure_mo)); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return __atomic_compare_exchange(&a->repr, expected, &desired, \
+ false, \
+ atomic_enum_to_builtin(success_mo), \
+ atomic_enum_to_builtin(failure_mo)); \
+}
+
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_add(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_sub(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_and(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_or(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_xor(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
new file mode 100644
index 000000000..30846e4d2
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
@@ -0,0 +1,191 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+typedef enum {
+ atomic_memory_order_relaxed,
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+ /* Easy cases first: no barrier, and full barrier. */
+ if (mo == atomic_memory_order_relaxed) {
+ asm volatile("" ::: "memory");
+ return;
+ }
+ if (mo == atomic_memory_order_seq_cst) {
+ asm volatile("" ::: "memory");
+ __sync_synchronize();
+ asm volatile("" ::: "memory");
+ return;
+ }
+ asm volatile("" ::: "memory");
+# if defined(__i386__) || defined(__x86_64__)
+ /* This is implicit on x86. */
+# elif defined(__ppc__)
+ asm volatile("lwsync");
+# elif defined(__sparc__) && defined(__arch64__)
+ if (mo == atomic_memory_order_acquire) {
+ asm volatile("membar #LoadLoad | #LoadStore");
+ } else if (mo == atomic_memory_order_release) {
+ asm volatile("membar #LoadStore | #StoreStore");
+ } else {
+ asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
+ }
+# else
+ __sync_synchronize();
+# endif
+ asm volatile("" ::: "memory");
+}
+
+/*
+ * A correct implementation of seq_cst loads and stores on weakly ordered
+ * architectures could do either of the following:
+ * 1. store() is weak-fence -> store -> strong fence, load() is load ->
+ * strong-fence.
+ * 2. store() is strong-fence -> store, load() is strong-fence -> load ->
+ * weak-fence.
+ * The tricky thing is, load() and store() above can be the load or store
+ * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
+ * means going with strategy 2.
+ * On strongly ordered architectures, the natural strategy is to stick a strong
+ * fence after seq_cst stores, and have naked loads. So we want the strong
+ * fences in different places on different architectures.
+ * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
+ * accomplish this.
+ */
+
+ATOMIC_INLINE void
+atomic_pre_sc_load_fence() {
+# if defined(__i386__) || defined(__x86_64__) || \
+ (defined(__sparc__) && defined(__arch64__))
+ atomic_fence(atomic_memory_order_relaxed);
+# else
+ atomic_fence(atomic_memory_order_seq_cst);
+# endif
+}
+
+ATOMIC_INLINE void
+atomic_post_sc_store_fence() {
+# if defined(__i386__) || defined(__x86_64__) || \
+ (defined(__sparc__) && defined(__arch64__))
+ atomic_fence(atomic_memory_order_seq_cst);
+# else
+ atomic_fence(atomic_memory_order_relaxed);
+# endif
+}
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef struct { \
+ type volatile repr; \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ if (mo == atomic_memory_order_seq_cst) { \
+ atomic_pre_sc_load_fence(); \
+ } \
+ type result = a->repr; \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_acquire); \
+ } \
+ return result; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_release); \
+ } \
+ a->repr = val; \
+ if (mo == atomic_memory_order_seq_cst) { \
+ atomic_post_sc_store_fence(); \
+ } \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ /* \
+ * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
+ * an atomic exchange builtin. We fake it with a CAS loop. \
+ */ \
+ while (true) { \
+ type old = a->repr; \
+ if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
+ return old; \
+ } \
+ } \
+} \
+ \
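+/*									\
+ * __sync provides only a strong compare-and-swap, so the "weak"	\
+ * variant below is actually strong and never fails spuriously.	\
+ */									\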
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
+ desired); \
+ if (prev == *expected) { \
+ return true; \
+ } else { \
+ *expected = prev; \
+ return false; \
+ } \
+} \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
+ desired); \
+ if (prev == *expected) { \
+ return true; \
+ } else { \
+ *expected = prev; \
+ return false; \
+ } \
+}
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_add(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_sub(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_and(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_or(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_xor(&a->repr, val); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h b/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h
new file mode 100644
index 000000000..67057ce50
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h
@@ -0,0 +1,158 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+typedef enum {
+ atomic_memory_order_relaxed,
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+typedef char atomic_repr_0_t;
+typedef short atomic_repr_1_t;
+typedef long atomic_repr_2_t;
+typedef __int64 atomic_repr_3_t;
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+ _ReadWriteBarrier();
+# if defined(_M_ARM) || defined(_M_ARM64)
+ /* ARM needs a barrier for everything but relaxed. */
+ if (mo != atomic_memory_order_relaxed) {
+ MemoryBarrier();
+ }
+# elif defined(_M_IX86) || defined (_M_X64)
+ /* x86 needs a barrier only for seq_cst. */
+ if (mo == atomic_memory_order_seq_cst) {
+ MemoryBarrier();
+ }
+# else
+# error "Don't know how to create atomics for this platform for MSVC."
+# endif
+ _ReadWriteBarrier();
+}
+
+#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t
+
+#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b)
+#define ATOMIC_RAW_CONCAT(a, b) a ## b
+
+#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \
+ base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size))
+
+#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \
+ ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size)
+
+#define ATOMIC_INTERLOCKED_SUFFIX_0 8
+#define ATOMIC_INTERLOCKED_SUFFIX_1 16
+#define ATOMIC_INTERLOCKED_SUFFIX_2
+#define ATOMIC_INTERLOCKED_SUFFIX_3 64
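+/*
+ * The lg_size == 2 suffix is empty because the unsuffixed Interlocked*
+ * intrinsics operate on long, which is 32 bits on Windows.
+ */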
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \
+typedef struct { \
+ ATOMIC_INTERLOCKED_REPR(lg_size) repr; \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_acquire); \
+ } \
+ return (type) ret; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_release); \
+ } \
+ a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \
+ if (mo == atomic_memory_order_seq_cst) { \
+ atomic_fence(atomic_memory_order_seq_cst); \
+ } \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \
+ lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ ATOMIC_INTERLOCKED_REPR(lg_size) e = \
+ (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \
+ ATOMIC_INTERLOCKED_REPR(lg_size) d = \
+ (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \
+ ATOMIC_INTERLOCKED_REPR(lg_size) old = \
+ ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \
+ lg_size)(&a->repr, d, e); \
+ if (old == e) { \
+ return true; \
+ } else { \
+ *expected = (type)old; \
+ return false; \
+ } \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ /* We implement the weak version with strong semantics. */ \
+ return atomic_compare_exchange_weak_##short_type(a, expected, \
+ desired, success_mo, failure_mo); \
+}
+
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \
+ lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ /* \
+ * MSVC warns on negation of unsigned operands, but for us it \
+ * gives exactly the right semantics (MAX_TYPE + 1 - operand). \
+ */ \
+ __pragma(warning(push)) \
+ __pragma(warning(disable: 4146)) \
+ return atomic_fetch_add_##short_type(a, -val, mo); \
+ __pragma(warning(pop)) \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
new file mode 100644
index 000000000..3209aa49f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
@@ -0,0 +1,33 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
+
+extern bool opt_background_thread;
+extern size_t opt_max_background_threads;
+extern malloc_mutex_t background_thread_lock;
+extern atomic_b_t background_thread_enabled_state;
+extern size_t n_background_threads;
+extern size_t max_background_threads;
+extern background_thread_info_t *background_thread_info;
+extern bool can_enable_background_thread;
+
+bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
+bool background_threads_enable(tsd_t *tsd);
+bool background_threads_disable(tsd_t *tsd);
+void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+ arena_decay_t *decay, size_t npages_new);
+void background_thread_prefork0(tsdn_t *tsdn);
+void background_thread_prefork1(tsdn_t *tsdn);
+void background_thread_postfork_parent(tsdn_t *tsdn);
+void background_thread_postfork_child(tsdn_t *tsdn);
+bool background_thread_stats_read(tsdn_t *tsdn,
+ background_thread_stats_t *stats);
+void background_thread_ctl_init(tsdn_t *tsdn);
+
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *,
+ void *(*)(void *), void *__restrict);
+#endif
+bool background_thread_boot0(void);
+bool background_thread_boot1(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
new file mode 100644
index 000000000..ef50231e8
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
@@ -0,0 +1,57 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
+
+JEMALLOC_ALWAYS_INLINE bool
+background_thread_enabled(void) {
+ return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+background_thread_enabled_set(tsdn_t *tsdn, bool state) {
+ malloc_mutex_assert_owner(tsdn, &background_thread_lock);
+ atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED);
+}
+
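+/*
+ * Arenas map onto the per-CPU background_thread_info structs round-robin by
+ * arena index.
+ */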
+JEMALLOC_ALWAYS_INLINE background_thread_info_t *
+arena_background_thread_info_get(arena_t *arena) {
+ unsigned arena_ind = arena_ind_get(arena);
+ return &background_thread_info[arena_ind % ncpus];
+}
+
+JEMALLOC_ALWAYS_INLINE uint64_t
+background_thread_wakeup_time_get(background_thread_info_t *info) {
+ uint64_t next_wakeup = nstime_ns(&info->next_wakeup);
+ assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) ==
+ (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP));
+ return next_wakeup;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info,
+ uint64_t wakeup_time) {
+ malloc_mutex_assert_owner(tsdn, &info->mtx);
+ atomic_store_b(&info->indefinite_sleep,
+ wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE);
+ nstime_init(&info->next_wakeup, wakeup_time);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+background_thread_indefinite_sleep(background_thread_info_t *info) {
+ return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
+ bool is_background_thread) {
+ if (!background_thread_enabled() || is_background_thread) {
+ return;
+ }
+ background_thread_info_t *info =
+ arena_background_thread_info_get(arena);
+ if (background_thread_indefinite_sleep(info)) {
+ background_thread_interval_check(tsdn, arena,
+ &arena->decay_dirty, 0);
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
new file mode 100644
index 000000000..c1107dfe9
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
@@ -0,0 +1,53 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
+
+/* This file really combines "structs" and "types", but only transitionally. */
+
+#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
+# define JEMALLOC_PTHREAD_CREATE_WRAPPER
+#endif
+
+#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
+#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
+
+typedef enum {
+ background_thread_stopped,
+ background_thread_started,
+ /* Thread waits on the global lock when paused (for arena_reset). */
+ background_thread_paused,
+} background_thread_state_t;
+
+struct background_thread_info_s {
+#ifdef JEMALLOC_BACKGROUND_THREAD
+ /* Background thread is pthread specific. */
+ pthread_t thread;
+ pthread_cond_t cond;
+#endif
+ malloc_mutex_t mtx;
+ background_thread_state_t state;
+	/* When true, no wakeup is scheduled. */
+ atomic_b_t indefinite_sleep;
+ /* Next scheduled wakeup time (absolute time in ns). */
+ nstime_t next_wakeup;
+ /*
+	 * Number of pages added since the last background thread run that
+	 * need to be purged by the next wakeup.  This is adjusted on
+ * epoch advance, and is used to determine whether we should signal the
+ * background thread to wake up earlier.
+ */
+ size_t npages_to_purge_new;
+ /* Stats: total number of runs since started. */
+ uint64_t tot_n_runs;
+ /* Stats: total sleep time since started. */
+ nstime_t tot_sleep_time;
+};
+typedef struct background_thread_info_s background_thread_info_t;
+
+struct background_thread_stats_s {
+ size_t num_threads;
+ uint64_t num_runs;
+ nstime_t run_interval;
+};
+typedef struct background_thread_stats_s background_thread_stats_t;
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h
deleted file mode 100644
index 9cf75ffb0..000000000
--- a/deps/jemalloc/include/jemalloc/internal/base.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-void *base_alloc(size_t size);
-void *base_calloc(size_t number, size_t size);
-extent_node_t *base_node_alloc(void);
-void base_node_dealloc(extent_node_t *node);
-bool base_boot(void);
-void base_prefork(void);
-void base_postfork_parent(void);
-void base_postfork_child(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/base_externs.h b/deps/jemalloc/include/jemalloc/internal/base_externs.h
new file mode 100644
index 000000000..7b705c9b4
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/base_externs.h
@@ -0,0 +1,22 @@
+#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
+#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
+
+extern metadata_thp_mode_t opt_metadata_thp;
+extern const char *metadata_thp_mode_names[];
+
+base_t *b0get(void);
+base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+void base_delete(tsdn_t *tsdn, base_t *base);
+extent_hooks_t *base_extent_hooks_get(base_t *base);
+extent_hooks_t *base_extent_hooks_set(base_t *base,
+ extent_hooks_t *extent_hooks);
+void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
+extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
+void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
+ size_t *resident, size_t *mapped, size_t *n_thp);
+void base_prefork(tsdn_t *tsdn, base_t *base);
+void base_postfork_parent(tsdn_t *tsdn, base_t *base);
+void base_postfork_child(tsdn_t *tsdn, base_t *base);
+bool base_boot(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/base_inlines.h b/deps/jemalloc/include/jemalloc/internal/base_inlines.h
new file mode 100644
index 000000000..aec0e2e1e
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/base_inlines.h
@@ -0,0 +1,13 @@
+#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H
+#define JEMALLOC_INTERNAL_BASE_INLINES_H
+
+static inline unsigned
+base_ind_get(const base_t *base) {
+ return base->ind;
+}
+
+static inline bool
+metadata_thp_enabled(void) {
+ return (opt_metadata_thp != metadata_thp_disabled);
+}
+#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/base_structs.h b/deps/jemalloc/include/jemalloc/internal/base_structs.h
new file mode 100644
index 000000000..2102247ac
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/base_structs.h
@@ -0,0 +1,59 @@
+#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H
+#define JEMALLOC_INTERNAL_BASE_STRUCTS_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/size_classes.h"
+
+/* Embedded at the beginning of every block of base-managed virtual memory. */
+struct base_block_s {
+ /* Total size of block's virtual memory mapping. */
+ size_t size;
+
+ /* Next block in list of base's blocks. */
+ base_block_t *next;
+
+ /* Tracks unused trailing space. */
+ extent_t extent;
+};
+
+struct base_s {
+ /* Associated arena's index within the arenas array. */
+ unsigned ind;
+
+ /*
+ * User-configurable extent hook functions. Points to an
+ * extent_hooks_t.
+ */
+ atomic_p_t extent_hooks;
+
+ /* Protects base_alloc() and base_stats_get() operations. */
+ malloc_mutex_t mtx;
+
+ /* Using THP when true (metadata_thp auto mode). */
+ bool auto_thp_switched;
+ /*
+ * Most recent size class in the series of increasingly large base
+ * extents. Logarithmic spacing between subsequent allocations ensures
+ * that the total number of distinct mappings remains small.
+ */
+ pszind_t pind_last;
+
+ /* Serial number generation state. */
+ size_t extent_sn_next;
+
+ /* Chain of all blocks associated with base. */
+ base_block_t *blocks;
+
+ /* Heap of extents that track unused trailing space within blocks. */
+ extent_heap_t avail[NSIZES];
+
+ /* Stats, only maintained if config_stats. */
+ size_t allocated;
+ size_t resident;
+ size_t mapped;
+ /* Number of THP regions touched. */
+ size_t n_thp;
+};
+
+#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/base_types.h b/deps/jemalloc/include/jemalloc/internal/base_types.h
new file mode 100644
index 000000000..b6db77df7
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/base_types.h
@@ -0,0 +1,33 @@
+#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H
+#define JEMALLOC_INTERNAL_BASE_TYPES_H
+
+typedef struct base_block_s base_block_t;
+typedef struct base_s base_t;
+
+#define METADATA_THP_DEFAULT metadata_thp_disabled
+
+/*
+ * In auto mode, arenas switch to huge pages for the base allocator on the
+ * second base block. a0 switches to thp on the 5th block (after 20 megabytes
+ * of metadata), since more metadata (e.g. rtree nodes) comes from a0's base.
+ */
+
+#define BASE_AUTO_THP_THRESHOLD 2
+#define BASE_AUTO_THP_THRESHOLD_A0 5
+
+typedef enum {
+ metadata_thp_disabled = 0,
+ /*
+	 * Lazily enable hugepages for metadata.  To avoid high RSS caused by
+	 * combining THP with a low-usage arena (i.e. metadata THP becoming a
+	 * significant fraction of RSS), the "auto" option only starts using
+	 * THP after a base allocator has used up its first THP region.
+	 * Starting from the second hugepage (in a single
+ * arena), "auto" behaves the same as "always", i.e. madvise hugepage
+ * right away.
+ */
+ metadata_thp_auto = 1,
+ metadata_thp_always = 2,
+ metadata_thp_mode_limit = 3
+} metadata_thp_mode_t;
+
+#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bin.h b/deps/jemalloc/include/jemalloc/internal/bin.h
new file mode 100644
index 000000000..9b416ada7
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/bin.h
@@ -0,0 +1,106 @@
+#ifndef JEMALLOC_INTERNAL_BIN_H
+#define JEMALLOC_INTERNAL_BIN_H
+
+#include "jemalloc/internal/extent_types.h"
+#include "jemalloc/internal/extent_structs.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/bin_stats.h"
+
+/*
+ * A bin contains a set of extents that are currently being used for slab
+ * allocations.
+ */
+
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ *
+ * Each slab has the following layout:
+ *
+ * /--------------------\
+ * | region 0 |
+ * |--------------------|
+ * | region 1 |
+ * |--------------------|
+ * | ... |
+ * | ... |
+ * | ... |
+ * |--------------------|
+ * | region nregs-1 |
+ * \--------------------/
+ */
+typedef struct bin_info_s bin_info_t;
+struct bin_info_s {
+ /* Size of regions in a slab for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a slab for this bin's size class. */
+ size_t slab_size;
+
+ /* Total number of regions in a slab for this bin's size class. */
+ uint32_t nregs;
+
+ /*
+ * Metadata used to manipulate bitmaps for slabs associated with this
+ * bin.
+ */
+ bitmap_info_t bitmap_info;
+};
+
+extern const bin_info_t bin_infos[NBINS];
+
+typedef struct bin_s bin_t;
+struct bin_s {
+ /* All operations on bin_t fields require lock ownership. */
+ malloc_mutex_t lock;
+
+ /*
+ * Current slab being used to service allocations of this bin's size
+ * class. slabcur is independent of slabs_{nonfull,full}; whenever
+ * slabcur is reassigned, the previous slab must be deallocated or
+ * inserted into slabs_{nonfull,full}.
+ */
+ extent_t *slabcur;
+
+ /*
+	 * Heap of non-full slabs.  This heap is used to ensure that new
+ * allocations come from the non-full slab that is oldest/lowest in
+ * memory.
+ */
+ extent_heap_t slabs_nonfull;
+
+ /* List used to track full slabs. */
+ extent_list_t slabs_full;
+
+ /* Bin statistics. */
+ bin_stats_t stats;
+};
+
+/* Initializes a bin to empty. Returns true on error. */
+bool bin_init(bin_t *bin);
+
+/* Forking. */
+void bin_prefork(tsdn_t *tsdn, bin_t *bin);
+void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
+void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
+
+/* Stats. */
+static inline void
+bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
+ malloc_mutex_lock(tsdn, &bin->lock);
+ malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock);
+ dst_bin_stats->nmalloc += bin->stats.nmalloc;
+ dst_bin_stats->ndalloc += bin->stats.ndalloc;
+ dst_bin_stats->nrequests += bin->stats.nrequests;
+ dst_bin_stats->curregs += bin->stats.curregs;
+ dst_bin_stats->nfills += bin->stats.nfills;
+ dst_bin_stats->nflushes += bin->stats.nflushes;
+ dst_bin_stats->nslabs += bin->stats.nslabs;
+ dst_bin_stats->reslabs += bin->stats.reslabs;
+ dst_bin_stats->curslabs += bin->stats.curslabs;
+ malloc_mutex_unlock(tsdn, &bin->lock);
+}
+
+#endif /* JEMALLOC_INTERNAL_BIN_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bin_stats.h b/deps/jemalloc/include/jemalloc/internal/bin_stats.h
new file mode 100644
index 000000000..86e673ec4
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/bin_stats.h
@@ -0,0 +1,51 @@
+#ifndef JEMALLOC_INTERNAL_BIN_STATS_H
+#define JEMALLOC_INTERNAL_BIN_STATS_H
+
+#include "jemalloc/internal/mutex_prof.h"
+
+typedef struct bin_stats_s bin_stats_t;
+struct bin_stats_s {
+ /*
+ * Total number of allocation/deallocation requests served directly by
+ * the bin. Note that tcache may allocate an object, then recycle it
+	 * many times, resulting in many increments to nrequests, but only one
+ * each to nmalloc and ndalloc.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to the size of this
+ * bin. This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
+ */
+ uint64_t nrequests;
+
+ /*
+ * Current number of regions of this size class, including regions
+ * currently cached by tcache.
+ */
+ size_t curregs;
+
+ /* Number of tcache fills from this bin. */
+ uint64_t nfills;
+
+ /* Number of tcache flushes to this bin. */
+ uint64_t nflushes;
+
+ /* Total number of slabs created for this bin's size class. */
+ uint64_t nslabs;
+
+ /*
+ * Total number of slabs reused by extracting them from the slabs heap
+ * for this bin's size class.
+ */
+ uint64_t reslabs;
+
+ /* Current number of slabs in this bin. */
+ size_t curslabs;
+
+ mutex_prof_data_t mutex_data;
+};
+
+#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bit_util.h b/deps/jemalloc/include/jemalloc/internal/bit_util.h
new file mode 100644
index 000000000..8d078a8a3
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/bit_util.h
@@ -0,0 +1,165 @@
+#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H
+#define JEMALLOC_INTERNAL_BIT_UTIL_H
+
+#include "jemalloc/internal/assert.h"
+
+#define BIT_UTIL_INLINE static inline
+
+/* Sanity check. */
+#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
+ || !defined(JEMALLOC_INTERNAL_FFS)
+# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
+#endif
+
+
+BIT_UTIL_INLINE unsigned
+ffs_llu(unsigned long long bitmap) {
+ return JEMALLOC_INTERNAL_FFSLL(bitmap);
+}
+
+BIT_UTIL_INLINE unsigned
+ffs_lu(unsigned long bitmap) {
+ return JEMALLOC_INTERNAL_FFSL(bitmap);
+}
+
+BIT_UTIL_INLINE unsigned
+ffs_u(unsigned bitmap) {
+ return JEMALLOC_INTERNAL_FFS(bitmap);
+}
+
+BIT_UTIL_INLINE unsigned
+ffs_zu(size_t bitmap) {
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+ return ffs_u(bitmap);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+ return ffs_lu(bitmap);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+ return ffs_llu(bitmap);
+#else
+#error No implementation for size_t ffs()
+#endif
+}
+
+BIT_UTIL_INLINE unsigned
+ffs_u64(uint64_t bitmap) {
+#if LG_SIZEOF_LONG == 3
+ return ffs_lu(bitmap);
+#elif LG_SIZEOF_LONG_LONG == 3
+ return ffs_llu(bitmap);
+#else
+#error No implementation for 64-bit ffs()
+#endif
+}
+
+BIT_UTIL_INLINE unsigned
+ffs_u32(uint32_t bitmap) {
+#if LG_SIZEOF_INT == 2
+ return ffs_u(bitmap);
+#else
+#error No implementation for 32-bit ffs()
+#endif
+ return ffs_u(bitmap);
+}
+
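+/*
+ * The pow2_ceil_*() functions compute the smallest power of 2 that is >= x
+ * by smearing the highest set bit of x - 1 into all lower positions and
+ * then adding 1; e.g. for x == 37: 36 -> 0b111111 (63) -> 64.  Note that 0
+ * maps to 0, and values above 2^63 (resp. 2^31) wrap to 0.
+ */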
+BIT_UTIL_INLINE uint64_t
+pow2_ceil_u64(uint64_t x) {
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ x |= x >> 32;
+ x++;
+ return x;
+}
+
+BIT_UTIL_INLINE uint32_t
+pow2_ceil_u32(uint32_t x) {
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ x++;
+ return x;
+}
+
+/* Compute the smallest power of 2 that is >= x. */
+BIT_UTIL_INLINE size_t
+pow2_ceil_zu(size_t x) {
+#if (LG_SIZEOF_PTR == 3)
+ return pow2_ceil_u64(x);
+#else
+ return pow2_ceil_u32(x);
+#endif
+}
+
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+ size_t ret;
+ assert(x != 0);
+
+ asm ("bsr %1, %0"
+ : "=r"(ret) // Outputs.
+ : "r"(x) // Inputs.
+ );
+ assert(ret < UINT_MAX);
+ return (unsigned)ret;
+}
+#elif (defined(_MSC_VER))
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+ unsigned long ret;
+
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == 3)
+ _BitScanReverse64(&ret, x);
+#elif (LG_SIZEOF_PTR == 2)
+ _BitScanReverse(&ret, x);
+#else
+# error "Unsupported type size for lg_floor()"
+#endif
+ assert(ret < UINT_MAX);
+ return (unsigned)ret;
+}
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+ assert(x != 0);
+
+#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
+ return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+ return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
+#else
+# error "Unsupported type size for lg_floor()"
+#endif
+}
+#else
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+ assert(x != 0);
+
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+#if (LG_SIZEOF_PTR == 3)
+ x |= (x >> 32);
+#endif
+ if (x == SIZE_T_MAX) {
+ return (8 << LG_SIZEOF_PTR) - 1;
+ }
+ x++;
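+	/*
+	 * x is now 2^(lg_floor + 1) of the original input; ffs_zu() is
+	 * one-based, hence the -2.
+	 */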
+ return ffs_zu(x) - 2;
+}
+#endif
+
+#undef BIT_UTIL_INLINE
+
+#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/bitmap.h b/deps/jemalloc/include/jemalloc/internal/bitmap.h
index 605ebac58..ac990290a 100644
--- a/deps/jemalloc/include/jemalloc/internal/bitmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/bitmap.h
@@ -1,37 +1,159 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_BITMAP_H
+#define JEMALLOC_INTERNAL_BITMAP_H
-/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
-#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/size_classes.h"
-typedef struct bitmap_level_s bitmap_level_t;
-typedef struct bitmap_info_s bitmap_info_t;
typedef unsigned long bitmap_t;
-#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
+#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
+
+/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
+#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES
+/* Maximum bitmap bit count is determined by maximum regions per slab. */
+# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS
+#else
+/* Maximum bitmap bit count is determined by number of extent size classes. */
+# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES
+#endif
+#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)
/* Number of bits per group. */
-#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
-#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS)
-#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3)
+#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS)
+#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1)
+
+/*
+ * Do some analysis on how big the bitmap is before we use a tree. For a brute
+ * force linear search, if we would have to call ffs_lu() more than 2^3 times,
+ * use a tree instead.
+ */
+#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3
+# define BITMAP_USE_TREE
+#endif
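+
+/*
+ * Worked example, assuming 64-bit groups (LG_BITMAP_GROUP_NBITS == 6): the
+ * tree representation is used once LG_BITMAP_MAXBITS > 9, i.e. when the
+ * maximum bitmap exceeds 512 bits and a linear scan could touch more than
+ * 2^3 groups.
+ */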
+
+/* Number of groups required to store a given number of bits. */
+#define BITMAP_BITS2GROUPS(nbits) \
+ (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS)
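+/*
+ * E.g., with 64-bit groups, BITMAP_BITS2GROUPS(1000) evaluates to
+ * (1000 + 63) >> 6 == 16.
+ */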
+
+/*
+ * Number of groups required at a particular level for a given number of bits.
+ */
+#define BITMAP_GROUPS_L0(nbits) \
+ BITMAP_BITS2GROUPS(nbits)
+#define BITMAP_GROUPS_L1(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits))
+#define BITMAP_GROUPS_L2(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))
+#define BITMAP_GROUPS_L3(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \
+ BITMAP_BITS2GROUPS((nbits)))))
+#define BITMAP_GROUPS_L4(nbits) \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \
+ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))))
+
+/*
+ * Assuming the number of levels, number of groups required for a given number
+ * of bits.
+ */
+#define BITMAP_GROUPS_1_LEVEL(nbits) \
+ BITMAP_GROUPS_L0(nbits)
+#define BITMAP_GROUPS_2_LEVEL(nbits) \
+ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits))
+#define BITMAP_GROUPS_3_LEVEL(nbits) \
+ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits))
+#define BITMAP_GROUPS_4_LEVEL(nbits) \
+ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits))
+#define BITMAP_GROUPS_5_LEVEL(nbits) \
+ (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits))
+
+/*
+ * Maximum number of groups required to support LG_BITMAP_MAXBITS.
+ */
+#ifdef BITMAP_USE_TREE
-/* Maximum number of levels possible. */
-#define BITMAP_MAX_LEVELS \
- (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
- + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
+#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS
+# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits)
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2
+# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits)
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3
+# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits)
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4
+# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits)
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS)
+#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5
+# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits)
+# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS)
+#else
+# error "Unsupported bitmap size"
+#endif
+
+/*
+ * Maximum number of levels possible. This could be statically computed based
+ * on LG_BITMAP_MAXBITS:
+ *
+ * #define BITMAP_MAX_LEVELS \
+ * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \
+ * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP)
+ *
+ * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so
+ * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the
+ * various cascading macros. The only additional cost this incurs is some
+ * unused trailing entries in bitmap_info_t structures; the bitmaps themselves
+ * are not impacted.
+ */
+#define BITMAP_MAX_LEVELS 5
+
+#define BITMAP_INFO_INITIALIZER(nbits) { \
+ /* nbits. */ \
+ nbits, \
+ /* nlevels. */ \
+ (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \
+ (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \
+ (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \
+ (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \
+ /* levels. */ \
+ { \
+ {0}, \
+ {BITMAP_GROUPS_L0(nbits)}, \
+ {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \
+ {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \
+ BITMAP_GROUPS_L0(nbits)}, \
+ {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \
+ BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \
+ {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \
+ BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \
+ + BITMAP_GROUPS_L0(nbits)} \
+ } \
+}
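
A minimal usage sketch under the tree representation (example_binfo and example_bits are hypothetical names, not part of this patch; 2048 bits needs two levels with 64-bit groups, though BITMAP_GROUPS may reserve a few extra groups if LG_BITMAP_MAXBITS implies more levels):

/* 33 groups for 2048 bits: 32 leaf groups plus 1 root group. */
static const bitmap_info_t example_binfo = BITMAP_INFO_INITIALIZER(2048);
static bitmap_t example_bits[BITMAP_GROUPS(2048)];

static void
example_boot(void) {
	/* With fill == false, every logical bit starts unset ("available"). */
	bitmap_init(example_bits, &example_binfo, false);
}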
+
+#else /* BITMAP_USE_TREE */
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
+#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits)
+#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS)
-struct bitmap_level_s {
+#define BITMAP_INFO_INITIALIZER(nbits) { \
+ /* nbits. */ \
+ nbits, \
+ /* ngroups. */ \
+ BITMAP_BITS2GROUPS(nbits) \
+}
+
+#endif /* BITMAP_USE_TREE */
+
+typedef struct bitmap_level_s {
/* Offset of this level's groups within the array of groups. */
size_t group_offset;
-};
+} bitmap_level_t;
-struct bitmap_info_s {
+typedef struct bitmap_info_s {
/* Logical number of bits in bitmap (stored at bottom level). */
size_t nbits;
+#ifdef BITMAP_USE_TREE
/* Number of levels necessary for nbits. */
unsigned nlevels;
@@ -40,67 +162,62 @@ struct bitmap_info_s {
* bottom to top (e.g. the bottom level is stored in levels[0]).
*/
bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
-size_t bitmap_info_ngroups(const bitmap_info_t *binfo);
-size_t bitmap_size(size_t nbits);
-void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
-bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
-void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
-size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
-void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
-#endif
+#else /* BITMAP_USE_TREE */
+ /* Number of groups necessary for nbits. */
+ size_t ngroups;
+#endif /* BITMAP_USE_TREE */
+} bitmap_info_t;
+
+void bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
+void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill);
+size_t bitmap_size(const bitmap_info_t *binfo);
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
-JEMALLOC_INLINE bool
-bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
-{
- unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
+static inline bool
+bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) {
+#ifdef BITMAP_USE_TREE
+ size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
bitmap_t rg = bitmap[rgoff];
/* The bitmap is full iff the root group is 0. */
return (rg == 0);
+#else
+ size_t i;
+
+ for (i = 0; i < binfo->ngroups; i++) {
+ if (bitmap[i] != 0) {
+ return false;
+ }
+ }
+ return true;
+#endif
}
-JEMALLOC_INLINE bool
-bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
-{
+static inline bool
+bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
size_t goff;
bitmap_t g;
assert(bit < binfo->nbits);
goff = bit >> LG_BITMAP_GROUP_NBITS;
g = bitmap[goff];
- return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
+ return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
}
-JEMALLOC_INLINE void
-bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
-{
+static inline void
+bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
size_t goff;
bitmap_t *gp;
bitmap_t g;
assert(bit < binfo->nbits);
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[goff];
g = *gp;
- assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
- g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
+ g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
assert(bitmap_get(bitmap, binfo, bit));
+#ifdef BITMAP_USE_TREE
/* Propagate group state transitions up the tree. */
if (g == 0) {
unsigned i;
@@ -109,45 +226,113 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
goff = bit >> LG_BITMAP_GROUP_NBITS;
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
- assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
- g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)));
+ g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- if (g != 0)
+ if (g != 0) {
break;
+ }
}
}
+#endif
+}
+
+/* ffu: find first unset >= bit. */
+static inline size_t
+bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
+ assert(min_bit < binfo->nbits);
+
+#ifdef BITMAP_USE_TREE
+ size_t bit = 0;
+ for (unsigned level = binfo->nlevels; level--;) {
+ size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level +
+ 1));
+ bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit
+ >> lg_bits_per_group)];
+ unsigned group_nmask = (unsigned)(((min_bit > bit) ? (min_bit -
+ bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS));
+ assert(group_nmask <= BITMAP_GROUP_NBITS);
+ bitmap_t group_mask = ~((1LU << group_nmask) - 1);
+ bitmap_t group_masked = group & group_mask;
+ if (group_masked == 0LU) {
+ if (group == 0LU) {
+ return binfo->nbits;
+ }
+ /*
+ * min_bit was preceded by one or more unset bits in
+ * this group, but there are no other unset bits in this
+ * group. Try again starting at the first bit of the
+ * next sibling. This will recurse at most once per
+ * non-root level.
+ */
+ size_t sib_base = bit + (ZU(1) << lg_bits_per_group);
+ assert(sib_base > min_bit);
+ assert(sib_base > bit);
+ if (sib_base >= binfo->nbits) {
+ return binfo->nbits;
+ }
+ return bitmap_ffu(bitmap, binfo, sib_base);
+ }
+ bit += ((size_t)(ffs_lu(group_masked) - 1)) <<
+ (lg_bits_per_group - LG_BITMAP_GROUP_NBITS);
+ }
+ assert(bit >= min_bit);
+ assert(bit < binfo->nbits);
+ return bit;
+#else
+ size_t i = min_bit >> LG_BITMAP_GROUP_NBITS;
+ bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK))
+ - 1);
+ size_t bit;
+	while (true) {
+		bit = ffs_lu(g);
+		if (bit != 0) {
+			return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
+		}
+		i++;
+		if (i == binfo->ngroups) {
+			break;
+		}
+		/* Checking i first avoids reading bitmap[ngroups]. */
+		g = bitmap[i];
+	}
+ return binfo->nbits;
+#endif
}
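
A small sketch of the ffu contract, continuing the hypothetical example_bits/example_binfo above:

/* With logical bits 0..2 set, the first unset bit at or above 0 is 3. */
for (size_t b = 0; b < 3; b++) {
	bitmap_set(example_bits, &example_binfo, b);
}
assert(bitmap_ffu(example_bits, &example_binfo, 0) == 3);
assert(bitmap_ffu(example_bits, &example_binfo, 2) == 3);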
/* sfu: set first unset. */
-JEMALLOC_INLINE size_t
-bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
-{
+static inline size_t
+bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) {
size_t bit;
bitmap_t g;
unsigned i;
- assert(bitmap_full(bitmap, binfo) == false);
+ assert(!bitmap_full(bitmap, binfo));
+#ifdef BITMAP_USE_TREE
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
- bit = ffsl(g) - 1;
+ bit = ffs_lu(g) - 1;
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
- bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+ bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1);
}
-
+#else
+ i = 0;
+ g = bitmap[0];
+ while ((bit = ffs_lu(g)) == 0) {
+ i++;
+ g = bitmap[i];
+ }
+ bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
+#endif
bitmap_set(bitmap, binfo, bit);
- return (bit);
+ return bit;
}
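
Continuing the same sketch, sfu finds and claims a bit in one call:

size_t b = bitmap_sfu(example_bits, &example_binfo); /* returns 3 and sets it */
assert(b == 3 && bitmap_get(example_bits, &example_binfo, 3));
bitmap_unset(example_bits, &example_binfo, 3); /* make it available again */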
-JEMALLOC_INLINE void
-bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
-{
+static inline void
+bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) {
size_t goff;
bitmap_t *gp;
bitmap_t g;
- bool propagate;
+ UNUSED bool propagate;
assert(bit < binfo->nbits);
assert(bitmap_get(bitmap, binfo, bit));
@@ -155,10 +340,11 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
gp = &bitmap[goff];
g = *gp;
propagate = (g == 0);
- assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
- g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
+ g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- assert(bitmap_get(bitmap, binfo, bit) == false);
+ assert(!bitmap_get(bitmap, binfo, bit));
+#ifdef BITMAP_USE_TREE
/* Propagate group state transitions up the tree. */
if (propagate) {
unsigned i;
@@ -168,17 +354,16 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
gp = &bitmap[binfo->levels[i].group_offset + goff];
g = *gp;
propagate = (g == 0);
- assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
+ assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))
== 0);
- g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+ g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK);
*gp = g;
- if (propagate == false)
+ if (!propagate) {
break;
+ }
}
}
+#endif /* BITMAP_USE_TREE */
}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_BITMAP_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/cache_bin.h b/deps/jemalloc/include/jemalloc/internal/cache_bin.h
new file mode 100644
index 000000000..12f3ef2dd
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/cache_bin.h
@@ -0,0 +1,114 @@
+#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
+#define JEMALLOC_INTERNAL_CACHE_BIN_H
+
+#include "jemalloc/internal/ql.h"
+
+/*
+ * The cache_bins are the mechanism that the tcache and the arena use to
+ * communicate. The tcache fills from and flushes to the arena by passing a
+ * cache_bin_t to fill/flush. When the arena needs to pull stats from the
+ * tcaches associated with it, it does so by iterating over its
+ * cache_bin_array_descriptor_t objects and reading out per-bin stats it
+ * contains. This makes it so that the arena need not know about the existence
+ * of the tcache at all.
+ */
+
+
+/*
+ * The number of cached allocations in a bin. We make this signed
+ * so that negative numbers can encode "invalid" states (e.g. a low water mark
+ * of -1 for a cache that has been depleted).
+ */
+typedef int32_t cache_bin_sz_t;
+
+typedef struct cache_bin_stats_s cache_bin_stats_t;
+struct cache_bin_stats_s {
+ /*
+ * Number of allocation requests that corresponded to the size of this
+ * bin.
+ */
+ uint64_t nrequests;
+};
+
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+typedef struct cache_bin_info_s cache_bin_info_t;
+struct cache_bin_info_s {
+ /* Upper limit on ncached. */
+ cache_bin_sz_t ncached_max;
+};
+
+typedef struct cache_bin_s cache_bin_t;
+struct cache_bin_s {
+ /* Min # cached since last GC. */
+ cache_bin_sz_t low_water;
+ /* # of cached objects. */
+ cache_bin_sz_t ncached;
+ /*
+ * ncached and stats are both modified frequently. Let's keep them
+ * close so that they have a higher chance of being on the same
+	 * cacheline, and thus fewer write-backs.
+ */
+ cache_bin_stats_t tstats;
+ /*
+ * Stack of available objects.
+ *
+	 * To make use of adjacent cacheline prefetch, the items in the avail
+	 * stack go to higher addresses for newer allocations. avail points
+ * just above the available space, which means that
+ * avail[-ncached, ... -1] are available items and the lowest item will
+ * be allocated first.
+ */
+ void **avail;
+};
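
A sketch of the indexing this layout implies (bin and the count are hypothetical):

/*
 * With ncached == 3, avail points one past the top of the stack:
 *
 *   avail[-3]  lowest address, allocated first
 *   avail[-2]
 *   avail[-1]  highest address, most recently filled
 *   avail[ 0]  one past the end; never dereferenced
 */
void *next = *(bin->avail - bin->ncached); /* the item alloc returns next */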
+
+typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
+struct cache_bin_array_descriptor_s {
+ /*
+ * The arena keeps a list of the cache bins associated with it, for
+ * stats collection.
+ */
+ ql_elm(cache_bin_array_descriptor_t) link;
+ /* Pointers to the tcache bins. */
+ cache_bin_t *bins_small;
+ cache_bin_t *bins_large;
+};
+
+static inline void
+cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
+ cache_bin_t *bins_small, cache_bin_t *bins_large) {
+ ql_elm_new(descriptor, link);
+ descriptor->bins_small = bins_small;
+ descriptor->bins_large = bins_large;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
+ void *ret;
+
+ if (unlikely(bin->ncached == 0)) {
+ bin->low_water = -1;
+ *success = false;
+ return NULL;
+ }
+ /*
+ * success (instead of ret) should be checked upon the return of this
+ * function. We avoid checking (ret == NULL) because there is never a
+ * null stored on the avail stack (which is unknown to the compiler),
+	 * and eagerly checking ret would cause a pipeline stall (waiting for
+	 * the cacheline).
+ */
+ *success = true;
+ ret = *(bin->avail - bin->ncached);
+ bin->ncached--;
+
+ if (unlikely(bin->ncached < bin->low_water)) {
+ bin->low_water = bin->ncached;
+ }
+
+ return ret;
+}
+
+#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
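
A hedged caller-side sketch; bin, tsdn, and arena come from hypothetical surrounding context, and cache_bin_fallback_alloc is a hypothetical slow path, not part of this patch:

bool success;
void *ret = cache_bin_alloc_easy(bin, &success);
if (unlikely(!success)) {
	/* Bin empty: refill from the arena and retry on the slow path. */
	ret = cache_bin_fallback_alloc(tsdn, arena, bin); /* hypothetical */
}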
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h
deleted file mode 100644
index 87d8700da..000000000
--- a/deps/jemalloc/include/jemalloc/internal/chunk.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-/*
- * Size and alignment of memory chunks that are allocated by the OS's virtual
- * memory system.
- */
-#define LG_CHUNK_DEFAULT 22
-
-/* Return the chunk address for allocation address a. */
-#define CHUNK_ADDR2BASE(a) \
- ((void *)((uintptr_t)(a) & ~chunksize_mask))
-
-/* Return the chunk offset of address a. */
-#define CHUNK_ADDR2OFFSET(a) \
- ((size_t)((uintptr_t)(a) & chunksize_mask))
-
-/* Return the smallest chunk multiple that is >= s. */
-#define CHUNK_CEILING(s) \
- (((s) + chunksize_mask) & ~chunksize_mask)
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern size_t opt_lg_chunk;
-extern const char *opt_dss;
-
-/* Protects stats_chunks; currently not used for any other purpose. */
-extern malloc_mutex_t chunks_mtx;
-/* Chunk statistics. */
-extern chunk_stats_t stats_chunks;
-
-extern rtree_t *chunks_rtree;
-
-extern size_t chunksize;
-extern size_t chunksize_mask; /* (chunksize - 1). */
-extern size_t chunk_npages;
-extern size_t map_bias; /* Number of arena chunk header pages. */
-extern size_t arena_maxclass; /* Max size class for arenas. */
-
-void *chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
- dss_prec_t dss_prec);
-void chunk_unmap(void *chunk, size_t size);
-void chunk_dealloc(void *chunk, size_t size, bool unmap);
-bool chunk_boot(void);
-void chunk_prefork(void);
-void chunk_postfork_parent(void);
-void chunk_postfork_child(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-
-#include "jemalloc/internal/chunk_dss.h"
-#include "jemalloc/internal/chunk_mmap.h"
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
deleted file mode 100644
index 4535ce09c..000000000
--- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef enum {
- dss_prec_disabled = 0,
- dss_prec_primary = 1,
- dss_prec_secondary = 2,
-
- dss_prec_limit = 3
-} dss_prec_t;
-#define DSS_PREC_DEFAULT dss_prec_secondary
-#define DSS_DEFAULT "secondary"
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-extern const char *dss_prec_names[];
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-dss_prec_t chunk_dss_prec_get(void);
-bool chunk_dss_prec_set(dss_prec_t dss_prec);
-void *chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
-bool chunk_in_dss(void *chunk);
-bool chunk_dss_boot(void);
-void chunk_dss_prefork(void);
-void chunk_dss_postfork_parent(void);
-void chunk_dss_postfork_child(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
deleted file mode 100644
index f24abac75..000000000
--- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-bool pages_purge(void *addr, size_t length);
-
-void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
-bool chunk_dealloc_mmap(void *chunk, size_t size);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h
index 58712a6a7..7b3850bc1 100644
--- a/deps/jemalloc/include/jemalloc/internal/ckh.h
+++ b/deps/jemalloc/include/jemalloc/internal/ckh.h
@@ -1,88 +1,101 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_CKH_H
+#define JEMALLOC_INTERNAL_CKH_H
-typedef struct ckh_s ckh_t;
-typedef struct ckhc_s ckhc_t;
+#include "jemalloc/internal/tsd.h"
-/* Typedefs to allow easy function pointer passing. */
-typedef void ckh_hash_t (const void *, size_t[2]);
-typedef bool ckh_keycomp_t (const void *, const void *);
+/* Cuckoo hashing implementation. Skip to the end for the interface. */
+
+/******************************************************************************/
+/* INTERNAL DEFINITIONS -- IGNORE */
+/******************************************************************************/
/* Maintain counters used to get an idea of performance. */
-/* #define CKH_COUNT */
+/* #define CKH_COUNT */
/* Print counter values in ckh_delete() (requires CKH_COUNT). */
-/* #define CKH_VERBOSE */
+/* #define CKH_VERBOSE */
/*
* There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit
* one bucket per L1 cache line.
*/
-#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
+#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
+/* Typedefs to allow easy function pointer passing. */
+typedef void ckh_hash_t (const void *, size_t[2]);
+typedef bool ckh_keycomp_t (const void *, const void *);
/* Hash table cell. */
-struct ckhc_s {
- const void *key;
- const void *data;
-};
+typedef struct {
+ const void *key;
+ const void *data;
+} ckhc_t;
-struct ckh_s {
+/* The hash table itself. */
+typedef struct {
#ifdef CKH_COUNT
/* Counters used to get an idea of performance. */
- uint64_t ngrows;
- uint64_t nshrinks;
- uint64_t nshrinkfails;
- uint64_t ninserts;
- uint64_t nrelocs;
+ uint64_t ngrows;
+ uint64_t nshrinks;
+ uint64_t nshrinkfails;
+ uint64_t ninserts;
+ uint64_t nrelocs;
#endif
/* Used for pseudo-random number generation. */
-#define CKH_A 1103515241
-#define CKH_C 12347
- uint32_t prng_state;
+ uint64_t prng_state;
/* Total number of items. */
- size_t count;
+ size_t count;
/*
* Minimum and current number of hash table buckets. There are
* 2^LG_CKH_BUCKET_CELLS cells per bucket.
*/
- unsigned lg_minbuckets;
- unsigned lg_curbuckets;
+ unsigned lg_minbuckets;
+ unsigned lg_curbuckets;
/* Hash and comparison functions. */
- ckh_hash_t *hash;
- ckh_keycomp_t *keycomp;
+ ckh_hash_t *hash;
+ ckh_keycomp_t *keycomp;
/* Hash table with 2^lg_curbuckets buckets. */
- ckhc_t *tab;
-};
+ ckhc_t *tab;
+} ckh_t;
-#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
+/* BEGIN PUBLIC API */
+/******************************************************************************/
-bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+/* Lifetime management. Minitems is the initial capacity. */
+bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh_keycomp_t *keycomp);
-void ckh_delete(ckh_t *ckh);
-size_t ckh_count(ckh_t *ckh);
-bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
-bool ckh_insert(ckh_t *ckh, const void *key, const void *data);
-bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+void ckh_delete(tsd_t *tsd, ckh_t *ckh);
+
+/* Get the number of elements in the set. */
+size_t ckh_count(ckh_t *ckh);
+
+/*
+ * To iterate over the elements in the table, initialize *tabind to 0 and call
+ * this function until it returns true. Each call that returns false will
+ * update *key and *data to the next element in the table, assuming the pointers
+ * are non-NULL.
+ */
+bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
+
+/*
+ * Basic hash table operations -- insertion, removal, lookup. For ckh_remove
+ * and ckh_search, key or data can be NULL. The hash table only stores pointers
+ * the key and value, and doesn't do any lifetime management.
+ */
+bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data);
+bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
void **data);
-bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data);
-void ckh_string_hash(const void *key, size_t r_hash[2]);
-bool ckh_string_keycomp(const void *k1, const void *k2);
-void ckh_pointer_hash(const void *key, size_t r_hash[2]);
-bool ckh_pointer_keycomp(const void *k1, const void *k2);
+bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
+/* Some useful hash and comparison functions for strings and pointers. */
+void ckh_string_hash(const void *key, size_t r_hash[2]);
+bool ckh_string_keycomp(const void *k1, const void *k2);
+void ckh_pointer_hash(const void *key, size_t r_hash[2]);
+bool ckh_pointer_keycomp(const void *k1, const void *k2);
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_CKH_H */
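
A minimal end-to-end sketch of the API (assumes a valid tsd_t *tsd; error handling abbreviated):

ckh_t ckh;
if (ckh_new(tsd, &ckh, 16, ckh_string_hash, ckh_string_keycomp)) {
	return; /* allocation failure */
}
ckh_insert(tsd, &ckh, "key", "value"); /* stores only the pointers */

size_t tabind = 0;
void *key, *data;
while (!ckh_iter(&ckh, &tabind, &key, &data)) {
	/* key and data now point at the stored key/value pointers. */
}
ckh_delete(tsd, &ckh);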
diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h
index 0ffecc5f2..d927d9480 100644
--- a/deps/jemalloc/include/jemalloc/internal/ctl.h
+++ b/deps/jemalloc/include/jemalloc/internal/ctl.h
@@ -1,87 +1,107 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct ctl_node_s ctl_node_t;
-typedef struct ctl_named_node_s ctl_named_node_t;
-typedef struct ctl_indexed_node_s ctl_indexed_node_t;
-typedef struct ctl_arena_stats_s ctl_arena_stats_t;
-typedef struct ctl_stats_s ctl_stats_t;
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-struct ctl_node_s {
- bool named;
-};
-
-struct ctl_named_node_s {
- struct ctl_node_s node;
- const char *name;
+#ifndef JEMALLOC_INTERNAL_CTL_H
+#define JEMALLOC_INTERNAL_CTL_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/stats.h"
+
+/* Maximum ctl tree depth. */
+#define CTL_MAX_DEPTH 7
+
+typedef struct ctl_node_s {
+ bool named;
+} ctl_node_t;
+
+typedef struct ctl_named_node_s {
+ ctl_node_t node;
+ const char *name;
/* If (nchildren == 0), this is a terminal node. */
- unsigned nchildren;
- const ctl_node_t *children;
- int (*ctl)(const size_t *, size_t, void *, size_t *,
- void *, size_t);
-};
+ size_t nchildren;
+ const ctl_node_t *children;
+ int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *,
+ size_t);
+} ctl_named_node_t;
-struct ctl_indexed_node_s {
- struct ctl_node_s node;
- const ctl_named_node_t *(*index)(const size_t *, size_t, size_t);
-};
+typedef struct ctl_indexed_node_s {
+ struct ctl_node_s node;
+ const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t,
+ size_t);
+} ctl_indexed_node_t;
-struct ctl_arena_stats_s {
- bool initialized;
- unsigned nthreads;
- const char *dss;
- size_t pactive;
- size_t pdirty;
- arena_stats_t astats;
+typedef struct ctl_arena_stats_s {
+ arena_stats_t astats;
/* Aggregate stats for small size classes, based on bin stats. */
- size_t allocated_small;
- uint64_t nmalloc_small;
- uint64_t ndalloc_small;
- uint64_t nrequests_small;
-
- malloc_bin_stats_t bstats[NBINS];
- malloc_large_stats_t *lstats; /* nlclasses elements. */
+ size_t allocated_small;
+ uint64_t nmalloc_small;
+ uint64_t ndalloc_small;
+ uint64_t nrequests_small;
+
+ bin_stats_t bstats[NBINS];
+ arena_stats_large_t lstats[NSIZES - NBINS];
+} ctl_arena_stats_t;
+
+typedef struct ctl_stats_s {
+ size_t allocated;
+ size_t active;
+ size_t metadata;
+ size_t metadata_thp;
+ size_t resident;
+ size_t mapped;
+ size_t retained;
+
+ background_thread_stats_t background_thread;
+ mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes];
+} ctl_stats_t;
+
+typedef struct ctl_arena_s ctl_arena_t;
+struct ctl_arena_s {
+ unsigned arena_ind;
+ bool initialized;
+ ql_elm(ctl_arena_t) destroyed_link;
+
+ /* Basic stats, supported even if !config_stats. */
+ unsigned nthreads;
+ const char *dss;
+ ssize_t dirty_decay_ms;
+ ssize_t muzzy_decay_ms;
+ size_t pactive;
+ size_t pdirty;
+ size_t pmuzzy;
+
+ /* NULL if !config_stats. */
+ ctl_arena_stats_t *astats;
};
-struct ctl_stats_s {
- size_t allocated;
- size_t active;
- size_t mapped;
- struct {
- size_t current; /* stats_chunks.curchunks */
- uint64_t total; /* stats_chunks.nchunks */
- size_t high; /* stats_chunks.highchunks */
- } chunks;
- struct {
- size_t allocated; /* huge_allocated */
- uint64_t nmalloc; /* huge_nmalloc */
- uint64_t ndalloc; /* huge_ndalloc */
- } huge;
- unsigned narenas;
- ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen);
-int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
-
-int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+typedef struct ctl_arenas_s {
+ uint64_t epoch;
+ unsigned narenas;
+ ql_head(ctl_arena_t) destroyed;
+
+ /*
+ * Element 0 corresponds to merged stats for extant arenas (accessed via
+ * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for
+ * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the
+ * remaining MALLOCX_ARENA_LIMIT elements correspond to arenas.
+ */
+ ctl_arena_t *arenas[2 + MALLOCX_ARENA_LIMIT];
+} ctl_arenas_t;
+
+int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
-bool ctl_boot(void);
-void ctl_prefork(void);
-void ctl_postfork_parent(void);
-void ctl_postfork_child(void);
+int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp);
-#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
+int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen);
+bool ctl_boot(void);
+void ctl_prefork(tsdn_t *tsdn);
+void ctl_postfork_parent(tsdn_t *tsdn);
+void ctl_postfork_child(tsdn_t *tsdn);
+
+#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
if (je_mallctl(name, oldp, oldlenp, newp, newlen) \
!= 0) { \
malloc_printf( \
@@ -91,7 +111,7 @@ void ctl_postfork_child(void);
} \
} while (0)
-#define xmallctlnametomib(name, mibp, miblenp) do { \
+#define xmallctlnametomib(name, mibp, miblenp) do { \
if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \
malloc_printf("<jemalloc>: Failure in " \
"xmallctlnametomib(\"%s\", ...)\n", name); \
@@ -99,7 +119,7 @@ void ctl_postfork_child(void);
} \
} while (0)
-#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \
+#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \
if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \
newlen) != 0) { \
malloc_write( \
@@ -108,10 +128,4 @@ void ctl_postfork_child(void);
} \
} while (0)
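
For instance, a stats read through the checked wrapper (a sketch; "stats.allocated" is a standard mallctl name, and the macro reports failure instead of returning an error):

size_t allocated, sz = sizeof(allocated);
xmallctl("stats.allocated", &allocated, &sz, NULL, 0);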
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-
+#endif /* JEMALLOC_INTERNAL_CTL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/div.h b/deps/jemalloc/include/jemalloc/internal/div.h
new file mode 100644
index 000000000..aebae9398
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/div.h
@@ -0,0 +1,41 @@
+#ifndef JEMALLOC_INTERNAL_DIV_H
+#define JEMALLOC_INTERNAL_DIV_H
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * This module does the division that computes the index of a region in a slab,
+ * given its offset relative to the base.
+ * That is, given a divisor d and an n = i * d (all integers), we'll return i.
+ * We do some pre-computation to do this more quickly than a CPU division
+ * instruction.
+ * We bound n < 2^32, and don't support dividing by one.
+ */
+
+typedef struct div_info_s div_info_t;
+struct div_info_s {
+ uint32_t magic;
+#ifdef JEMALLOC_DEBUG
+ size_t d;
+#endif
+};
+
+void div_init(div_info_t *div_info, size_t divisor);
+
+static inline size_t
+div_compute(div_info_t *div_info, size_t n) {
+ assert(n <= (uint32_t)-1);
+ /*
+ * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine,
+ * the compilers I tried were all smart enough to turn this into the
+ * appropriate "get the high 32 bits of the result of a multiply" (e.g.
+ * mul; mov edx eax; on x86, umull on arm, etc.).
+ */
+ size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32;
+#ifdef JEMALLOC_DEBUG
+ assert(i * div_info->d == n);
+#endif
+ return i;
+}
+
+#endif /* JEMALLOC_INTERNAL_DIV_H */
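
A worked instance of the trick, assuming magic = ceil(2^32 / d) (that choice is consistent with the debug assert above, but div_init's body is not part of this header):

/* d = 3: magic = ceil(2^32 / 3) = 1431655766. */
uint32_t magic = 1431655766;
size_t n = 48;                          /* must be a multiple of d */
size_t i = ((uint64_t)n * magic) >> 32; /* i == 16 == 48 / 3 */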
diff --git a/deps/jemalloc/include/jemalloc/internal/emitter.h b/deps/jemalloc/include/jemalloc/internal/emitter.h
new file mode 100644
index 000000000..3a2b2f7f2
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/emitter.h
@@ -0,0 +1,435 @@
+#ifndef JEMALLOC_INTERNAL_EMITTER_H
+#define JEMALLOC_INTERNAL_EMITTER_H
+
+#include "jemalloc/internal/ql.h"
+
+typedef enum emitter_output_e emitter_output_t;
+enum emitter_output_e {
+ emitter_output_json,
+ emitter_output_table
+};
+
+typedef enum emitter_justify_e emitter_justify_t;
+enum emitter_justify_e {
+ emitter_justify_left,
+ emitter_justify_right,
+ /* Not for users; just to pass to internal functions. */
+ emitter_justify_none
+};
+
+typedef enum emitter_type_e emitter_type_t;
+enum emitter_type_e {
+ emitter_type_bool,
+ emitter_type_int,
+ emitter_type_unsigned,
+ emitter_type_uint32,
+ emitter_type_uint64,
+ emitter_type_size,
+ emitter_type_ssize,
+ emitter_type_string,
+ /*
+ * A title is a column title in a table; it's just a string, but it's
+ * not quoted.
+ */
+ emitter_type_title,
+};
+
+typedef struct emitter_col_s emitter_col_t;
+struct emitter_col_s {
+ /* Filled in by the user. */
+ emitter_justify_t justify;
+ int width;
+ emitter_type_t type;
+ union {
+ bool bool_val;
+ int int_val;
+ unsigned unsigned_val;
+ uint32_t uint32_val;
+ uint64_t uint64_val;
+ size_t size_val;
+ ssize_t ssize_val;
+ const char *str_val;
+ };
+
+ /* Filled in by initialization. */
+ ql_elm(emitter_col_t) link;
+};
+
+typedef struct emitter_row_s emitter_row_t;
+struct emitter_row_s {
+ ql_head(emitter_col_t) cols;
+};
+
+static inline void
+emitter_row_init(emitter_row_t *row) {
+ ql_new(&row->cols);
+}
+
+static inline void
+emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
+ ql_elm_new(col, link);
+ ql_tail_insert(&row->cols, col, link);
+}
+
+typedef struct emitter_s emitter_t;
+struct emitter_s {
+ emitter_output_t output;
+ /* The output information. */
+ void (*write_cb)(void *, const char *);
+ void *cbopaque;
+ int nesting_depth;
+ /* True if we've already emitted a value at the given depth. */
+ bool item_at_depth;
+};
+
+static inline void
+emitter_init(emitter_t *emitter, emitter_output_t emitter_output,
+ void (*write_cb)(void *, const char *), void *cbopaque) {
+ emitter->output = emitter_output;
+ emitter->write_cb = write_cb;
+ emitter->cbopaque = cbopaque;
+ emitter->item_at_depth = false;
+ emitter->nesting_depth = 0;
+}
+
+/* Internal convenience function. Write to the emitter the given string. */
+JEMALLOC_FORMAT_PRINTF(2, 3)
+static inline void
+emitter_printf(emitter_t *emitter, const char *format, ...) {
+ va_list ap;
+
+ va_start(ap, format);
+ malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap);
+ va_end(ap);
+}
+
+/* Write to the emitter the given string, but only in table mode. */
+JEMALLOC_FORMAT_PRINTF(2, 3)
+static inline void
+emitter_table_printf(emitter_t *emitter, const char *format, ...) {
+ if (emitter->output == emitter_output_table) {
+ va_list ap;
+ va_start(ap, format);
+ malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap);
+ va_end(ap);
+ }
+}
+
+static inline void
+emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
+ emitter_justify_t justify, int width) {
+ size_t written;
+ if (justify == emitter_justify_none) {
+ written = malloc_snprintf(out_fmt, out_size,
+ "%%%s", fmt_specifier);
+ } else if (justify == emitter_justify_left) {
+ written = malloc_snprintf(out_fmt, out_size,
+ "%%-%d%s", width, fmt_specifier);
+ } else {
+ written = malloc_snprintf(out_fmt, out_size,
+ "%%%d%s", width, fmt_specifier);
+ }
+ /* Only happens in case of bad format string, which *we* choose. */
+ assert(written < out_size);
+}
+
+/*
+ * Internal. Emit the given value type in the relevant encoding (so that the
+ * bool true gets mapped to json "true", but the string "true" gets mapped to
+ * json "\"true\"", for instance).
+ *
+ * Width is ignored if justify is emitter_justify_none.
+ */
+static inline void
+emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
+ emitter_type_t value_type, const void *value) {
+ size_t str_written;
+#define BUF_SIZE 256
+#define FMT_SIZE 10
+ /*
+ * We dynamically generate a format string to emit, to let us use the
+ * snprintf machinery. This is kinda hacky, but gets the job done
+ * quickly without having to think about the various snprintf edge
+ * cases.
+ */
+ char fmt[FMT_SIZE];
+ char buf[BUF_SIZE];
+
+#define EMIT_SIMPLE(type, format) \
+ emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \
+ emitter_printf(emitter, fmt, *(const type *)value); \
+
+ switch (value_type) {
+ case emitter_type_bool:
+ emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
+ emitter_printf(emitter, fmt, *(const bool *)value ?
+ "true" : "false");
+ break;
+ case emitter_type_int:
+ EMIT_SIMPLE(int, "d")
+ break;
+ case emitter_type_unsigned:
+ EMIT_SIMPLE(unsigned, "u")
+ break;
+ case emitter_type_ssize:
+ EMIT_SIMPLE(ssize_t, "zd")
+ break;
+ case emitter_type_size:
+ EMIT_SIMPLE(size_t, "zu")
+ break;
+ case emitter_type_string:
+ str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"",
+ *(const char *const *)value);
+ /*
+ * We control the strings we output; we shouldn't get anything
+		 * anywhere near the buffer size.
+ */
+ assert(str_written < BUF_SIZE);
+ emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width);
+ emitter_printf(emitter, fmt, buf);
+ break;
+ case emitter_type_uint32:
+ EMIT_SIMPLE(uint32_t, FMTu32)
+ break;
+ case emitter_type_uint64:
+ EMIT_SIMPLE(uint64_t, FMTu64)
+ break;
+ case emitter_type_title:
+ EMIT_SIMPLE(char *const, "s");
+ break;
+ default:
+ unreachable();
+ }
+#undef BUF_SIZE
+#undef FMT_SIZE
+}
+
+
+/* Internal functions. In json mode, tracks nesting state. */
+static inline void
+emitter_nest_inc(emitter_t *emitter) {
+ emitter->nesting_depth++;
+ emitter->item_at_depth = false;
+}
+
+static inline void
+emitter_nest_dec(emitter_t *emitter) {
+ emitter->nesting_depth--;
+ emitter->item_at_depth = true;
+}
+
+static inline void
+emitter_indent(emitter_t *emitter) {
+ int amount = emitter->nesting_depth;
+ const char *indent_str;
+ if (emitter->output == emitter_output_json) {
+ indent_str = "\t";
+ } else {
+ amount *= 2;
+ indent_str = " ";
+ }
+ for (int i = 0; i < amount; i++) {
+ emitter_printf(emitter, "%s", indent_str);
+ }
+}
+
+static inline void
+emitter_json_key_prefix(emitter_t *emitter) {
+ emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : "");
+ emitter_indent(emitter);
+}
+
+static inline void
+emitter_begin(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth == 0);
+ emitter_printf(emitter, "{");
+ emitter_nest_inc(emitter);
+ } else {
+		/* Tabular init. */
+ emitter_printf(emitter, "%s", "");
+ }
+}
+
+static inline void
+emitter_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth == 1);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n}\n");
+ }
+}
+
+/*
+ * The "note" variant emits an extra kv pair as well, but only in table mode.
+ * The note is omitted if table_note_key is NULL.
+ */
+static inline void
+emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
+ emitter_type_t value_type, const void *value,
+ const char *table_note_key, emitter_type_t table_note_value_type,
+ const void *table_note_value) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth > 0);
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "\"%s\": ", json_key);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ value_type, value);
+ } else {
+ emitter_indent(emitter);
+ emitter_printf(emitter, "%s: ", table_key);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ value_type, value);
+ if (table_note_key != NULL) {
+ emitter_printf(emitter, " (%s: ", table_note_key);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ table_note_value_type, table_note_value);
+ emitter_printf(emitter, ")");
+ }
+ emitter_printf(emitter, "\n");
+ }
+ emitter->item_at_depth = true;
+}
+
+static inline void
+emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
+ emitter_type_t value_type, const void *value) {
+ emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL,
+ emitter_type_bool, NULL);
+}
+
+static inline void
+emitter_json_kv(emitter_t *emitter, const char *json_key,
+ emitter_type_t value_type, const void *value) {
+ if (emitter->output == emitter_output_json) {
+ emitter_kv(emitter, json_key, NULL, value_type, value);
+ }
+}
+
+static inline void
+emitter_table_kv(emitter_t *emitter, const char *table_key,
+ emitter_type_t value_type, const void *value) {
+ if (emitter->output == emitter_output_table) {
+ emitter_kv(emitter, NULL, table_key, value_type, value);
+ }
+}
+
+static inline void
+emitter_dict_begin(emitter_t *emitter, const char *json_key,
+ const char *table_header) {
+ if (emitter->output == emitter_output_json) {
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "\"%s\": {", json_key);
+ emitter_nest_inc(emitter);
+ } else {
+ emitter_indent(emitter);
+ emitter_printf(emitter, "%s\n", table_header);
+ emitter_nest_inc(emitter);
+ }
+}
+
+static inline void
+emitter_dict_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth > 0);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ emitter_printf(emitter, "}");
+ } else {
+ emitter_nest_dec(emitter);
+ }
+}
+
+static inline void
+emitter_json_dict_begin(emitter_t *emitter, const char *json_key) {
+ if (emitter->output == emitter_output_json) {
+ emitter_dict_begin(emitter, json_key, NULL);
+ }
+}
+
+static inline void
+emitter_json_dict_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ emitter_dict_end(emitter);
+ }
+}
+
+static inline void
+emitter_table_dict_begin(emitter_t *emitter, const char *table_key) {
+ if (emitter->output == emitter_output_table) {
+ emitter_dict_begin(emitter, NULL, table_key);
+ }
+}
+
+static inline void
+emitter_table_dict_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_table) {
+ emitter_dict_end(emitter);
+ }
+}
+
+static inline void
+emitter_json_arr_begin(emitter_t *emitter, const char *json_key) {
+ if (emitter->output == emitter_output_json) {
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "\"%s\": [", json_key);
+ emitter_nest_inc(emitter);
+ }
+}
+
+static inline void
+emitter_json_arr_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth > 0);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ emitter_printf(emitter, "]");
+ }
+}
+
+static inline void
+emitter_json_arr_obj_begin(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ emitter_json_key_prefix(emitter);
+ emitter_printf(emitter, "{");
+ emitter_nest_inc(emitter);
+ }
+}
+
+static inline void
+emitter_json_arr_obj_end(emitter_t *emitter) {
+ if (emitter->output == emitter_output_json) {
+ assert(emitter->nesting_depth > 0);
+ emitter_nest_dec(emitter);
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ emitter_printf(emitter, "}");
+ }
+}
+
+static inline void
+emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type,
+ const void *value) {
+ if (emitter->output == emitter_output_json) {
+ emitter_json_key_prefix(emitter);
+ emitter_print_value(emitter, emitter_justify_none, -1,
+ value_type, value);
+ }
+}
+
+static inline void
+emitter_table_row(emitter_t *emitter, emitter_row_t *row) {
+ if (emitter->output != emitter_output_table) {
+ return;
+ }
+ emitter_col_t *col;
+ ql_foreach(col, &row->cols, link) {
+ emitter_print_value(emitter, col->justify, col->width,
+ col->type, (const void *)&col->bool_val);
+ }
+ emitter_table_printf(emitter, "\n");
+}
+
+#endif /* JEMALLOC_INTERNAL_EMITTER_H */
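
A hedged usage sketch in json mode; stderr_write_cb is a hypothetical sink whose signature matches the write_cb field:

static void
stderr_write_cb(void *cbopaque, const char *s) {
	fputs(s, stderr); /* cbopaque unused in this sketch */
}

static void
example_emit(void) {
	emitter_t emitter;
	bool enabled = true;
	emitter_init(&emitter, emitter_output_json, stderr_write_cb, NULL);
	emitter_begin(&emitter);
	emitter_kv(&emitter, "enabled", "Enabled", emitter_type_bool,
	    &enabled);
	emitter_end(&emitter);
	/* Emits: {\n\t"enabled": true\n}\n */
}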
diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h
deleted file mode 100644
index ba95ca816..000000000
--- a/deps/jemalloc/include/jemalloc/internal/extent.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct extent_node_s extent_node_t;
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-/* Tree of extents. */
-struct extent_node_s {
- /* Linkage for the size/address-ordered tree. */
- rb_node(extent_node_t) link_szad;
-
- /* Linkage for the address-ordered tree. */
- rb_node(extent_node_t) link_ad;
-
- /* Profile counters, used for huge objects. */
- prof_ctx_t *prof_ctx;
-
- /* Pointer to the extent that this tree node is responsible for. */
- void *addr;
-
- /* Total region size. */
- size_t size;
-
- /* True if zero-filled; used by chunk recycling code. */
- bool zeroed;
-};
-typedef rb_tree(extent_node_t) extent_tree_t;
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
-
-rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_dss.h b/deps/jemalloc/include/jemalloc/internal/extent_dss.h
new file mode 100644
index 000000000..e8f02ce2a
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_dss.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H
+#define JEMALLOC_INTERNAL_EXTENT_DSS_H
+
+typedef enum {
+ dss_prec_disabled = 0,
+ dss_prec_primary = 1,
+ dss_prec_secondary = 2,
+
+ dss_prec_limit = 3
+} dss_prec_t;
+#define DSS_PREC_DEFAULT dss_prec_secondary
+#define DSS_DEFAULT "secondary"
+
+extern const char *dss_prec_names[];
+
+extern const char *opt_dss;
+
+dss_prec_t extent_dss_prec_get(void);
+bool extent_dss_prec_set(dss_prec_t dss_prec);
+void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool *commit);
+bool extent_in_dss(void *addr);
+bool extent_dss_mergeable(void *addr_a, void *addr_b);
+void extent_dss_boot(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_externs.h b/deps/jemalloc/include/jemalloc/internal/extent_externs.h
new file mode 100644
index 000000000..b8a4d026c
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_externs.h
@@ -0,0 +1,73 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/rtree.h"
+
+extern size_t opt_lg_extent_max_active_fit;
+
+extern rtree_t extents_rtree;
+extern const extent_hooks_t extent_hooks_default;
+extern mutex_pool_t extent_mutex_pool;
+
+extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena);
+void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
+
+extent_hooks_t *extent_hooks_get(arena_t *arena);
+extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena,
+ extent_hooks_t *extent_hooks);
+
+#ifdef JEMALLOC_JET
+size_t extent_size_quantize_floor(size_t size);
+size_t extent_size_quantize_ceil(size_t size);
+#endif
+
+rb_proto(, extent_avail_, extent_tree_t, extent_t)
+ph_proto(, extent_heap_, extent_heap_t, extent_t)
+
+bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
+ bool delay_coalesce);
+extent_state_t extents_state_get(const extents_t *extents);
+size_t extents_npages_get(extents_t *extents);
+extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
+ size_t size, size_t pad, size_t alignment, bool slab, szind_t szind,
+ bool *zero, bool *commit);
+void extents_dalloc(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent);
+extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min);
+void extents_prefork(tsdn_t *tsdn, extents_t *extents);
+void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents);
+void extents_postfork_child(tsdn_t *tsdn, extents_t *extents);
+extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit);
+void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
+void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent);
+void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent);
+bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length);
+bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length);
+bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length);
+bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length);
+extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
+ szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b);
+bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b);
+
+bool extent_boot(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
new file mode 100644
index 000000000..77181df8d
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
@@ -0,0 +1,433 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H
+#define JEMALLOC_INTERNAL_EXTENT_INLINES_H
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sz.h"
+
+static inline void
+extent_lock(tsdn_t *tsdn, extent_t *extent) {
+ assert(extent != NULL);
+ mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent);
+}
+
+static inline void
+extent_unlock(tsdn_t *tsdn, extent_t *extent) {
+ assert(extent != NULL);
+ mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent);
+}
+
+static inline void
+extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
+ assert(extent1 != NULL && extent2 != NULL);
+ mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1,
+ (uintptr_t)extent2);
+}
+
+static inline void
+extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
+ assert(extent1 != NULL && extent2 != NULL);
+ mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1,
+ (uintptr_t)extent2);
+}
+
+static inline arena_t *
+extent_arena_get(const extent_t *extent) {
+ unsigned arena_ind = (unsigned)((extent->e_bits &
+ EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT);
+ /*
+ * The following check is omitted because we should never actually read
+ * a NULL arena pointer.
+ */
+ if (false && arena_ind >= MALLOCX_ARENA_LIMIT) {
+ return NULL;
+ }
+ assert(arena_ind < MALLOCX_ARENA_LIMIT);
+ return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE);
+}
+
+static inline szind_t
+extent_szind_get_maybe_invalid(const extent_t *extent) {
+ szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >>
+ EXTENT_BITS_SZIND_SHIFT);
+ assert(szind <= NSIZES);
+ return szind;
+}
+
+static inline szind_t
+extent_szind_get(const extent_t *extent) {
+ szind_t szind = extent_szind_get_maybe_invalid(extent);
+ assert(szind < NSIZES); /* Never call when "invalid". */
+ return szind;
+}
+
+static inline size_t
+extent_usize_get(const extent_t *extent) {
+ return sz_index2size(extent_szind_get(extent));
+}
+
+static inline size_t
+extent_sn_get(const extent_t *extent) {
+ return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >>
+ EXTENT_BITS_SN_SHIFT);
+}
+
+static inline extent_state_t
+extent_state_get(const extent_t *extent) {
+ return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >>
+ EXTENT_BITS_STATE_SHIFT);
+}
+
+static inline bool
+extent_zeroed_get(const extent_t *extent) {
+ return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >>
+ EXTENT_BITS_ZEROED_SHIFT);
+}
+
+static inline bool
+extent_committed_get(const extent_t *extent) {
+ return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >>
+ EXTENT_BITS_COMMITTED_SHIFT);
+}
+
+static inline bool
+extent_dumpable_get(const extent_t *extent) {
+ return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >>
+ EXTENT_BITS_DUMPABLE_SHIFT);
+}
+
+static inline bool
+extent_slab_get(const extent_t *extent) {
+ return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >>
+ EXTENT_BITS_SLAB_SHIFT);
+}
+
+static inline unsigned
+extent_nfree_get(const extent_t *extent) {
+ assert(extent_slab_get(extent));
+ return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >>
+ EXTENT_BITS_NFREE_SHIFT);
+}
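
All of the e_bits accessors above (and the setters below) are instances of one bitfield idiom over the packed 64-bit word. As a sketch of the invariant each MASK/SHIFT pair is assumed to satisfy, for a field of width WIDTH starting at bit SHIFT:

/* MASK == ((UINT64_C(1) << WIDTH) - 1) << SHIFT        */
/* get:   (e_bits & MASK) >> SHIFT                      */
/* set:   (e_bits & ~MASK) | ((uint64_t)val << SHIFT)   */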
+
+static inline void *
+extent_base_get(const extent_t *extent) {
+ assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
+ !extent_slab_get(extent));
+ return PAGE_ADDR2BASE(extent->e_addr);
+}
+
+static inline void *
+extent_addr_get(const extent_t *extent) {
+ assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
+ !extent_slab_get(extent));
+ return extent->e_addr;
+}
+
+static inline size_t
+extent_size_get(const extent_t *extent) {
+ return (extent->e_size_esn & EXTENT_SIZE_MASK);
+}
+
+static inline size_t
+extent_esn_get(const extent_t *extent) {
+ return (extent->e_size_esn & EXTENT_ESN_MASK);
+}
+
+static inline size_t
+extent_bsize_get(const extent_t *extent) {
+ return extent->e_bsize;
+}
+
+static inline void *
+extent_before_get(const extent_t *extent) {
+ return (void *)((uintptr_t)extent_base_get(extent) - PAGE);
+}
+
+static inline void *
+extent_last_get(const extent_t *extent) {
+ return (void *)((uintptr_t)extent_base_get(extent) +
+ extent_size_get(extent) - PAGE);
+}
+
+static inline void *
+extent_past_get(const extent_t *extent) {
+ return (void *)((uintptr_t)extent_base_get(extent) +
+ extent_size_get(extent));
+}
+
+static inline arena_slab_data_t *
+extent_slab_data_get(extent_t *extent) {
+ assert(extent_slab_get(extent));
+ return &extent->e_slab_data;
+}
+
+static inline const arena_slab_data_t *
+extent_slab_data_get_const(const extent_t *extent) {
+ assert(extent_slab_get(extent));
+ return &extent->e_slab_data;
+}
+
+static inline prof_tctx_t *
+extent_prof_tctx_get(const extent_t *extent) {
+ return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx,
+ ATOMIC_ACQUIRE);
+}
+
+static inline void
+extent_arena_set(extent_t *extent, arena_t *arena) {
+ unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U <<
+ MALLOCX_ARENA_BITS) - 1);
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) |
+ ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT);
+}
+
+static inline void
+extent_addr_set(extent_t *extent, void *addr) {
+ extent->e_addr = addr;
+}
+
+static inline void
+extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) {
+ assert(extent_base_get(extent) == extent_addr_get(extent));
+
+ if (alignment < PAGE) {
+ unsigned lg_range = LG_PAGE -
+ lg_floor(CACHELINE_CEILING(alignment));
+ size_t r;
+ if (!tsdn_null(tsdn)) {
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ r = (size_t)prng_lg_range_u64(
+ tsd_offset_statep_get(tsd), lg_range);
+ } else {
+ r = prng_lg_range_zu(
+ &extent_arena_get(extent)->offset_state,
+ lg_range, true);
+ }
+ uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
+ lg_range);
+ extent->e_addr = (void *)((uintptr_t)extent->e_addr +
+ random_offset);
+ assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) ==
+ extent->e_addr);
+ }
+}
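
A worked instance of the range arithmetic (assuming 4 KiB pages, so LG_PAGE = 12): for alignment = 64, lg_range = 12 - 6 = 6, r is drawn from [0, 2^6), and the offset r << 6 places the extent's address at one of 64 cacheline-aligned slots within its page -- presumably to avoid systematic cache-index conflicts between same-size extents.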
+
+static inline void
+extent_size_set(extent_t *extent, size_t size) {
+ assert((size & ~EXTENT_SIZE_MASK) == 0);
+ extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK);
+}
+
+static inline void
+extent_esn_set(extent_t *extent, size_t esn) {
+ extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn &
+ EXTENT_ESN_MASK);
+}
+
+static inline void
+extent_bsize_set(extent_t *extent, size_t bsize) {
+ extent->e_bsize = bsize;
+}
+
+static inline void
+extent_szind_set(extent_t *extent, szind_t szind) {
+ assert(szind <= NSIZES); /* NSIZES means "invalid". */
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) |
+ ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT);
+}
+
+static inline void
+extent_nfree_set(extent_t *extent, unsigned nfree) {
+ assert(extent_slab_get(extent));
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) |
+ ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT);
+}
+
+static inline void
+extent_nfree_inc(extent_t *extent) {
+ assert(extent_slab_get(extent));
+ extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
+}
+
+static inline void
+extent_nfree_dec(extent_t *extent) {
+ assert(extent_slab_get(extent));
+ extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
+}
+
+static inline void
+extent_sn_set(extent_t *extent, size_t sn) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) |
+ ((uint64_t)sn << EXTENT_BITS_SN_SHIFT);
+}
+
+static inline void
+extent_state_set(extent_t *extent, extent_state_t state) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) |
+ ((uint64_t)state << EXTENT_BITS_STATE_SHIFT);
+}
+
+static inline void
+extent_zeroed_set(extent_t *extent, bool zeroed) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) |
+ ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT);
+}
+
+static inline void
+extent_committed_set(extent_t *extent, bool committed) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) |
+ ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT);
+}
+
+static inline void
+extent_dumpable_set(extent_t *extent, bool dumpable) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) |
+ ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT);
+}
+
+static inline void
+extent_slab_set(extent_t *extent, bool slab) {
+ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) |
+ ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT);
+}
+
+static inline void
+extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
+ atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE);
+}
+
+static inline void
+extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
+ bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
+ bool committed, bool dumpable) {
+ assert(addr == PAGE_ADDR2BASE(addr) || !slab);
+
+ extent_arena_set(extent, arena);
+ extent_addr_set(extent, addr);
+ extent_size_set(extent, size);
+ extent_slab_set(extent, slab);
+ extent_szind_set(extent, szind);
+ extent_sn_set(extent, sn);
+ extent_state_set(extent, state);
+ extent_zeroed_set(extent, zeroed);
+ extent_committed_set(extent, committed);
+ extent_dumpable_set(extent, dumpable);
+ ql_elm_new(extent, ql_link);
+ if (config_prof) {
+ extent_prof_tctx_set(extent, NULL);
+ }
+}
+
+static inline void
+extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
+ extent_arena_set(extent, NULL);
+ extent_addr_set(extent, addr);
+ extent_bsize_set(extent, bsize);
+ extent_slab_set(extent, false);
+ extent_szind_set(extent, NSIZES);
+ extent_sn_set(extent, sn);
+ extent_state_set(extent, extent_state_active);
+ extent_zeroed_set(extent, true);
+ extent_committed_set(extent, true);
+ extent_dumpable_set(extent, true);
+}
+
+static inline void
+extent_list_init(extent_list_t *list) {
+ ql_new(list);
+}
+
+static inline extent_t *
+extent_list_first(const extent_list_t *list) {
+ return ql_first(list);
+}
+
+static inline extent_t *
+extent_list_last(const extent_list_t *list) {
+ return ql_last(list, ql_link);
+}
+
+static inline void
+extent_list_append(extent_list_t *list, extent_t *extent) {
+ ql_tail_insert(list, extent, ql_link);
+}
+
+static inline void
+extent_list_prepend(extent_list_t *list, extent_t *extent) {
+ ql_head_insert(list, extent, ql_link);
+}
+
+static inline void
+extent_list_replace(extent_list_t *list, extent_t *to_remove,
+ extent_t *to_insert) {
+ ql_after_insert(to_remove, to_insert, ql_link);
+ ql_remove(list, to_remove, ql_link);
+}
+
+static inline void
+extent_list_remove(extent_list_t *list, extent_t *extent) {
+ ql_remove(list, extent, ql_link);
+}
+
+static inline int
+extent_sn_comp(const extent_t *a, const extent_t *b) {
+ size_t a_sn = extent_sn_get(a);
+ size_t b_sn = extent_sn_get(b);
+
+ return (a_sn > b_sn) - (a_sn < b_sn);
+}
+
+static inline int
+extent_esn_comp(const extent_t *a, const extent_t *b) {
+ size_t a_esn = extent_esn_get(a);
+ size_t b_esn = extent_esn_get(b);
+
+ return (a_esn > b_esn) - (a_esn < b_esn);
+}
+
+static inline int
+extent_ad_comp(const extent_t *a, const extent_t *b) {
+ uintptr_t a_addr = (uintptr_t)extent_addr_get(a);
+ uintptr_t b_addr = (uintptr_t)extent_addr_get(b);
+
+ return (a_addr > b_addr) - (a_addr < b_addr);
+}
+
+static inline int
+extent_ead_comp(const extent_t *a, const extent_t *b) {
+ uintptr_t a_eaddr = (uintptr_t)a;
+ uintptr_t b_eaddr = (uintptr_t)b;
+
+ return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr);
+}
+
+static inline int
+extent_snad_comp(const extent_t *a, const extent_t *b) {
+ int ret;
+
+ ret = extent_sn_comp(a, b);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = extent_ad_comp(a, b);
+ return ret;
+}
+
+static inline int
+extent_esnead_comp(const extent_t *a, const extent_t *b) {
+ int ret;
+
+ ret = extent_esn_comp(a, b);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = extent_ead_comp(a, b);
+ return ret;
+}
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */
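
The accessors above are all instances of one mask-and-shift pattern over the packed e_bits word: a get masks off the field and shifts it down to bit 0; a set clears the field's slot and ORs in the shifted new value. A minimal standalone sketch of the pattern, using a hypothetical 8-bit field at bit 4 rather than jemalloc's real layout:

#include <assert.h>
#include <stdint.h>

#define FIELD_WIDTH 8
#define FIELD_SHIFT 4
#define FIELD_MASK  ((((uint64_t)1 << FIELD_WIDTH) - 1) << FIELD_SHIFT)

/* Read a packed field: mask off the other bits, then shift down. */
static inline unsigned
field_get(uint64_t bits) {
	return (unsigned)((bits & FIELD_MASK) >> FIELD_SHIFT);
}

/* Write a packed field: clear its slot, then OR in the shifted value. */
static inline uint64_t
field_set(uint64_t bits, unsigned v) {
	return (bits & ~FIELD_MASK) | ((uint64_t)v << FIELD_SHIFT);
}

int
main(void) {
	uint64_t bits = 0;
	bits = field_set(bits, 0xab);
	assert(field_get(bits) == 0xab);
	bits = field_set(bits, 0x01);	/* Overwrite; other bits untouched. */
	assert(field_get(bits) == 0x01);
	return 0;
}

The comparators at the end of the header use the related branchless idiom (a > b) - (a < b), which yields -1, 0, or 1 and, unlike a plain subtraction, cannot overflow.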
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_mmap.h b/deps/jemalloc/include/jemalloc/internal/extent_mmap.h
new file mode 100644
index 000000000..55f17ee48
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_mmap.h
@@ -0,0 +1,10 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
+
+extern bool opt_retain;
+
+void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment,
+ bool *zero, bool *commit);
+bool extent_dalloc_mmap(void *addr, size_t size);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */
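
extent_alloc_mmap() has to hand back size bytes at an arbitrary power-of-two alignment, which mmap(2) does not guarantee directly. A sketch of the usual over-allocate-and-trim technique (the general idea only, not the function's actual body; names are hypothetical):

#define _DEFAULT_SOURCE	/* For MAP_ANONYMOUS on glibc. */
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* Map `size` bytes aligned to `alignment` (a power of two >= the page
 * size) by mapping size + alignment bytes and unmapping the excess. */
static void *
mmap_aligned(size_t size, size_t alignment) {
	size_t alloc_size = size + alignment;
	if (alloc_size < size) {
		return NULL;	/* size_t overflow. */
	}
	void *p = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		return NULL;
	}
	uintptr_t addr = (uintptr_t)p;
	uintptr_t aligned = (addr + alignment - 1) & ~(alignment - 1);
	size_t head = aligned - addr;
	size_t tail = alloc_size - head - size;
	if (head != 0) {
		munmap(p, head);	/* Trim the misaligned head. */
	}
	if (tail != 0) {
		munmap((void *)(aligned + size), tail);	/* Trim the tail. */
	}
	return (void *)aligned;
}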
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_structs.h b/deps/jemalloc/include/jemalloc/internal/extent_structs.h
new file mode 100644
index 000000000..4873b9e9e
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_structs.h
@@ -0,0 +1,219 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
+#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/size_classes.h"
+
+typedef enum {
+ extent_state_active = 0,
+ extent_state_dirty = 1,
+ extent_state_muzzy = 2,
+ extent_state_retained = 3
+} extent_state_t;
+
+/* Extent (span of pages). Use accessor functions for e_* fields. */
+struct extent_s {
+ /*
+ * Bitfield containing several fields:
+ *
+ * a: arena_ind
+ * b: slab
+ * c: committed
+ * d: dumpable
+ * z: zeroed
+ * t: state
+ * i: szind
+ * f: nfree
+ * n: sn
+ *
+ * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
+ *
+ * arena_ind: Arena from which this extent came, or all 1 bits if
+ * unassociated.
+ *
+ * slab: The slab flag indicates whether the extent is used for a slab
+ * of small regions. This helps differentiate small size classes,
+ * and it indicates whether interior pointers can be looked up via
+ * iealloc().
+ *
+ * committed: The committed flag indicates whether physical memory is
+ * committed to the extent, whether explicitly or implicitly
+ * as on a system that overcommits and satisfies physical
+ * memory needs on demand via soft page faults.
+ *
+ * dumpable: The dumpable flag indicates whether or not we've set the
+ * memory in question to be dumpable. Note that this
+ * interacts somewhat subtly with user-specified extent hooks,
+ * since we don't know if *they* are fiddling with
+ * dumpability (in which case, we don't want to undo whatever
+ * they're doing). To deal with this scenario, we:
+ * - Make dumpable false only for memory allocated with the
+ * default hooks.
+ * - Only allow memory to go from non-dumpable to dumpable,
+ * and only once.
+ * - Never make the OS call to allow dumping when the
+ * dumpable bit is already set.
+ * These three constraints mean that we will never
+ * accidentally dump user memory that the user meant to set
+ * nondumpable with their extent hooks.
+	 *
+ * zeroed: The zeroed flag is used by extent recycling code to track
+ * whether memory is zero-filled.
+ *
+ * state: The state flag is an extent_state_t.
+ *
+ * szind: The szind flag indicates usable size class index for
+ * allocations residing in this extent, regardless of whether the
+ * extent is a slab. Extent size and usable size often differ
+ * even for non-slabs, either due to sz_large_pad or promotion of
+ * sampled small regions.
+ *
+ * nfree: Number of free regions in slab.
+ *
+ * sn: Serial number (potentially non-unique).
+ *
+ * Serial numbers may wrap around if !opt_retain, but as long as
+ * comparison functions fall back on address comparison for equal
+ * serial numbers, stable (if imperfect) ordering is maintained.
+ *
+ * Serial numbers may not be unique even in the absence of
+ * wrap-around, e.g. when splitting an extent and assigning the same
+ * serial number to both resulting adjacent extents.
+ */
+ uint64_t e_bits;
+#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
+
+#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
+#define EXTENT_BITS_ARENA_SHIFT 0
+#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT)
+
+#define EXTENT_BITS_SLAB_WIDTH 1
+#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT)
+#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT)
+
+#define EXTENT_BITS_COMMITTED_WIDTH 1
+#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT)
+#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT)
+
+#define EXTENT_BITS_DUMPABLE_WIDTH 1
+#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT)
+#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT)
+
+#define EXTENT_BITS_ZEROED_WIDTH 1
+#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT)
+#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT)
+
+#define EXTENT_BITS_STATE_WIDTH 2
+#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT)
+#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT)
+
+#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES
+#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT)
+#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT)
+
+#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1)
+#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT)
+#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT)
+
+#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
+#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
+
+ /* Pointer to the extent that this structure is responsible for. */
+ void *e_addr;
+
+ union {
+ /*
+ * Extent size and serial number associated with the extent
+ * structure (different than the serial number for the extent at
+ * e_addr).
+ *
+ * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
+ */
+ size_t e_size_esn;
+ #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1))
+ #define EXTENT_ESN_MASK ((size_t)PAGE-1)
+ /* Base extent size, which may not be a multiple of PAGE. */
+ size_t e_bsize;
+ };
+
+ /*
+ * List linkage, used by a variety of lists:
+ * - bin_t's slabs_full
+ * - extents_t's LRU
+ * - stashed dirty extents
+ * - arena's large allocations
+ */
+ ql_elm(extent_t) ql_link;
+
+ /*
+ * Linkage for per size class sn/address-ordered heaps, and
+ * for extent_avail
+ */
+ phn(extent_t) ph_link;
+
+ union {
+ /* Small region slab metadata. */
+ arena_slab_data_t e_slab_data;
+
+ /*
+ * Profile counters, used for large objects. Points to a
+ * prof_tctx_t.
+ */
+ atomic_p_t e_prof_tctx;
+ };
+};
+typedef ql_head(extent_t) extent_list_t;
+typedef ph(extent_t) extent_tree_t;
+typedef ph(extent_t) extent_heap_t;
+
+/* Quantized collection of extents, with built-in LRU queue. */
+struct extents_s {
+ malloc_mutex_t mtx;
+
+ /*
+ * Quantized per size class heaps of extents.
+ *
+ * Synchronization: mtx.
+ */
+ extent_heap_t heaps[NPSIZES+1];
+
+ /*
+ * Bitmap for which set bits correspond to non-empty heaps.
+ *
+ * Synchronization: mtx.
+ */
+ bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)];
+
+ /*
+ * LRU of all extents in heaps.
+ *
+ * Synchronization: mtx.
+ */
+ extent_list_t lru;
+
+ /*
+ * Page sum for all extents in heaps.
+ *
+ * The synchronization here is a little tricky. Modifications to npages
+	 * must hold mtx, but reads need not (though a reader who sees npages
+ * without holding the mutex can't assume anything about the rest of the
+ * state of the extents_t).
+ */
+ atomic_zu_t npages;
+
+ /* All stored extents must be in the same state. */
+ extent_state_t state;
+
+ /*
+ * If true, delay coalescing until eviction; otherwise coalesce during
+ * deallocation.
+ */
+ bool delay_coalesce;
+};
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */
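
Each field's SHIFT is defined as the previous field's WIDTH plus SHIFT, so the fields tile e_bits from bit 0 upward with no gaps or overlap, and the serial number absorbs whatever bits remain. A small compile-time check of that chaining, reusing the MASK() construction with made-up widths (the real ones depend on configuration values such as MALLOCX_ARENA_BITS):

#include <stdint.h>

#define MASK(W, S) ((((((uint64_t)0x1U) << (W)) - 1)) << (S))

/* Hypothetical widths standing in for the configuration-dependent ones. */
#define A_WIDTH 12
#define A_SHIFT 0
#define A_MASK  MASK(A_WIDTH, A_SHIFT)

#define B_WIDTH 1
#define B_SHIFT (A_WIDTH + A_SHIFT)
#define B_MASK  MASK(B_WIDTH, B_SHIFT)

/* Adjacent fields must not overlap... */
_Static_assert((A_MASK & B_MASK) == 0, "fields overlap");
/* ...and together they must cover a contiguous low range. */
_Static_assert((A_MASK | B_MASK) == MASK(A_WIDTH + B_WIDTH, 0),
    "fields leave a gap");

int main(void) { return 0; }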
diff --git a/deps/jemalloc/include/jemalloc/internal/extent_types.h b/deps/jemalloc/include/jemalloc/internal/extent_types.h
new file mode 100644
index 000000000..c0561d99f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/extent_types.h
@@ -0,0 +1,17 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H
+#define JEMALLOC_INTERNAL_EXTENT_TYPES_H
+
+typedef struct extent_s extent_t;
+typedef struct extents_s extents_t;
+
+#define EXTENT_HOOKS_INITIALIZER NULL
+
+#define EXTENT_GROW_MAX_PIND (NPSIZES - 1)
+
+/*
+ * When reusing (and splitting) an active extent, (1U << opt_lg_extent_max_active_fit)
+ * is the maximum allowed ratio between the size of the active extent and the new extent.
+ */
+#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
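
In other words, an active extent is only split for reuse when it is at most 2^opt_lg_extent_max_active_fit (64 by default) times larger than the request; much larger extents are left alone to limit fragmentation. A rough sketch of that guard (names and structure hypothetical, overflow-safe via division):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Matches LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT above. */
static unsigned opt_lg_extent_max_active_fit = 6;

/* Accept only when the size ratio stays within the configured bound;
 * new_size must be nonzero. */
static bool
extent_active_fit_ok(size_t extent_size, size_t new_size) {
	return extent_size / new_size <=
	    ((size_t)1 << opt_lg_extent_max_active_fit);
}

int
main(void) {
	assert(extent_active_fit_ok(64 * 4096, 4096));	/* Ratio 64: OK. */
	assert(!extent_active_fit_ok(65 * 4096, 4096));	/* Too large. */
	return 0;
}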
diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h
index c7183ede8..dcfc992df 100644
--- a/deps/jemalloc/include/jemalloc/internal/hash.h
+++ b/deps/jemalloc/include/jemalloc/internal/hash.h
@@ -1,92 +1,76 @@
+#ifndef JEMALLOC_INTERNAL_HASH_H
+#define JEMALLOC_INTERNAL_HASH_H
+
+#include "jemalloc/internal/assert.h"
+
/*
* The following hash function is based on MurmurHash3, placed into the public
- * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for
+ * domain by Austin Appleby. See https://github.com/aappleby/smhasher for
* details.
*/
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-uint32_t hash_x86_32(const void *key, int len, uint32_t seed);
-void hash_x86_128(const void *key, const int len, uint32_t seed,
- uint64_t r_out[2]);
-void hash_x64_128(const void *key, const int len, const uint32_t seed,
- uint64_t r_out[2]);
-void hash(const void *key, size_t len, const uint32_t seed,
- size_t r_hash[2]);
-#endif
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
/******************************************************************************/
/* Internal implementation. */
-JEMALLOC_INLINE uint32_t
-hash_rotl_32(uint32_t x, int8_t r)
-{
-
- return (x << r) | (x >> (32 - r));
+static inline uint32_t
+hash_rotl_32(uint32_t x, int8_t r) {
+ return ((x << r) | (x >> (32 - r)));
}
-JEMALLOC_INLINE uint64_t
-hash_rotl_64(uint64_t x, int8_t r)
-{
- return (x << r) | (x >> (64 - r));
+static inline uint64_t
+hash_rotl_64(uint64_t x, int8_t r) {
+ return ((x << r) | (x >> (64 - r)));
}
-JEMALLOC_INLINE uint32_t
-hash_get_block_32(const uint32_t *p, int i)
-{
+static inline uint32_t
+hash_get_block_32(const uint32_t *p, int i) {
+ /* Handle unaligned read. */
+ if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) {
+ uint32_t ret;
- return (p[i]);
+ memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t));
+ return ret;
+ }
+
+ return p[i];
}
-JEMALLOC_INLINE uint64_t
-hash_get_block_64(const uint64_t *p, int i)
-{
+static inline uint64_t
+hash_get_block_64(const uint64_t *p, int i) {
+ /* Handle unaligned read. */
+ if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) {
+ uint64_t ret;
- return (p[i]);
-}
+ memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t));
+ return ret;
+ }
-JEMALLOC_INLINE uint32_t
-hash_fmix_32(uint32_t h)
-{
+ return p[i];
+}
+static inline uint32_t
+hash_fmix_32(uint32_t h) {
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
- return (h);
+ return h;
}
-JEMALLOC_INLINE uint64_t
-hash_fmix_64(uint64_t k)
-{
-
+static inline uint64_t
+hash_fmix_64(uint64_t k) {
k ^= k >> 33;
- k *= QU(0xff51afd7ed558ccdLLU);
+ k *= KQU(0xff51afd7ed558ccd);
k ^= k >> 33;
- k *= QU(0xc4ceb9fe1a85ec53LLU);
+ k *= KQU(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
- return (k);
+ return k;
}
-JEMALLOC_INLINE uint32_t
-hash_x86_32(const void *key, int len, uint32_t seed)
-{
+static inline uint32_t
+hash_x86_32(const void *key, int len, uint32_t seed) {
const uint8_t *data = (const uint8_t *) key;
const int nblocks = len / 4;
@@ -132,13 +116,12 @@ hash_x86_32(const void *key, int len, uint32_t seed)
h1 = hash_fmix_32(h1);
- return (h1);
+ return h1;
}
-UNUSED JEMALLOC_INLINE void
+UNUSED static inline void
hash_x86_128(const void *key, const int len, uint32_t seed,
- uint64_t r_out[2])
-{
+ uint64_t r_out[2]) {
const uint8_t * data = (const uint8_t *) key;
const int nblocks = len / 16;
@@ -237,18 +220,17 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
r_out[1] = (((uint64_t) h4) << 32) | h3;
}
-UNUSED JEMALLOC_INLINE void
+UNUSED static inline void
hash_x64_128(const void *key, const int len, const uint32_t seed,
- uint64_t r_out[2])
-{
+ uint64_t r_out[2]) {
const uint8_t *data = (const uint8_t *) key;
const int nblocks = len / 16;
uint64_t h1 = seed;
uint64_t h2 = seed;
- const uint64_t c1 = QU(0x87c37b91114253d5LLU);
- const uint64_t c2 = QU(0x4cf5ad432745937fLLU);
+ const uint64_t c1 = KQU(0x87c37b91114253d5);
+ const uint64_t c2 = KQU(0x4cf5ad432745937f);
/* body */
{
@@ -278,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t k2 = 0;
switch (len & 15) {
- case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
- case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
- case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
- case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
- case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
- case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
+ case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */
+ case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */
+ case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */
+ case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */
+ case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */
+ case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
-
- case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
- case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
- case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
- case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
- case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
- case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
- case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
+ /* falls through */
+ case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */
+ case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */
+ case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */
+ case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */
+ case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */
+ case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */
+ case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
}
@@ -317,19 +299,20 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
/******************************************************************************/
/* API. */
-JEMALLOC_INLINE void
-hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2])
-{
+static inline void
+hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) {
+ assert(len <= INT_MAX); /* Unfortunate implementation limitation. */
+
#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN))
- hash_x64_128(key, len, seed, (uint64_t *)r_hash);
+ hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash);
#else
- uint64_t hashes[2];
- hash_x86_128(key, len, seed, hashes);
- r_hash[0] = (size_t)hashes[0];
- r_hash[1] = (size_t)hashes[1];
+ {
+ uint64_t hashes[2];
+ hash_x86_128(key, (int)len, seed, hashes);
+ r_hash[0] = (size_t)hashes[0];
+ r_hash[1] = (size_t)hashes[1];
+ }
#endif
}
-#endif
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_HASH_H */
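
The fmix functions above are MurmurHash3's finalizers: alternating xor-shift and multiply rounds that avalanche the state so every input bit affects every output bit. A standalone copy of the 64-bit finalizer, usable on its own as a cheap integer mixer (constants as in the diff above):

#include <stdint.h>
#include <stdio.h>

/* MurmurHash3 64-bit finalizer (public domain, Austin Appleby). */
static inline uint64_t
fmix64(uint64_t k) {
	k ^= k >> 33;
	k *= 0xff51afd7ed558ccdULL;
	k ^= k >> 33;
	k *= 0xc4ceb9fe1a85ec53ULL;
	k ^= k >> 33;
	return k;
}

int
main(void) {
	/* Nearby inputs map to wildly different outputs. */
	printf("%016llx\n", (unsigned long long)fmix64(1));
	printf("%016llx\n", (unsigned long long)fmix64(2));
	return 0;
}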
diff --git a/deps/jemalloc/include/jemalloc/internal/hooks.h b/deps/jemalloc/include/jemalloc/internal/hooks.h
new file mode 100644
index 000000000..cd49afcb0
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/hooks.h
@@ -0,0 +1,19 @@
+#ifndef JEMALLOC_INTERNAL_HOOKS_H
+#define JEMALLOC_INTERNAL_HOOKS_H
+
+extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)();
+extern JEMALLOC_EXPORT void (*hooks_libc_hook)();
+
+#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
+
+#define open JEMALLOC_HOOK(open, hooks_libc_hook)
+#define read JEMALLOC_HOOK(read, hooks_libc_hook)
+#define write JEMALLOC_HOOK(write, hooks_libc_hook)
+#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook)
+#define close JEMALLOC_HOOK(close, hooks_libc_hook)
+#define creat JEMALLOC_HOOK(creat, hooks_libc_hook)
+#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook)
+/* Note that this is undef'd and re-define'd in src/prof.c. */
+#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
+
+#endif /* JEMALLOC_INTERNAL_HOOKS_H */
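
JEMALLOC_HOOK() leans on the comma operator: hook != NULL && (hook(), 0) calls the hook (when installed) and always evaluates to 0, which is cast to void and discarded, leaving fn as the value of the whole expression. So a redefined open(...) means "fire the test hook, then call the real open". A minimal standalone demonstration of the same trick (all names here are invented for the demo):

#include <stdio.h>

static void (*test_hook)(void) = NULL;

/* Evaluates hook (if non-NULL) for its side effect, then yields fn. */
#define WITH_HOOK(fn, hook) ((void)((hook) != NULL && ((hook)(), 0)), fn)

static int
real_open(const char *path) {
	printf("opening %s\n", path);
	return 0;
}
#define my_open(path) WITH_HOOK(real_open, test_hook)(path)

static void
trace(void) {
	printf("hook fired\n");
}

int
main(void) {
	my_open("/tmp/a");	/* No hook installed: just opens. */
	test_hook = trace;
	my_open("/tmp/b");	/* Hook fires first, then the open runs. */
	return 0;
}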
diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h
deleted file mode 100644
index a2b9c7791..000000000
--- a/deps/jemalloc/include/jemalloc/internal/huge.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-/* Huge allocation statistics. */
-extern uint64_t huge_nmalloc;
-extern uint64_t huge_ndalloc;
-extern size_t huge_allocated;
-
-/* Protects chunk-related data structures. */
-extern malloc_mutex_t huge_mtx;
-
-void *huge_malloc(size_t size, bool zero, dss_prec_t dss_prec);
-void *huge_palloc(size_t size, size_t alignment, bool zero,
- dss_prec_t dss_prec);
-bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
- size_t extra);
-void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec);
-#ifdef JEMALLOC_JET
-typedef void (huge_dalloc_junk_t)(void *, size_t);
-extern huge_dalloc_junk_t *huge_dalloc_junk;
-#endif
-void huge_dalloc(void *ptr, bool unmap);
-size_t huge_salloc(const void *ptr);
-dss_prec_t huge_dss_prec_get(arena_t *arena);
-prof_ctx_t *huge_prof_ctx_get(const void *ptr);
-void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-bool huge_boot(void);
-void huge_prefork(void);
-void huge_postfork_parent(void);
-void huge_postfork_child(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
deleted file mode 100644
index 574bbb141..000000000
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ /dev/null
@@ -1,1028 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_H
-#define JEMALLOC_INTERNAL_H
-#include <math.h>
-#ifdef _WIN32
-# include <windows.h>
-# define ENOENT ERROR_PATH_NOT_FOUND
-# define EINVAL ERROR_BAD_ARGUMENTS
-# define EAGAIN ERROR_OUTOFMEMORY
-# define EPERM ERROR_WRITE_FAULT
-# define EFAULT ERROR_INVALID_ADDRESS
-# define ENOMEM ERROR_NOT_ENOUGH_MEMORY
-# undef ERANGE
-# define ERANGE ERROR_INVALID_DATA
-#else
-# include <sys/param.h>
-# include <sys/mman.h>
-# include <sys/syscall.h>
-# if !defined(SYS_write) && defined(__NR_write)
-# define SYS_write __NR_write
-# endif
-# include <sys/uio.h>
-# include <pthread.h>
-# include <errno.h>
-#endif
-#include <sys/types.h>
-
-#include <limits.h>
-#ifndef SIZE_T_MAX
-# define SIZE_T_MAX SIZE_MAX
-#endif
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stddef.h>
-#ifndef offsetof
-# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
-#endif
-#include <inttypes.h>
-#include <string.h>
-#include <strings.h>
-#include <ctype.h>
-#ifdef _MSC_VER
-# include <io.h>
-typedef intptr_t ssize_t;
-# define PATH_MAX 1024
-# define STDERR_FILENO 2
-# define __func__ __FUNCTION__
-/* Disable warnings about deprecated system functions */
-# pragma warning(disable: 4996)
-#else
-# include <unistd.h>
-#endif
-#include <fcntl.h>
-
-#include "jemalloc_internal_defs.h"
-
-#ifdef JEMALLOC_UTRACE
-#include <sys/ktrace.h>
-#endif
-
-#ifdef JEMALLOC_VALGRIND
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-#endif
-
-#define JEMALLOC_NO_DEMANGLE
-#ifdef JEMALLOC_JET
-# define JEMALLOC_N(n) jet_##n
-# include "jemalloc/internal/public_namespace.h"
-# define JEMALLOC_NO_RENAME
-# include "../jemalloc@install_suffix@.h"
-# undef JEMALLOC_NO_RENAME
-#else
-# define JEMALLOC_N(n) @private_namespace@##n
-# include "../jemalloc@install_suffix@.h"
-#endif
-#include "jemalloc/internal/private_namespace.h"
-
-static const bool config_debug =
-#ifdef JEMALLOC_DEBUG
- true
-#else
- false
-#endif
- ;
-static const bool config_dss =
-#ifdef JEMALLOC_DSS
- true
-#else
- false
-#endif
- ;
-static const bool config_fill =
-#ifdef JEMALLOC_FILL
- true
-#else
- false
-#endif
- ;
-static const bool config_lazy_lock =
-#ifdef JEMALLOC_LAZY_LOCK
- true
-#else
- false
-#endif
- ;
-static const bool config_prof =
-#ifdef JEMALLOC_PROF
- true
-#else
- false
-#endif
- ;
-static const bool config_prof_libgcc =
-#ifdef JEMALLOC_PROF_LIBGCC
- true
-#else
- false
-#endif
- ;
-static const bool config_prof_libunwind =
-#ifdef JEMALLOC_PROF_LIBUNWIND
- true
-#else
- false
-#endif
- ;
-static const bool config_mremap =
-#ifdef JEMALLOC_MREMAP
- true
-#else
- false
-#endif
- ;
-static const bool config_munmap =
-#ifdef JEMALLOC_MUNMAP
- true
-#else
- false
-#endif
- ;
-static const bool config_stats =
-#ifdef JEMALLOC_STATS
- true
-#else
- false
-#endif
- ;
-static const bool config_tcache =
-#ifdef JEMALLOC_TCACHE
- true
-#else
- false
-#endif
- ;
-static const bool config_tls =
-#ifdef JEMALLOC_TLS
- true
-#else
- false
-#endif
- ;
-static const bool config_utrace =
-#ifdef JEMALLOC_UTRACE
- true
-#else
- false
-#endif
- ;
-static const bool config_valgrind =
-#ifdef JEMALLOC_VALGRIND
- true
-#else
- false
-#endif
- ;
-static const bool config_xmalloc =
-#ifdef JEMALLOC_XMALLOC
- true
-#else
- false
-#endif
- ;
-static const bool config_ivsalloc =
-#ifdef JEMALLOC_IVSALLOC
- true
-#else
- false
-#endif
- ;
-
-#ifdef JEMALLOC_ATOMIC9
-#include <machine/atomic.h>
-#endif
-
-#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
-#include <libkern/OSAtomic.h>
-#endif
-
-#ifdef JEMALLOC_ZONE
-#include <mach/mach_error.h>
-#include <mach/mach_init.h>
-#include <mach/vm_map.h>
-#include <malloc/malloc.h>
-#endif
-
-#define RB_COMPACT
-#include "jemalloc/internal/rb.h"
-#include "jemalloc/internal/qr.h"
-#include "jemalloc/internal/ql.h"
-
-/*
- * jemalloc can conceptually be broken into components (arena, tcache, etc.),
- * but there are circular dependencies that cannot be broken without
- * substantial performance degradation. In order to reduce the effect on
- * visual code flow, read the header files in multiple passes, with one of the
- * following cpp variables defined during each pass:
- *
- * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
- * types.
- * JEMALLOC_H_STRUCTS : Data structures.
- * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
- * JEMALLOC_H_INLINES : Inline functions.
- */
-/******************************************************************************/
-#define JEMALLOC_H_TYPES
-
-#include "jemalloc/internal/jemalloc_internal_macros.h"
-
-#define MALLOCX_LG_ALIGN_MASK ((int)0x3f)
-#define ALLOCM_LG_ALIGN_MASK ((int)0x3f)
-
-/* Smallest size class to support. */
-#define LG_TINY_MIN 3
-#define TINY_MIN (1U << LG_TINY_MIN)
-
-/*
- * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size
- * classes).
- */
-#ifndef LG_QUANTUM
-# if (defined(__i386__) || defined(_M_IX86))
-# define LG_QUANTUM 4
-# endif
-# ifdef __ia64__
-# define LG_QUANTUM 4
-# endif
-# ifdef __alpha__
-# define LG_QUANTUM 4
-# endif
-# ifdef __sparc64__
-# define LG_QUANTUM 4
-# endif
-# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
-# define LG_QUANTUM 4
-# endif
-# ifdef __arm__
-# define LG_QUANTUM 3
-# endif
-# ifdef __aarch64__
-# define LG_QUANTUM 4
-# endif
-# ifdef __hppa__
-# define LG_QUANTUM 4
-# endif
-# ifdef __mips__
-# define LG_QUANTUM 3
-# endif
-# ifdef __powerpc__
-# define LG_QUANTUM 4
-# endif
-# ifdef __s390__
-# define LG_QUANTUM 4
-# endif
-# ifdef __SH4__
-# define LG_QUANTUM 4
-# endif
-# ifdef __tile__
-# define LG_QUANTUM 4
-# endif
-# ifndef LG_QUANTUM
-# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS"
-# endif
-#endif
-
-#define QUANTUM ((size_t)(1U << LG_QUANTUM))
-#define QUANTUM_MASK (QUANTUM - 1)
-
-/* Return the smallest quantum multiple that is >= a. */
-#define QUANTUM_CEILING(a) \
- (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
-
-#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
-#define LONG_MASK (LONG - 1)
-
-/* Return the smallest long multiple that is >= a. */
-#define LONG_CEILING(a) \
- (((a) + LONG_MASK) & ~LONG_MASK)
-
-#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
-#define PTR_MASK (SIZEOF_PTR - 1)
-
-/* Return the smallest (void *) multiple that is >= a. */
-#define PTR_CEILING(a) \
- (((a) + PTR_MASK) & ~PTR_MASK)
-
-/*
- * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
- * In addition, this controls the spacing of cacheline-spaced size classes.
- *
- * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can
- * only handle raw constants.
- */
-#define LG_CACHELINE 6
-#define CACHELINE 64
-#define CACHELINE_MASK (CACHELINE - 1)
-
-/* Return the smallest cacheline multiple that is >= s. */
-#define CACHELINE_CEILING(s) \
- (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
-
-/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */
-#ifdef PAGE_MASK
-# undef PAGE_MASK
-#endif
-#define LG_PAGE STATIC_PAGE_SHIFT
-#define PAGE ((size_t)(1U << STATIC_PAGE_SHIFT))
-#define PAGE_MASK ((size_t)(PAGE - 1))
-
-/* Return the smallest pagesize multiple that is >= s. */
-#define PAGE_CEILING(s) \
- (((s) + PAGE_MASK) & ~PAGE_MASK)
-
-/* Return the nearest aligned address at or below a. */
-#define ALIGNMENT_ADDR2BASE(a, alignment) \
- ((void *)((uintptr_t)(a) & (-(alignment))))
-
-/* Return the offset between a and the nearest aligned address at or below a. */
-#define ALIGNMENT_ADDR2OFFSET(a, alignment) \
- ((size_t)((uintptr_t)(a) & (alignment - 1)))
-
-/* Return the smallest alignment multiple that is >= s. */
-#define ALIGNMENT_CEILING(s, alignment) \
- (((s) + (alignment - 1)) & (-(alignment)))
-
-/* Declare a variable length array */
-#if __STDC_VERSION__ < 199901L
-# ifdef _MSC_VER
-# include <malloc.h>
-# define alloca _alloca
-# else
-# ifdef JEMALLOC_HAS_ALLOCA_H
-# include <alloca.h>
-# else
-# include <stdlib.h>
-# endif
-# endif
-# define VARIABLE_ARRAY(type, name, count) \
- type *name = alloca(sizeof(type) * count)
-#else
-# define VARIABLE_ARRAY(type, name, count) type name[count]
-#endif
-
-#ifdef JEMALLOC_VALGRIND
-/*
- * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions
- * so that when Valgrind reports errors, there are no extra stack frames
- * in the backtraces.
- *
- * The size that is reported to valgrind must be consistent through a chain of
- * malloc..realloc..realloc calls. Request size isn't recorded anywhere in
- * jemalloc, so it is critical that all callers of these macros provide usize
- * rather than request size. As a result, buffer overflow detection is
- * technically weakened for the standard API, though it is generally accepted
- * practice to consider any extra bytes reported by malloc_usable_size() as
- * usable space.
- */
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \
- if (config_valgrind && opt_valgrind && cond) \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \
-} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do { \
- if (config_valgrind && opt_valgrind) { \
- size_t rzsize = p2rz(ptr); \
- \
- if (ptr == old_ptr) { \
- VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \
- usize, rzsize); \
- if (zero && old_usize < usize) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- old_usize), usize - old_usize); \
- } \
- } else { \
- if (old_ptr != NULL) { \
- VALGRIND_FREELIKE_BLOCK(old_ptr, \
- old_rzsize); \
- } \
- if (ptr != NULL) { \
- size_t copy_size = (old_usize < usize) \
- ? old_usize : usize; \
- size_t tail_size = usize - copy_size; \
- VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \
- rzsize, false); \
- if (copy_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED(ptr, \
- copy_size); \
- } \
- if (zero && tail_size > 0) { \
- VALGRIND_MAKE_MEM_DEFINED( \
- (void *)((uintptr_t)ptr + \
- copy_size), tail_size); \
- } \
- } \
- } \
- } \
-} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \
- if (config_valgrind && opt_valgrind) \
- VALGRIND_FREELIKE_BLOCK(ptr, rzsize); \
-} while (0)
-#else
-#define RUNNING_ON_VALGRIND ((unsigned)0)
-#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
- do {} while (0)
-#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \
- do {} while (0)
-#define VALGRIND_FREELIKE_BLOCK(addr, rzB) do {} while (0)
-#define VALGRIND_MAKE_MEM_NOACCESS(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len) do {} while (0)
-#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize, \
- old_rzsize, zero) do {} while (0)
-#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0)
-#endif
-
-#include "jemalloc/internal/util.h"
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prng.h"
-#include "jemalloc/internal/ckh.h"
-#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats.h"
-#include "jemalloc/internal/ctl.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
-#include "jemalloc/internal/mb.h"
-#include "jemalloc/internal/extent.h"
-#include "jemalloc/internal/arena.h"
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/base.h"
-#include "jemalloc/internal/chunk.h"
-#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
-#include "jemalloc/internal/tcache.h"
-#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/quarantine.h"
-#include "jemalloc/internal/prof.h"
-
-#undef JEMALLOC_H_TYPES
-/******************************************************************************/
-#define JEMALLOC_H_STRUCTS
-
-#include "jemalloc/internal/util.h"
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prng.h"
-#include "jemalloc/internal/ckh.h"
-#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats.h"
-#include "jemalloc/internal/ctl.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
-#include "jemalloc/internal/mb.h"
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/extent.h"
-#include "jemalloc/internal/arena.h"
-#include "jemalloc/internal/base.h"
-#include "jemalloc/internal/chunk.h"
-#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
-#include "jemalloc/internal/tcache.h"
-#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/quarantine.h"
-#include "jemalloc/internal/prof.h"
-
-typedef struct {
- uint64_t allocated;
- uint64_t deallocated;
-} thread_allocated_t;
-/*
- * The JEMALLOC_ARG_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro
- * argument.
- */
-#define THREAD_ALLOCATED_INITIALIZER JEMALLOC_ARG_CONCAT({0, 0})
-
-#undef JEMALLOC_H_STRUCTS
-/******************************************************************************/
-#define JEMALLOC_H_EXTERNS
-
-extern bool opt_abort;
-extern bool opt_junk;
-extern size_t opt_quarantine;
-extern bool opt_redzone;
-extern bool opt_utrace;
-extern bool opt_valgrind;
-extern bool opt_xmalloc;
-extern bool opt_zero;
-extern size_t opt_narenas;
-
-/* Number of CPUs. */
-extern unsigned ncpus;
-
-/* Protects arenas initialization (arenas, arenas_total). */
-extern malloc_mutex_t arenas_lock;
-/*
- * Arenas that are used to service external requests. Not all elements of the
- * arenas array are necessarily used; arenas are created lazily as needed.
- *
- * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
- * arenas. arenas[narenas_auto..narenas_total) are only used if the application
- * takes some action to create them and allocate from them.
- */
-extern arena_t **arenas;
-extern unsigned narenas_total;
-extern unsigned narenas_auto; /* Read-only after initialization. */
-
-arena_t *arenas_extend(unsigned ind);
-void arenas_cleanup(void *arg);
-arena_t *choose_arena_hard(void);
-void jemalloc_prefork(void);
-void jemalloc_postfork_parent(void);
-void jemalloc_postfork_child(void);
-
-#include "jemalloc/internal/util.h"
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prng.h"
-#include "jemalloc/internal/ckh.h"
-#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats.h"
-#include "jemalloc/internal/ctl.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
-#include "jemalloc/internal/mb.h"
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/extent.h"
-#include "jemalloc/internal/arena.h"
-#include "jemalloc/internal/base.h"
-#include "jemalloc/internal/chunk.h"
-#include "jemalloc/internal/huge.h"
-#include "jemalloc/internal/rtree.h"
-#include "jemalloc/internal/tcache.h"
-#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/quarantine.h"
-#include "jemalloc/internal/prof.h"
-
-#undef JEMALLOC_H_EXTERNS
-/******************************************************************************/
-#define JEMALLOC_H_INLINES
-
-#include "jemalloc/internal/util.h"
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prng.h"
-#include "jemalloc/internal/ckh.h"
-#include "jemalloc/internal/size_classes.h"
-#include "jemalloc/internal/stats.h"
-#include "jemalloc/internal/ctl.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/tsd.h"
-#include "jemalloc/internal/mb.h"
-#include "jemalloc/internal/extent.h"
-#include "jemalloc/internal/base.h"
-#include "jemalloc/internal/chunk.h"
-#include "jemalloc/internal/huge.h"
-
-#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *)
-
-size_t s2u(size_t size);
-size_t sa2u(size_t size, size_t alignment);
-unsigned narenas_total_get(void);
-arena_t *choose_arena(arena_t *arena);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-/*
- * Map of pthread_self() --> arenas[???], used for selecting an arena to use
- * for allocations.
- */
-malloc_tsd_externs(arenas, arena_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, arenas, arena_t *, NULL,
- arenas_cleanup)
-
-/*
- * Compute usable size that would result from allocating an object with the
- * specified size.
- */
-JEMALLOC_ALWAYS_INLINE size_t
-s2u(size_t size)
-{
-
- if (size <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
- if (size <= arena_maxclass)
- return (PAGE_CEILING(size));
- return (CHUNK_CEILING(size));
-}
-
-/*
- * Compute usable size that would result from allocating an object with the
- * specified size and alignment.
- */
-JEMALLOC_ALWAYS_INLINE size_t
-sa2u(size_t size, size_t alignment)
-{
- size_t usize;
-
- assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
-
- /*
- * Round size up to the nearest multiple of alignment.
- *
- * This done, we can take advantage of the fact that for each small
- * size class, every object is aligned at the smallest power of two
- * that is non-zero in the base two representation of the size. For
- * example:
- *
- * Size | Base 2 | Minimum alignment
- * -----+----------+------------------
- * 96 | 1100000 | 32
- * 144 | 10100000 | 32
- * 192 | 11000000 | 64
- */
- usize = ALIGNMENT_CEILING(size, alignment);
- /*
- * (usize < size) protects against the combination of maximal
- * alignment and size greater than maximal alignment.
- */
- if (usize < size) {
- /* size_t overflow. */
- return (0);
- }
-
- if (usize <= arena_maxclass && alignment <= PAGE) {
- if (usize <= SMALL_MAXCLASS)
- return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
- return (PAGE_CEILING(usize));
- } else {
- size_t run_size;
-
- /*
- * We can't achieve subpage alignment, so round up alignment
- * permanently; it makes later calculations simpler.
- */
- alignment = PAGE_CEILING(alignment);
- usize = PAGE_CEILING(size);
- /*
- * (usize < size) protects against very large sizes within
- * PAGE of SIZE_T_MAX.
- *
- * (usize + alignment < usize) protects against the
- * combination of maximal alignment and usize large enough
- * to cause overflow. This is similar to the first overflow
- * check above, but it needs to be repeated due to the new
- * usize value, which may now be *equal* to maximal
- * alignment, whereas before we only detected overflow if the
- * original size was *greater* than maximal alignment.
- */
- if (usize < size || usize + alignment < usize) {
- /* size_t overflow. */
- return (0);
- }
-
- /*
- * Calculate the size of the over-size run that arena_palloc()
- * would need to allocate in order to guarantee the alignment.
- * If the run wouldn't fit within a chunk, round up to a huge
- * allocation size.
- */
- run_size = usize + alignment - PAGE;
- if (run_size <= arena_maxclass)
- return (PAGE_CEILING(usize));
- return (CHUNK_CEILING(usize));
- }
-}
-
-JEMALLOC_INLINE unsigned
-narenas_total_get(void)
-{
- unsigned narenas;
-
- malloc_mutex_lock(&arenas_lock);
- narenas = narenas_total;
- malloc_mutex_unlock(&arenas_lock);
-
- return (narenas);
-}
-
-/* Choose an arena based on a per-thread value. */
-JEMALLOC_INLINE arena_t *
-choose_arena(arena_t *arena)
-{
- arena_t *ret;
-
- if (arena != NULL)
- return (arena);
-
- if ((ret = *arenas_tsd_get()) == NULL) {
- ret = choose_arena_hard();
- assert(ret != NULL);
- }
-
- return (ret);
-}
-#endif
-
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/rtree.h"
-/*
- * Include arena.h twice in order to resolve circular dependencies with
- * tcache.h.
- */
-#define JEMALLOC_ARENA_INLINE_A
-#include "jemalloc/internal/arena.h"
-#undef JEMALLOC_ARENA_INLINE_A
-#include "jemalloc/internal/tcache.h"
-#define JEMALLOC_ARENA_INLINE_B
-#include "jemalloc/internal/arena.h"
-#undef JEMALLOC_ARENA_INLINE_B
-#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/quarantine.h"
-
-#ifndef JEMALLOC_ENABLE_INLINE
-void *imalloct(size_t size, bool try_tcache, arena_t *arena);
-void *imalloc(size_t size);
-void *icalloct(size_t size, bool try_tcache, arena_t *arena);
-void *icalloc(size_t size);
-void *ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena);
-void *ipalloc(size_t usize, size_t alignment, bool zero);
-size_t isalloc(const void *ptr, bool demote);
-size_t ivsalloc(const void *ptr, bool demote);
-size_t u2rz(size_t usize);
-size_t p2rz(const void *ptr);
-void idalloct(void *ptr, bool try_tcache);
-void idalloc(void *ptr);
-void iqalloct(void *ptr, bool try_tcache);
-void iqalloc(void *ptr);
-void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena);
-void *iralloct(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena);
-void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-bool ixalloc(void *ptr, size_t size, size_t extra, size_t alignment,
- bool zero);
-malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-JEMALLOC_ALWAYS_INLINE void *
-imalloct(size_t size, bool try_tcache, arena_t *arena)
-{
-
- assert(size != 0);
-
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, false, try_tcache));
- else
- return (huge_malloc(size, false, huge_dss_prec_get(arena)));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-imalloc(size_t size)
-{
-
- return (imalloct(size, true, NULL));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloct(size_t size, bool try_tcache, arena_t *arena)
-{
-
- if (size <= arena_maxclass)
- return (arena_malloc(arena, size, true, try_tcache));
- else
- return (huge_malloc(size, true, huge_dss_prec_get(arena)));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-icalloc(size_t size)
-{
-
- return (icalloct(size, true, NULL));
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipalloct(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena)
-{
- void *ret;
-
- assert(usize != 0);
- assert(usize == sa2u(usize, alignment));
-
- if (usize <= arena_maxclass && alignment <= PAGE)
- ret = arena_malloc(arena, usize, zero, try_tcache);
- else {
- if (usize <= arena_maxclass) {
- ret = arena_palloc(choose_arena(arena), usize,
- alignment, zero);
- } else if (alignment <= chunksize)
- ret = huge_malloc(usize, zero, huge_dss_prec_get(arena));
- else
- ret = huge_palloc(usize, alignment, zero, huge_dss_prec_get(arena));
- }
-
- assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-ipalloc(size_t usize, size_t alignment, bool zero)
-{
-
- return (ipalloct(usize, alignment, zero, true, NULL));
-}
-
-/*
- * Typical usage:
- * void *ptr = [...]
- * size_t sz = isalloc(ptr, config_prof);
- */
-JEMALLOC_ALWAYS_INLINE size_t
-isalloc(const void *ptr, bool demote)
-{
- size_t ret;
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
- /* Demotion only makes sense if config_prof is true. */
- assert(config_prof || demote == false);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- ret = arena_salloc(ptr, demote);
- else
- ret = huge_salloc(ptr);
-
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE size_t
-ivsalloc(const void *ptr, bool demote)
-{
-
- /* Return 0 if ptr is not within a chunk managed by jemalloc. */
- if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == 0)
- return (0);
-
- return (isalloc(ptr, demote));
-}
-
-JEMALLOC_INLINE size_t
-u2rz(size_t usize)
-{
- size_t ret;
-
- if (usize <= SMALL_MAXCLASS) {
- size_t binind = SMALL_SIZE2BIN(usize);
- ret = arena_bin_info[binind].redzone_size;
- } else
- ret = 0;
-
- return (ret);
-}
-
-JEMALLOC_INLINE size_t
-p2rz(const void *ptr)
-{
- size_t usize = isalloc(ptr, false);
-
- return (u2rz(usize));
-}
-
-JEMALLOC_ALWAYS_INLINE void
-idalloct(void *ptr, bool try_tcache)
-{
- arena_chunk_t *chunk;
-
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(chunk->arena, chunk, ptr, try_tcache);
- else
- huge_dalloc(ptr, true);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-idalloc(void *ptr)
-{
-
- idalloct(ptr, true);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-iqalloct(void *ptr, bool try_tcache)
-{
-
- if (config_fill && opt_quarantine)
- quarantine(ptr);
- else
- idalloct(ptr, try_tcache);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-iqalloc(void *ptr)
-{
-
- iqalloct(ptr, true);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena)
-{
- void *p;
- size_t usize, copysize;
-
- usize = sa2u(size + extra, alignment);
- if (usize == 0)
- return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
- if (p == NULL) {
- if (extra == 0)
- return (NULL);
- /* Try again, without extra this time. */
- usize = sa2u(size, alignment);
- if (usize == 0)
- return (NULL);
- p = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
- if (p == NULL)
- return (NULL);
- }
- /*
- * Copy at most size bytes (not size+extra), since the caller has no
- * expectation that the extra bytes will be reliably preserved.
- */
- copysize = (size < oldsize) ? size : oldsize;
- memcpy(p, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
- return (p);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-iralloct(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
- bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena)
-{
- size_t oldsize;
-
- assert(ptr != NULL);
- assert(size != 0);
-
- oldsize = isalloc(ptr, config_prof);
-
- if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
- != 0) {
- /*
- * Existing object alignment is inadequate; allocate new space
- * and copy.
- */
- return (iralloct_realign(ptr, oldsize, size, extra, alignment,
- zero, try_tcache_alloc, try_tcache_dalloc, arena));
- }
-
- if (size + extra <= arena_maxclass) {
- return (arena_ralloc(arena, ptr, oldsize, size, extra,
- alignment, zero, try_tcache_alloc,
- try_tcache_dalloc));
- } else {
- return (huge_ralloc(ptr, oldsize, size, extra,
- alignment, zero, try_tcache_dalloc, huge_dss_prec_get(arena)));
- }
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
-{
-
- return (iralloct(ptr, size, extra, alignment, zero, true, true, NULL));
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-ixalloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero)
-{
- size_t oldsize;
-
- assert(ptr != NULL);
- assert(size != 0);
-
- oldsize = isalloc(ptr, config_prof);
- if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
- != 0) {
- /* Existing object alignment is inadequate. */
- return (true);
- }
-
- if (size <= arena_maxclass)
- return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero));
- else
- return (huge_ralloc_no_move(ptr, oldsize, size, extra));
-}
-
-malloc_tsd_externs(thread_allocated, thread_allocated_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, thread_allocated, thread_allocated_t,
- THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup)
-#endif
-
-#include "jemalloc/internal/prof.h"
-
-#undef JEMALLOC_H_INLINES
-/******************************************************************************/
-#endif /* JEMALLOC_INTERNAL_H */
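
The *_CEILING macros in the removed header (QUANTUM_CEILING, PAGE_CEILING, ALIGNMENT_CEILING, and the alignment reasoning inside sa2u) all rely on the same power-of-two round-up identity: (s + (a - 1)) & ~(a - 1). A small standalone check of the identity, assuming a is a nonzero power of two:

#include <assert.h>
#include <stddef.h>

/* Round s up to the next multiple of alignment (a power of two). */
static size_t
align_ceiling(size_t s, size_t alignment) {
	assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
	return (s + (alignment - 1)) & ~(alignment - 1);
}

int
main(void) {
	assert(align_ceiling(0, 16) == 0);
	assert(align_ceiling(1, 16) == 16);
	assert(align_ceiling(16, 16) == 16);
	assert(align_ceiling(4097, 4096) == 8192);
	return 0;
}

Adding alignment - 1 pushes any value past the next boundary, and masking off the low bits snaps it back down to that boundary; values already aligned pass through unchanged.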
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
new file mode 100644
index 000000000..be70df510
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -0,0 +1,91 @@
+#ifndef JEMALLOC_INTERNAL_DECLS_H
+#define JEMALLOC_INTERNAL_DECLS_H
+
+#include <math.h>
+#ifdef _WIN32
+# include <windows.h>
+# include "msvc_compat/windows_extra.h"
+# ifdef _WIN64
+# if LG_VADDR <= 32
+# error Generate the headers using x64 vcargs
+# endif
+# else
+# if LG_VADDR > 32
+# undef LG_VADDR
+# define LG_VADDR 32
+# endif
+# endif
+#else
+# include <sys/param.h>
+# include <sys/mman.h>
+# if !defined(__pnacl__) && !defined(__native_client__)
+# include <sys/syscall.h>
+# if !defined(SYS_write) && defined(__NR_write)
+# define SYS_write __NR_write
+# endif
+# if defined(SYS_open) && defined(__aarch64__)
+ /* Android headers may define SYS_open to __NR_open even though
+ * __NR_open may not exist on AArch64 (superseded by __NR_openat). */
+# undef SYS_open
+# endif
+# include <sys/uio.h>
+# endif
+# include <pthread.h>
+# include <signal.h>
+# ifdef JEMALLOC_OS_UNFAIR_LOCK
+# include <os/lock.h>
+# endif
+# ifdef JEMALLOC_GLIBC_MALLOC_HOOK
+# include <sched.h>
+# endif
+# include <errno.h>
+# include <sys/time.h>
+# include <time.h>
+# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+# include <mach/mach_time.h>
+# endif
+#endif
+#include <sys/types.h>
+
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
+#ifndef SSIZE_MAX
+# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1))
+#endif
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+# define offsetof(type, member) ((size_t)&(((type *)NULL)->member))
+#endif
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#ifdef _MSC_VER
+# include <io.h>
+typedef intptr_t ssize_t;
+# define PATH_MAX 1024
+# define STDERR_FILENO 2
+# define __func__ __FUNCTION__
+# ifdef JEMALLOC_HAS_RESTRICT
+# define restrict __restrict
+# endif
+/* Disable warnings about deprecated system functions. */
+# pragma warning(disable: 4996)
+#if _MSC_VER < 1800
+static int
+isblank(int c) {
+ return (c == '\t' || c == ' ');
+}
+#endif
+#else
+# include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#endif /* JEMALLOC_INTERNAL_DECLS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c166fbd9e..8dad9a1db 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -1,5 +1,5 @@
#ifndef JEMALLOC_INTERNAL_DEFS_H_
-#define JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
@@ -9,6 +9,18 @@
#undef JEMALLOC_CPREFIX
/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */
+#undef JEMALLOC_OVERRIDE___LIBC_CALLOC
+#undef JEMALLOC_OVERRIDE___LIBC_FREE
+#undef JEMALLOC_OVERRIDE___LIBC_MALLOC
+#undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN
+#undef JEMALLOC_OVERRIDE___LIBC_REALLOC
+#undef JEMALLOC_OVERRIDE___LIBC_VALLOC
+#undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN
+
+/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
@@ -21,21 +33,30 @@
* order to yield to another virtual CPU.
*/
#undef CPU_SPINWAIT
-
-/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
-#undef JEMALLOC_ATOMIC9
+/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
+#undef HAVE_CPU_SPINWAIT
/*
- * Defined if OSAtomic*() functions are available, as provided by Darwin, and
- * documented in the atomic(3) manual page.
+ * Number of significant bits in virtual addresses. This may be less than the
+ * total number of bits in a pointer, e.g. on x64, for which the uppermost 16
+ * bits are the same as bit 47.
*/
-#undef JEMALLOC_OSATOMIC
+#undef LG_VADDR
+
+/* Defined if C11 atomics are available. */
+#undef JEMALLOC_C11_ATOMICS
+
+/* Defined if GCC __atomic atomics are available. */
+#undef JEMALLOC_GCC_ATOMIC_ATOMICS
+
+/* Defined if GCC __sync atomics are available. */
+#undef JEMALLOC_GCC_SYNC_ATOMICS
/*
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
* __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
@@ -43,16 +64,60 @@
* Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
* __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
- * functions are defined in libgcc instead of being inlines)
+ * functions are defined in libgcc instead of being inlines).
*/
#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
/*
+ * Defined if __builtin_clz() and __builtin_clzl() are available.
+ */
+#undef JEMALLOC_HAVE_BUILTIN_CLZ
+
+/*
+ * Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
+ */
+#undef JEMALLOC_OS_UNFAIR_LOCK
+
+/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
#undef JEMALLOC_OSSPIN
+/* Defined if syscall(2) is usable. */
+#undef JEMALLOC_USE_SYSCALL
+
+/*
+ * Defined if secure_getenv(3) is available.
+ */
+#undef JEMALLOC_HAVE_SECURE_GETENV
+
+/*
+ * Defined if issetugid(2) is available.
+ */
+#undef JEMALLOC_HAVE_ISSETUGID
+
+/* Defined if pthread_atfork(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_ATFORK
+
+/* Defined if pthread_setname_np(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
+
+/*
+ * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_MONOTONIC
+
+/*
+ * Defined if mach_absolute_time() is available.
+ */
+#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
@@ -76,18 +141,9 @@
*/
#undef JEMALLOC_MUTEX_INIT_CB
-/* Defined if sbrk() is supported. */
-#undef JEMALLOC_HAVE_SBRK
-
/* Non-empty if the tls_model attribute is supported. */
#undef JEMALLOC_TLS_MODEL
-/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */
-#undef JEMALLOC_CC_SILENCE
-
-/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */
-#undef JEMALLOC_CODE_COVERAGE
-
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
@@ -110,81 +166,140 @@
#undef JEMALLOC_PROF_GCC
/*
- * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
- * This makes it possible to allocate/deallocate objects without any locking
- * when the cache is in the steady state.
- */
-#undef JEMALLOC_TCACHE
-
-/*
- * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
#undef JEMALLOC_DSS
-/* Support memory filling (junk/zero/quarantine/redzone). */
+/* Support memory filling (junk/zero). */
#undef JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
#undef JEMALLOC_UTRACE
-/* Support Valgrind. */
-#undef JEMALLOC_VALGRIND
-
/* Support optional abort() on OOM. */
#undef JEMALLOC_XMALLOC
/* Support lazy locking (avoid locking unless a second thread is launched). */
#undef JEMALLOC_LAZY_LOCK
-/* One page is 2^STATIC_PAGE_SHIFT bytes. */
-#undef STATIC_PAGE_SHIFT
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#undef LG_QUANTUM
+
+/* One page is 2^LG_PAGE bytes. */
+#undef LG_PAGE
+
+/*
+ * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
+ * system does not explicitly support huge pages; system calls that require
+ * explicit huge page support are separately configured.
+ */
+#undef LG_HUGEPAGE
/*
- * If defined, use munmap() to unmap freed chunks, rather than storing them for
- * later reuse. This is disabled by default on Linux because common sequences
- * of mmap()/munmap() calls will cause virtual memory map holes.
+ * If defined, adjacent virtual memory mappings with identical attributes
+ * automatically coalesce, and they fragment when changes are made to subranges.
+ * This is the normal order of things for mmap()/munmap(), but on Windows
+ * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
+ * mappings do *not* coalesce/fragment.
*/
-#undef JEMALLOC_MUNMAP
+#undef JEMALLOC_MAPS_COALESCE
/*
- * If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). This is
- * disabled by default because it is Linux-specific and it will cause virtual
- * memory map holes, much like munmap(2) does.
+ * If defined, retain memory for later reuse by default rather than using e.g.
+ * munmap() to unmap freed extents. This is enabled on 64-bit Linux because
+ * common sequences of mmap()/munmap() calls will cause virtual memory map
+ * holes.
*/
-#undef JEMALLOC_MREMAP
+#undef JEMALLOC_RETAIN
/* TLS is used to map arenas and magazine caches to threads. */
#undef JEMALLOC_TLS
/*
- * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
- * within jemalloc-owned chunks before dereferencing them.
+ * Used to mark unreachable code to quiet "end of non-void" compiler warnings.
+ * Don't use this directly; instead use unreachable() from util.h
+ */
+#undef JEMALLOC_INTERNAL_UNREACHABLE
+
+/*
+ * ffs*() functions to use for bitmapping. Don't use these directly; instead,
+ * use ffs_*() from util.h.
*/
-#undef JEMALLOC_IVSALLOC
+#undef JEMALLOC_INTERNAL_FFSLL
+#undef JEMALLOC_INTERNAL_FFSL
+#undef JEMALLOC_INTERNAL_FFS
+
+/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#undef JEMALLOC_CACHE_OBLIVIOUS
+
+/*
+ * If defined, enable logging facilities. We make this a configure option to
+ * avoid taking extra branches everywhere.
+ */
+#undef JEMALLOC_LOG
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
-#undef JEMALLOC_ZONE_VERSION
+
+/*
+ * Methods for determining whether the OS overcommits.
+ * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
+ *   /proc/sys/vm/overcommit_memory file.
+ * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
+ */
+#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+
+/* Defined if madvise(2) is available. */
+#undef JEMALLOC_HAVE_MADVISE
+
+/*
+ * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
+ * arguments to madvise(2).
+ */
+#undef JEMALLOC_HAVE_MADVISE_HUGE
/*
* Methods for purging unused pages differ between operating systems.
*
- * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
+ * madvise(..., MADV_FREE) : This marks pages as being unused, such that they
+ * will be discarded rather than swapped out.
+ * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
+ * defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
- * the address region is later touched.
- * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
- * unused, such that they will be discarded rather
- * than swapped out.
+ * the address region is later touched;
+ * otherwise this behaves similarly to
+ * MADV_FREE, though typically with higher
+ * system overhead.
*/
-#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
+
+/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
+#undef JEMALLOC_DEFINE_MADVISE_FREE
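To make the purge tradeoff above concrete, a minimal helper might look like this (a sketch, not jemalloc's actual purge path; it assumes a POSIX system and prefers the lazy flavor when the headers provide it):

    #include <sys/mman.h>

    /* MADV_FREE marks pages reclaimable but keeps their contents until
     * memory pressure; MADV_DONTNEED discards immediately, so the next
     * touch is demand-zeroed. */
    static void
    purge_pages(void *addr, size_t len) {
    #ifdef MADV_FREE
        (void)madvise(addr, len, MADV_FREE);
    #else
        (void)madvise(addr, len, MADV_DONTNEED);
    #endif
    }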
/*
- * Define if operating system has alloca.h header.
+ * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
+#undef JEMALLOC_MADVISE_DONTDUMP
+
+/*
+ * Defined if transparent huge pages (THPs) are supported via the
+ * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
+ */
+#undef JEMALLOC_THP
+
+/* Define if operating system has alloca.h header. */
#undef JEMALLOC_HAS_ALLOCA_H
/* C99 restrict keyword supported. */
@@ -199,7 +314,53 @@
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#undef LG_SIZEOF_LONG
+/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
+#undef LG_SIZEOF_LONG_LONG
+
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#undef LG_SIZEOF_INTMAX_T
+/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
+#undef JEMALLOC_GLIBC_MALLOC_HOOK
+
+/* glibc memalign hook. */
+#undef JEMALLOC_GLIBC_MEMALIGN_HOOK
+
+/* pthread support */
+#undef JEMALLOC_HAVE_PTHREAD
+
+/* dlsym() support */
+#undef JEMALLOC_HAVE_DLSYM
+
+/* Adaptive mutex support in pthreads. */
+#undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
+
+/* GNU specific sched_getcpu support */
+#undef JEMALLOC_HAVE_SCHED_GETCPU
+
+/* GNU specific sched_setaffinity support */
+#undef JEMALLOC_HAVE_SCHED_SETAFFINITY
+
+/*
+ * If defined, all the features necessary for background threads are present.
+ */
+#undef JEMALLOC_BACKGROUND_THREAD
+
+/*
+ * If defined, jemalloc symbols are not exported (doesn't work when
+ * JEMALLOC_PREFIX is not defined).
+ */
+#undef JEMALLOC_EXPORT
+
+/* config.malloc_conf options string. */
+#undef JEMALLOC_CONFIG_MALLOC_CONF
+
+/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
+#undef JEMALLOC_IS_MALLOC
+
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
new file mode 100644
index 000000000..e10fb275d
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -0,0 +1,53 @@
+#ifndef JEMALLOC_INTERNAL_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTERNS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/tsd_types.h"
+
+/* TSD checks this to set thread local slow state accordingly. */
+extern bool malloc_slow;
+
+/* Run-time options. */
+extern bool opt_abort;
+extern bool opt_abort_conf;
+extern const char *opt_junk;
+extern bool opt_junk_alloc;
+extern bool opt_junk_free;
+extern bool opt_utrace;
+extern bool opt_xmalloc;
+extern bool opt_zero;
+extern unsigned opt_narenas;
+
+/* Number of CPUs. */
+extern unsigned ncpus;
+
+/* Number of arenas used for automatic multiplexing of threads and arenas. */
+extern unsigned narenas_auto;
+
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern atomic_p_t arenas[];
+
+void *a0malloc(size_t size);
+void a0dalloc(void *ptr);
+void *bootstrap_malloc(size_t size);
+void *bootstrap_calloc(size_t num, size_t size);
+void bootstrap_free(void *ptr);
+void arena_set(unsigned ind, arena_t *arena);
+unsigned narenas_total_get(void);
+arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
+arena_t *arena_choose_hard(tsd_t *tsd, bool internal);
+void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+void iarena_cleanup(tsd_t *tsd);
+void arena_cleanup(tsd_t *tsd);
+void arenas_tdata_cleanup(tsd_t *tsd);
+void jemalloc_prefork(void);
+void jemalloc_postfork_parent(void);
+void jemalloc_postfork_child(void);
+bool malloc_initialized(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h
new file mode 100644
index 000000000..437eaa407
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h
@@ -0,0 +1,94 @@
+#ifndef JEMALLOC_INTERNAL_INCLUDES_H
+#define JEMALLOC_INTERNAL_INCLUDES_H
+
+/*
+ * jemalloc can conceptually be broken into components (arena, tcache, etc.),
+ * but there are circular dependencies that cannot be broken without
+ * substantial performance degradation.
+ *
+ * Historically, we dealt with this by splitting each header into four
+ * sections (types, structs, externs, and inlines), and included each header
+ * file multiple times
+ * in this file, picking out the portion we want on each pass using the
+ * following #defines:
+ * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
+ * types.
+ * JEMALLOC_H_STRUCTS : Data structures.
+ * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
+ * JEMALLOC_H_INLINES : Inline functions.
+ *
+ * We're moving toward a world in which the dependencies are explicit; each file
+ * will #include the headers it depends on (rather than relying on them being
+ * implicitly available via this file including every header file in the
+ * project).
+ *
+ * We're now in an intermediate state: we've broken up the header files to avoid
+ * having to include each one multiple times, but have not yet moved the
+ * dependency information into the header files (i.e. we still rely on the
+ * ordering in this file to ensure all a header's dependencies are available in
+ * its translation unit). Each component is now broken up into multiple header
+ * files, corresponding to the sections above (e.g. instead of "foo.h", we now
+ * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h").
+ *
+ * Those files which have been converted to explicitly include their
+ * inter-component dependencies are now in the initial HERMETIC HEADERS
+ * section. All headers may still rely on jemalloc_preamble.h (which, by fiat,
+ * must be included first in every translation unit) for system headers and
+ * global jemalloc definitions, however.
+ */
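For illustration, the historical multi-pass scheme described above looked roughly like this (a sketch; arena.h stands in for any component header, and the matching #ifdef JEMALLOC_H_TYPES guards are visible in the deleted mb.h later in this patch):

    /* jemalloc_internal.h, old scheme: */
    #define JEMALLOC_H_TYPES
    #include "jemalloc/internal/arena.h"  /* only the types section expands */
    #undef JEMALLOC_H_TYPES
    #define JEMALLOC_H_STRUCTS
    #include "jemalloc/internal/arena.h"  /* now only the structs section */
    #undef JEMALLOC_H_STRUCTS
    /* ...and likewise for JEMALLOC_H_EXTERNS and JEMALLOC_H_INLINES. */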
+
+/******************************************************************************/
+/* TYPES */
+/******************************************************************************/
+
+#include "jemalloc/internal/extent_types.h"
+#include "jemalloc/internal/base_types.h"
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/prof_types.h"
+
+/******************************************************************************/
+/* STRUCTS */
+/******************************************************************************/
+
+#include "jemalloc/internal/arena_structs_a.h"
+#include "jemalloc/internal/extent_structs.h"
+#include "jemalloc/internal/base_structs.h"
+#include "jemalloc/internal/prof_structs.h"
+#include "jemalloc/internal/arena_structs_b.h"
+#include "jemalloc/internal/tcache_structs.h"
+#include "jemalloc/internal/background_thread_structs.h"
+
+/******************************************************************************/
+/* EXTERNS */
+/******************************************************************************/
+
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/extent_externs.h"
+#include "jemalloc/internal/base_externs.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/large_externs.h"
+#include "jemalloc/internal/tcache_externs.h"
+#include "jemalloc/internal/prof_externs.h"
+#include "jemalloc/internal/background_thread_externs.h"
+
+/******************************************************************************/
+/* INLINES */
+/******************************************************************************/
+
+#include "jemalloc/internal/jemalloc_internal_inlines_a.h"
+#include "jemalloc/internal/base_inlines.h"
+/*
+ * Include portions of arena code interleaved with tcache code in order to
+ * resolve circular dependencies.
+ */
+#include "jemalloc/internal/prof_inlines_a.h"
+#include "jemalloc/internal/arena_inlines_a.h"
+#include "jemalloc/internal/extent_inlines.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_b.h"
+#include "jemalloc/internal/tcache_inlines.h"
+#include "jemalloc/internal/arena_inlines_b.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
+#include "jemalloc/internal/prof_inlines_b.h"
+#include "jemalloc/internal/background_thread_inlines.h"
+
+#endif /* JEMALLOC_INTERNAL_INCLUDES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
new file mode 100644
index 000000000..c6a1f7eb2
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -0,0 +1,172 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_A_H
+#define JEMALLOC_INTERNAL_INLINES_A_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/ticker.h"
+
+JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
+malloc_getcpu(void) {
+ assert(have_percpu_arena);
+#if defined(JEMALLOC_HAVE_SCHED_GETCPU)
+ return (malloc_cpuid_t)sched_getcpu();
+#else
+ not_reached();
+ return -1;
+#endif
+}
+
+/* Return the chosen arena index based on current cpu. */
+JEMALLOC_ALWAYS_INLINE unsigned
+percpu_arena_choose(void) {
+ assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
+
+ malloc_cpuid_t cpuid = malloc_getcpu();
+ assert(cpuid >= 0);
+
+ unsigned arena_ind;
+ if ((opt_percpu_arena == percpu_arena) ||
+     ((unsigned)cpuid < ncpus / 2)) {
+ arena_ind = cpuid;
+ } else {
+ assert(opt_percpu_arena == per_phycpu_arena);
+ /* Hyper threads on the same physical CPU share arena. */
+ arena_ind = cpuid - ncpus / 2;
+ }
+
+ return arena_ind;
+}
+
+/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */
+JEMALLOC_ALWAYS_INLINE unsigned
+percpu_arena_ind_limit(percpu_arena_mode_t mode) {
+ assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode));
+ if (mode == per_phycpu_arena && ncpus > 1) {
+ if (ncpus % 2) {
+ /* This likely means a misconfig. */
+ return ncpus / 2 + 1;
+ }
+ return ncpus / 2;
+ } else {
+ return ncpus;
+ }
+}
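As a worked example of the mapping above, assume 8 logical CPUs where ids 4..7 are the hyperthread siblings of ids 0..3: under percpu_arena, cpuid 6 chooses arena 6; under per_phycpu_arena it chooses arena 6 - 8/2 = 2, the same arena as its sibling cpuid 2, and percpu_arena_ind_limit() returns 4.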
+
+static inline arena_tdata_t *
+arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) {
+ arena_tdata_t *tdata;
+ arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
+
+ if (unlikely(arenas_tdata == NULL)) {
+ /* arenas_tdata hasn't been initialized yet. */
+ return arena_tdata_get_hard(tsd, ind);
+ }
+ if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) {
+ /*
+ * ind is invalid, the cache is stale (too small), or tdata has
+ * not been initialized yet.
+ */
+ return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) :
+ NULL);
+ }
+
+ tdata = &arenas_tdata[ind];
+ if (likely(tdata != NULL) || !refresh_if_missing) {
+ return tdata;
+ }
+ return arena_tdata_get_hard(tsd, ind);
+}
+
+static inline arena_t *
+arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) {
+ arena_t *ret;
+
+ assert(ind < MALLOCX_ARENA_LIMIT);
+
+ ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE);
+ if (unlikely(ret == NULL)) {
+ if (init_if_missing) {
+ ret = arena_init(tsdn, ind,
+ (extent_hooks_t *)&extent_hooks_default);
+ }
+ }
+ return ret;
+}
+
+static inline ticker_t *
+decay_ticker_get(tsd_t *tsd, unsigned ind) {
+ arena_tdata_t *tdata;
+
+ tdata = arena_tdata_get(tsd, ind, true);
+ if (unlikely(tdata == NULL)) {
+ return NULL;
+ }
+ return &tdata->decay_ticker;
+}
+
+JEMALLOC_ALWAYS_INLINE cache_bin_t *
+tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
+ assert(binind < NBINS);
+ return &tcache->bins_small[binind];
+}
+
+JEMALLOC_ALWAYS_INLINE cache_bin_t *
+tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
+ assert(binind >= NBINS && binind < nhbins);
+ return &tcache->bins_large[binind - NBINS];
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tcache_available(tsd_t *tsd) {
+ /*
+ * A thread-specific auto tcache might be unavailable if: 1) tcache
+ * initialization is in progress, or 2) it was disabled through the
+ * thread.tcache.enabled mallctl or config options. This check covers
+ * all cases.
+ */
+ if (likely(tsd_tcache_enabled_get(tsd))) {
+ /* Associated arena == NULL implies tcache init in progress. */
+ assert(tsd_tcachep_get(tsd)->arena == NULL ||
+ tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail !=
+ NULL);
+ return true;
+ }
+
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcache_get(tsd_t *tsd) {
+ if (!tcache_available(tsd)) {
+ return NULL;
+ }
+
+ return tsd_tcachep_get(tsd);
+}
+
+static inline void
+pre_reentrancy(tsd_t *tsd, arena_t *arena) {
+ /* arena is the current context. Reentry from a0 is not allowed. */
+ assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
+
+ bool fast = tsd_fast(tsd);
+ assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
+ ++*tsd_reentrancy_levelp_get(tsd);
+ if (fast) {
+ /* Prepare slow path for reentrancy. */
+ tsd_slow_update(tsd);
+ assert(tsd->state == tsd_state_nominal_slow);
+ }
+}
+
+static inline void
+post_reentrancy(tsd_t *tsd) {
+ int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd);
+ assert(*reentrancy_level > 0);
+ if (--*reentrancy_level == 0) {
+ tsd_slow_update(tsd);
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_A_H */
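pre_reentrancy() and post_reentrancy() are intended to bracket calls that may re-enter the allocator, such as user-installed extent hooks; a sketch of the pairing (the hook invocation is illustrative, using the standard extent_hooks_t alloc signature):

    pre_reentrancy(tsd, arena);
    void *addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
        &zero, &commit, arena_ind);  /* may call back into malloc */
    post_reentrancy(tsd);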
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
new file mode 100644
index 000000000..2e76e5d8f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -0,0 +1,86 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_B_H
+#define JEMALLOC_INTERNAL_INLINES_B_H
+
+#include "jemalloc/internal/rtree.h"
+
+/* Choose an arena based on a per-thread value. */
+static inline arena_t *
+arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
+ arena_t *ret;
+
+ if (arena != NULL) {
+ return arena;
+ }
+
+ /* During reentrancy, arena 0 is the safest bet. */
+ if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) {
+ return arena_get(tsd_tsdn(tsd), 0, true);
+ }
+
+ ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
+ if (unlikely(ret == NULL)) {
+ ret = arena_choose_hard(tsd, internal);
+ assert(ret);
+ if (tcache_available(tsd)) {
+ tcache_t *tcache = tcache_get(tsd);
+ if (tcache->arena != NULL) {
+ /* See comments in tcache_data_init(). */
+ assert(tcache->arena ==
+ arena_get(tsd_tsdn(tsd), 0, false));
+ if (tcache->arena != ret) {
+ tcache_arena_reassociate(tsd_tsdn(tsd),
+ tcache, ret);
+ }
+ } else {
+ tcache_arena_associate(tsd_tsdn(tsd), tcache,
+ ret);
+ }
+ }
+ }
+
+ /*
+ * Note that for percpu arena, if the current arena is outside of the
+ * auto percpu arena range (i.e. the thread is assigned to a manually
+ * managed arena), then percpu arena is skipped.
+ */
+ if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) &&
+ !internal && (arena_ind_get(ret) <
+ percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd !=
+ tsd_tsdn(tsd))) {
+ unsigned ind = percpu_arena_choose();
+ if (arena_ind_get(ret) != ind) {
+ percpu_arena_update(tsd, ind);
+ ret = tsd_arena_get(tsd);
+ }
+ ret->last_thd = tsd_tsdn(tsd);
+ }
+
+ return ret;
+}
+
+static inline arena_t *
+arena_choose(tsd_t *tsd, arena_t *arena) {
+ return arena_choose_impl(tsd, arena, false);
+}
+
+static inline arena_t *
+arena_ichoose(tsd_t *tsd, arena_t *arena) {
+ return arena_choose_impl(tsd, arena, true);
+}
+
+static inline bool
+arena_is_auto(arena_t *arena) {
+ assert(narenas_auto > 0);
+ return (arena_ind_get(arena) < narenas_auto);
+}
+
+JEMALLOC_ALWAYS_INLINE extent_t *
+iealloc(tsdn_t *tsdn, const void *ptr) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true);
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_B_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
new file mode 100644
index 000000000..290e5cf99
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -0,0 +1,246 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_C_H
+#define JEMALLOC_INTERNAL_INLINES_C_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/witness.h"
+
+/*
+ * Translating the names of the 'i' functions:
+ * Abbreviations used in the first part of the function name (before
+ * alloc/dalloc) describe what that function accomplishes:
+ * a: arena (query)
+ * s: size (query, or sized deallocation)
+ * e: extent (query)
+ * p: aligned (allocates)
+ * vs: size (query, without knowing that the pointer is into the heap)
+ * r: rallocx implementation
+ * x: xallocx implementation
+ * Abbreviations used in the second part of the function name (after
+ * alloc/dalloc) describe the arguments it takes
+ * z: whether to return zeroed memory
+ * t: accepts a tcache_t * parameter
+ * m: accepts an arena_t * parameter
+ */
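Reading two of the names defined below by this key: isalloc ("s" + alloc) is a size query on an existing allocation, while ipallocztm ("p" + alloc + "z", "t", "m") performs an aligned allocation and additionally takes zero, tcache_t *, and arena_t * arguments.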
+
+JEMALLOC_ALWAYS_INLINE arena_t *
+iaalloc(tsdn_t *tsdn, const void *ptr) {
+ assert(ptr != NULL);
+
+ return arena_aalloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+isalloc(tsdn_t *tsdn, const void *ptr) {
+ assert(ptr != NULL);
+
+ return arena_salloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+ bool is_internal, arena_t *arena, bool slow_path) {
+ void *ret;
+
+ assert(size != 0);
+ assert(!is_internal || tcache == NULL);
+ assert(!is_internal || arena == NULL || arena_is_auto(arena));
+ if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+ }
+
+ ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
+ if (config_stats && is_internal && likely(ret != NULL)) {
+ arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
+ }
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
+ return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false,
+ NULL, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, bool is_internal, arena_t *arena) {
+ void *ret;
+
+ assert(usize != 0);
+ assert(usize == sz_sa2u(usize, alignment));
+ assert(!is_internal || tcache == NULL);
+ assert(!is_internal || arena == NULL || arena_is_auto(arena));
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+ assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
+ if (config_stats && is_internal && likely(ret != NULL)) {
+ arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret));
+ }
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+ tcache_t *tcache, arena_t *arena) {
+ return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
+ return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+ tcache_get(tsd), false, NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+ivsalloc(tsdn_t *tsdn, const void *ptr) {
+ return arena_vsalloc(tsdn, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
+ bool is_internal, bool slow_path) {
+ assert(ptr != NULL);
+ assert(!is_internal || tcache == NULL);
+ assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+ if (config_stats && is_internal) {
+ arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
+ }
+ if (!is_internal && !tsdn_null(tsdn) &&
+ tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+ assert(tcache == NULL);
+ }
+ arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+idalloc(tsd_t *tsd, void *ptr) {
+ idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+ alloc_ctx_t *alloc_ctx, bool slow_path) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+ arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+ size_t extra, size_t alignment, bool zero, tcache_t *tcache,
+ arena_t *arena) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+ void *p;
+ size_t usize, copysize;
+
+ usize = sz_sa2u(size + extra, alignment);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ return NULL;
+ }
+ p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+ if (p == NULL) {
+ if (extra == 0) {
+ return NULL;
+ }
+ /* Try again, without extra this time. */
+ usize = sz_sa2u(size, alignment);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ return NULL;
+ }
+ p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+ if (p == NULL) {
+ return NULL;
+ }
+ }
+ /*
+ * Copy at most size bytes (not size+extra), since the caller has no
+ * expectation that the extra bytes will be reliably preserved.
+ */
+ copysize = (size < oldsize) ? size : oldsize;
+ memcpy(p, ptr, copysize);
+ isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
+ return p;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero, tcache_t *tcache, arena_t *arena) {
+ assert(ptr != NULL);
+ assert(size != 0);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+ != 0) {
+ /*
+ * Existing object alignment is inadequate; allocate new space
+ * and copy.
+ */
+ return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment,
+ zero, tcache, arena);
+ }
+
+ return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
+ tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+ bool zero) {
+ return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
+ tcache_get(tsd), NULL);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
+ size_t alignment, bool zero) {
+ assert(ptr != NULL);
+ assert(size != 0);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+ != 0) {
+ /* Existing object alignment is inadequate. */
+ return true;
+ }
+
+ return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
+}
+
+JEMALLOC_ALWAYS_INLINE int
+iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) {
+ int defrag = 0;
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ szind_t szind;
+ bool is_slab;
+ rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &is_slab);
+ if (likely(is_slab)) {
+ /* Small allocation. */
+ extent_t *slab = iealloc(tsdn, ptr);
+ arena_t *arena = extent_arena_get(slab);
+ szind_t binind = extent_szind_get(slab);
+ bin_t *bin = &arena->bins[binind];
+ malloc_mutex_lock(tsdn, &bin->lock);
+ /* don't bother moving allocations from the slab currently used for new allocations */
+ if (slab != bin->slabcur) {
+ const bin_info_t *bin_info = &bin_infos[binind];
+ size_t availregs = bin_info->nregs * bin->stats.curslabs;
+ *bin_util = ((long long)bin->stats.curregs<<16) / availregs;
+ *run_util = ((long long)(bin_info->nregs - extent_nfree_get(slab))<<16) / bin_info->nregs;
+ defrag = 1;
+ }
+ malloc_mutex_unlock(tsdn, &bin->lock);
+ }
+ return defrag;
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_C_H */
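The two out-parameters above are 16.16 fixed-point utilization fractions. A worked example with assumed numbers: if bin_info->nregs == 64, bin->stats.curslabs == 10, and bin->stats.curregs == 400, then availregs == 640 and *bin_util == (400 << 16) / 640 == 40960, i.e. 62.5% of 1 << 16; if the queried slab has 16 free regions, *run_util == ((64 - 16) << 16) / 64 == 49152 (75%). The caller (Redis's active defrag) can then compare the two, presumably relocating allocations whose slab is less utilized than the bin average so that sparse slabs can be emptied.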
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
index 4e2392302..ed75d3768 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -1,51 +1,43 @@
-/*
- * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for
- * functions that are static inline functions if inlining is enabled, and
- * single-definition library-private functions if inlining is disabled.
- *
- * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in
- * which case the denoted functions are always static, regardless of whether
- * inlining is enabled.
- */
-#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE)
- /* Disable inlining to make debugging/profiling easier. */
-# define JEMALLOC_ALWAYS_INLINE
-# define JEMALLOC_ALWAYS_INLINE_C static
-# define JEMALLOC_INLINE
-# define JEMALLOC_INLINE_C static
-# define inline
-#else
-# define JEMALLOC_ENABLE_INLINE
-# ifdef JEMALLOC_HAVE_ATTR
-# define JEMALLOC_ALWAYS_INLINE \
- static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline)
-# define JEMALLOC_ALWAYS_INLINE_C \
- static inline JEMALLOC_ATTR(always_inline)
-# else
-# define JEMALLOC_ALWAYS_INLINE static inline
-# define JEMALLOC_ALWAYS_INLINE_C static inline
-# endif
-# define JEMALLOC_INLINE static inline
-# define JEMALLOC_INLINE_C static inline
-# ifdef _MSC_VER
-# define inline _inline
-# endif
-#endif
+#ifndef JEMALLOC_INTERNAL_MACROS_H
+#define JEMALLOC_INTERNAL_MACROS_H
-#ifdef JEMALLOC_CC_SILENCE
-# define UNUSED JEMALLOC_ATTR(unused)
+#ifdef JEMALLOC_DEBUG
+# define JEMALLOC_ALWAYS_INLINE static inline
#else
-# define UNUSED
+# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline
+#endif
+#ifdef _MSC_VER
+# define inline _inline
#endif
-#define ZU(z) ((size_t)z)
-#define QU(q) ((uint64_t)q)
-#define QI(q) ((int64_t)q)
+#define UNUSED JEMALLOC_ATTR(unused)
+
+#define ZU(z) ((size_t)z)
+#define ZD(z) ((ssize_t)z)
+#define QU(q) ((uint64_t)q)
+#define QD(q) ((int64_t)q)
+
+#define KZU(z) ZU(z##ULL)
+#define KZD(z) ZD(z##LL)
+#define KQU(q) QU(q##ULL)
+#define KQD(q) QD(q##LL)
#ifndef __DECONST
# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var))
#endif
-#ifndef JEMALLOC_HAS_RESTRICT
+#if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus)
# define restrict
#endif
+
+/* Various function pointers are static and immutable except during testing. */
+#ifdef JEMALLOC_JET
+# define JET_MUTABLE
+#else
+# define JET_MUTABLE const
+#endif
+
+#define JEMALLOC_VA_ARGS_HEAD(head, ...) head
+#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__
+
+#endif /* JEMALLOC_INTERNAL_MACROS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
new file mode 100644
index 000000000..1b750b122
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
@@ -0,0 +1,185 @@
+#ifndef JEMALLOC_INTERNAL_TYPES_H
+#define JEMALLOC_INTERNAL_TYPES_H
+
+/* Page size index type. */
+typedef unsigned pszind_t;
+
+/* Size class index type. */
+typedef unsigned szind_t;
+
+/* Processor / core id type. */
+typedef int malloc_cpuid_t;
+
+/*
+ * Flags bits:
+ *
+ * a: arena
+ * t: tcache
+ * 0: unused
+ * z: zero
+ * n: alignment
+ *
+ * aaaaaaaa aaaatttt tttttttt 0znnnnnn
+ */
+#define MALLOCX_ARENA_BITS 12
+#define MALLOCX_TCACHE_BITS 12
+#define MALLOCX_LG_ALIGN_BITS 6
+#define MALLOCX_ARENA_SHIFT 20
+#define MALLOCX_TCACHE_SHIFT 8
+#define MALLOCX_ARENA_MASK \
+ (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)
+/* NB: Arena index bias decreases the maximum number of arenas by 1. */
+#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1)
+#define MALLOCX_TCACHE_MASK \
+ (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)
+#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3)
+#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1)
+/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
+#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \
+ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK))
+#define MALLOCX_ALIGN_GET(flags) \
+ (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
+#define MALLOCX_ZERO_GET(flags) \
+ ((bool)(flags & MALLOCX_ZERO))
+
+#define MALLOCX_TCACHE_GET(flags) \
+ (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2)
+#define MALLOCX_ARENA_GET(flags) \
+ (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1)
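A round-trip spot check of the encoding above (a sketch; MALLOCX_ALIGN, MALLOCX_ZERO, and MALLOCX_ARENA are the public encoders from jemalloc.h, assumed to be in scope together with these internal decoders):

    #include <assert.h>

    int flags = MALLOCX_ALIGN(64) | MALLOCX_ZERO | MALLOCX_ARENA(3);
    assert(MALLOCX_ALIGN_GET(flags) == 64); /* low 6 bits hold lg(64) == 6 */
    assert(MALLOCX_ZERO_GET(flags));
    assert(MALLOCX_ARENA_GET(flags) == 3);  /* bits 20..31, stored biased by +1 */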
+
+/* Smallest size class to support. */
+#define TINY_MIN (1U << LG_TINY_MIN)
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#ifndef LG_QUANTUM
+# if (defined(__i386__) || defined(_M_IX86))
+# define LG_QUANTUM 4
+# endif
+# ifdef __ia64__
+# define LG_QUANTUM 4
+# endif
+# ifdef __alpha__
+# define LG_QUANTUM 4
+# endif
+# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
+# define LG_QUANTUM 4
+# endif
+# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
+# define LG_QUANTUM 4
+# endif
+# ifdef __arm__
+# define LG_QUANTUM 3
+# endif
+# ifdef __aarch64__
+# define LG_QUANTUM 4
+# endif
+# ifdef __hppa__
+# define LG_QUANTUM 4
+# endif
+# ifdef __m68k__
+# define LG_QUANTUM 3
+# endif
+# ifdef __mips__
+# define LG_QUANTUM 3
+# endif
+# ifdef __nios2__
+# define LG_QUANTUM 3
+# endif
+# ifdef __or1k__
+# define LG_QUANTUM 3
+# endif
+# ifdef __powerpc__
+# define LG_QUANTUM 4
+# endif
+# if defined(__riscv) || defined(__riscv__)
+# define LG_QUANTUM 4
+# endif
+# ifdef __s390__
+# define LG_QUANTUM 4
+# endif
+# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
+ defined(__SH4_SINGLE_ONLY__))
+# define LG_QUANTUM 4
+# endif
+# ifdef __tile__
+# define LG_QUANTUM 4
+# endif
+# ifdef __le32__
+# define LG_QUANTUM 4
+# endif
+# ifndef LG_QUANTUM
+# error "Unknown minimum alignment for architecture; specify via "
+ "--with-lg-quantum"
+# endif
+#endif
+
+#define QUANTUM ((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK (QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define QUANTUM_CEILING(a) \
+ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
+#define LONG_MASK (LONG - 1)
+
+/* Return the smallest long multiple that is >= a. */
+#define LONG_CEILING(a) \
+ (((a) + LONG_MASK) & ~LONG_MASK)
+
+#define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
+#define PTR_MASK (SIZEOF_PTR - 1)
+
+/* Return the smallest (void *) multiple that is >= a. */
+#define PTR_CEILING(a) \
+ (((a) + PTR_MASK) & ~PTR_MASK)
+
+/*
+ * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
+ * In addition, this controls the spacing of cacheline-spaced size classes.
+ *
+ * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can
+ * only handle raw constants.
+ */
+#define LG_CACHELINE 6
+#define CACHELINE 64
+#define CACHELINE_MASK (CACHELINE - 1)
+
+/* Return the smallest cacheline multiple that is >= s. */
+#define CACHELINE_CEILING(s) \
+ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+
+/* Return the nearest aligned address at or below a. */
+#define ALIGNMENT_ADDR2BASE(a, alignment) \
+ ((void *)((uintptr_t)(a) & ((~(alignment)) + 1)))
+
+/* Return the offset between a and the nearest aligned address at or below a. */
+#define ALIGNMENT_ADDR2OFFSET(a, alignment) \
+ ((size_t)((uintptr_t)(a) & (alignment - 1)))
+
+/* Return the smallest alignment multiple that is >= s. */
+#define ALIGNMENT_CEILING(s, alignment) \
+ (((s) + (alignment - 1)) & ((~(alignment)) + 1))
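A few spot checks of the ceiling/alignment macros, using the constants defined above (plain assertions; assert.h assumed):

    assert(CACHELINE_CEILING(100) == 128);
    assert(ALIGNMENT_ADDR2BASE((void *)0x1234, 0x100) == (void *)0x1200);
    assert(ALIGNMENT_ADDR2OFFSET((void *)0x1234, 0x100) == 0x34);
    assert(ALIGNMENT_CEILING(0x34, 0x10) == 0x40);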
+
+/* Declare a variable-length array. */
+#if __STDC_VERSION__ < 199901L
+# ifdef _MSC_VER
+# include <malloc.h>
+# define alloca _alloca
+# else
+# ifdef JEMALLOC_HAS_ALLOCA_H
+# include <alloca.h>
+# else
+# include <stdlib.h>
+# endif
+# endif
+# define VARIABLE_ARRAY(type, name, count) \
+ type *name = alloca(sizeof(type) * (count))
+#else
+# define VARIABLE_ARRAY(type, name, count) type name[(count)]
+#endif
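Call sites look the same under either expansion; a hypothetical example:

    void
    vla_demo(unsigned count) {
        VARIABLE_ARRAY(size_t, offsets, count); /* C99 VLA, or alloca() pre-C99 */
        offsets[0] = 0;
    }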
+
+#endif /* JEMALLOC_INTERNAL_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
new file mode 100644
index 000000000..e621fbc85
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -0,0 +1,194 @@
+#ifndef JEMALLOC_PREAMBLE_H
+#define JEMALLOC_PREAMBLE_H
+
+#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
+
+#ifdef JEMALLOC_UTRACE
+#include <sys/ktrace.h>
+#endif
+
+#define JEMALLOC_NO_DEMANGLE
+#ifdef JEMALLOC_JET
+# undef JEMALLOC_IS_MALLOC
+# define JEMALLOC_N(n) jet_##n
+# include "jemalloc/internal/public_namespace.h"
+# define JEMALLOC_NO_RENAME
+# include "../jemalloc@install_suffix@.h"
+# undef JEMALLOC_NO_RENAME
+#else
+# define JEMALLOC_N(n) @private_namespace@##n
+# include "../jemalloc@install_suffix@.h"
+#endif
+
+#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#include <libkern/OSAtomic.h>
+#endif
+
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#endif
+
+#include "jemalloc/internal/jemalloc_internal_macros.h"
+
+/*
+ * Note that the ordering matters here; the hook itself is name-mangled. We
+ * want the inclusion of hooks to happen early, so that we hook as much as
+ * possible.
+ */
+#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
+# ifndef JEMALLOC_JET
+# include "jemalloc/internal/private_namespace.h"
+# else
+# include "jemalloc/internal/private_namespace_jet.h"
+# endif
+#endif
+#include "jemalloc/internal/hooks.h"
+
+#ifdef JEMALLOC_DEFINE_MADVISE_FREE
+# define JEMALLOC_MADV_FREE 8
+#endif
+
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+ true
+#else
+ false
+#endif
+ ;
+static const bool have_dss =
+#ifdef JEMALLOC_DSS
+ true
+#else
+ false
+#endif
+ ;
+static const bool have_madvise_huge =
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_fill =
+#ifdef JEMALLOC_FILL
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_lazy_lock =
+#ifdef JEMALLOC_LAZY_LOCK
+ true
+#else
+ false
+#endif
+ ;
+static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
+static const bool config_prof =
+#ifdef JEMALLOC_PROF
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_prof_libgcc =
+#ifdef JEMALLOC_PROF_LIBGCC
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_prof_libunwind =
+#ifdef JEMALLOC_PROF_LIBUNWIND
+ true
+#else
+ false
+#endif
+ ;
+static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_stats =
+#ifdef JEMALLOC_STATS
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_tls =
+#ifdef JEMALLOC_TLS
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_utrace =
+#ifdef JEMALLOC_UTRACE
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_xmalloc =
+#ifdef JEMALLOC_XMALLOC
+ true
+#else
+ false
+#endif
+ ;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ true
+#else
+ false
+#endif
+ ;
+/*
+ * Undocumented, for jemalloc development use only at the moment. See the note
+ * in jemalloc/internal/log.h.
+ */
+static const bool config_log =
+#ifdef JEMALLOC_LOG
+ true
+#else
+ false
+#endif
+ ;
+#ifdef JEMALLOC_HAVE_SCHED_GETCPU
+/* Currently percpu_arena depends on sched_getcpu. */
+#define JEMALLOC_PERCPU_ARENA
+#endif
+static const bool have_percpu_arena =
+#ifdef JEMALLOC_PERCPU_ARENA
+ true
+#else
+ false
+#endif
+ ;
+/*
+ * Undocumented, and not recommended; the application should take full
+ * responsibility for tracking provenance.
+ */
+static const bool force_ivsalloc =
+#ifdef JEMALLOC_FORCE_IVSALLOC
+ true
+#else
+ false
+#endif
+ ;
+static const bool have_background_thread =
+#ifdef JEMALLOC_BACKGROUND_THREAD
+ true
+#else
+ false
+#endif
+ ;
+
+#endif /* JEMALLOC_PREAMBLE_H */
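The config_* pattern above trades #ifdef for constant-folded branches: feature-gated code stays visible to the compiler (so it is always type-checked) but is eliminated as dead code whenever the constant is false. A hypothetical caller (update_counters() is made up):

    if (config_stats) {
        update_counters(); /* compiled out unless JEMALLOC_STATS is defined */
    }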
diff --git a/deps/jemalloc/include/jemalloc/internal/large_externs.h b/deps/jemalloc/include/jemalloc/internal/large_externs.h
new file mode 100644
index 000000000..3f36282cd
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/large_externs.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H
+#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H
+
+void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
+void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+ bool zero);
+bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
+ size_t usize_max, bool zero);
+void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache);
+
+typedef void (large_dalloc_junk_t)(void *, size_t);
+extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk;
+
+typedef void (large_dalloc_maybe_junk_t)(void *, size_t);
+extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk;
+
+void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent);
+void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent);
+void large_dalloc(tsdn_t *tsdn, extent_t *extent);
+size_t large_salloc(tsdn_t *tsdn, const extent_t *extent);
+prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
+void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
+void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
+
+#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/log.h b/deps/jemalloc/include/jemalloc/internal/log.h
new file mode 100644
index 000000000..642085863
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/log.h
@@ -0,0 +1,115 @@
+#ifndef JEMALLOC_INTERNAL_LOG_H
+#define JEMALLOC_INTERNAL_LOG_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+
+#ifdef JEMALLOC_LOG
+# define JEMALLOC_LOG_VAR_BUFSIZE 1000
+#else
+# define JEMALLOC_LOG_VAR_BUFSIZE 1
+#endif
+
+#define JEMALLOC_LOG_BUFSIZE 4096
+
+/*
+ * The log malloc_conf option is a '|'-delimited list of log_var name segments
+ * which should be logged. The names are themselves hierarchical, with '.' as
+ * the delimiter (a "segment" is just a prefix in the log namespace). So, if
+ * you have:
+ *
+ * log("arena", "log msg for arena"); // 1
+ * log("arena.a", "log msg for arena.a"); // 2
+ * log("arena.b", "log msg for arena.b"); // 3
+ * log("arena.a.a", "log msg for arena.a.a"); // 4
+ * log("extent.a", "log msg for extent.a"); // 5
+ * log("extent.b", "log msg for extent.b"); // 6
+ *
+ * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and
+ * 6 will print at runtime. You can enable logging from all log vars by
+ * writing "log=.".
+ *
+ * None of this should be regarded as a stable API right now. It's intended
+ * as a debugging interface, to let us keep around some of our printf-debugging
+ * statements.
+ */
+
+extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
+extern atomic_b_t log_init_done;
+
+typedef struct log_var_s log_var_t;
+struct log_var_s {
+ /*
+ * Lowest bit is "inited", second lowest is "enabled". Putting them in
+ * a single word lets us avoid any fences on weak architectures.
+ */
+ atomic_u_t state;
+ const char *name;
+};
+
+#define LOG_NOT_INITIALIZED 0U
+#define LOG_INITIALIZED_NOT_ENABLED 1U
+#define LOG_ENABLED 2U
+
+#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str}
+
+/*
+ * Returns the value we should assume for state (which is not necessarily
+ * accurate; if logging is attempted before initialization has finished, then
+ * we default to doing the safe thing by logging everything).
+ */
+unsigned log_var_update_state(log_var_t *log_var);
+
+/* We factor out the metadata management to allow us to test more easily. */
+#define log_do_begin(log_var) \
+if (config_log) { \
+ unsigned log_state = atomic_load_u(&(log_var).state, \
+ ATOMIC_RELAXED); \
+ if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \
+ log_state = log_var_update_state(&(log_var)); \
+ assert(log_state != LOG_NOT_INITIALIZED); \
+ } \
+ if (log_state == LOG_ENABLED) { \
+ {
+ /* User code executes here. */
+#define log_do_end(log_var) \
+ } \
+ } \
+}
+
+/*
+ * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during
+ * preprocessing. To work around this, we take all potential extra arguments in
+ * a var-args function. Since a varargs macro needs at least one argument in
+ * the "...", we accept the format string there, and require that the first
+ * argument in this "..." is a const char *.
+ */
+static inline void
+log_impl_varargs(const char *name, ...) {
+ char buf[JEMALLOC_LOG_BUFSIZE];
+ va_list ap;
+
+ va_start(ap, name);
+ const char *format = va_arg(ap, const char *);
+ size_t dst_offset = 0;
+ dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name);
+ dst_offset += malloc_vsnprintf(buf + dst_offset,
+ JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap);
+ dst_offset += malloc_snprintf(buf + dst_offset,
+ JEMALLOC_LOG_BUFSIZE - dst_offset, "\n");
+ va_end(ap);
+
+ malloc_write(buf);
+}
+
+/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */
+#define LOG(log_var_str, ...) \
+do { \
+ static log_var_t log_var = LOG_VAR_INIT(log_var_str); \
+ log_do_begin(log_var) \
+ log_impl_varargs((log_var).name, __VA_ARGS__); \
+ log_do_end(log_var) \
+} while (0)
+
+#endif /* JEMALLOC_INTERNAL_LOG_H */
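A hypothetical call site (the log var name and message are illustrative):

    LOG("extent.split", "splitting extent %p", (void *)extent);

This prints only when the process is started with, e.g., MALLOC_CONF="log=extent" (or "log=." to enable every log var).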
diff --git a/deps/jemalloc/include/jemalloc/internal/malloc_io.h b/deps/jemalloc/include/jemalloc/internal/malloc_io.h
new file mode 100644
index 000000000..bfe556b52
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/malloc_io.h
@@ -0,0 +1,102 @@
+#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H
+#define JEMALLOC_INTERNAL_MALLOC_IO_H
+
+#ifdef _WIN32
+# ifdef _WIN64
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX "ll"
+# else
+# define FMT64_PREFIX "ll"
+# define FMTPTR_PREFIX ""
+# endif
+# define FMTd32 "d"
+# define FMTu32 "u"
+# define FMTx32 "x"
+# define FMTd64 FMT64_PREFIX "d"
+# define FMTu64 FMT64_PREFIX "u"
+# define FMTx64 FMT64_PREFIX "x"
+# define FMTdPTR FMTPTR_PREFIX "d"
+# define FMTuPTR FMTPTR_PREFIX "u"
+# define FMTxPTR FMTPTR_PREFIX "x"
+#else
+# include <inttypes.h>
+# define FMTd32 PRId32
+# define FMTu32 PRIu32
+# define FMTx32 PRIx32
+# define FMTd64 PRId64
+# define FMTu64 PRIu64
+# define FMTx64 PRIx64
+# define FMTdPTR PRIdPTR
+# define FMTuPTR PRIuPTR
+# define FMTxPTR PRIxPTR
+#endif
+
+/* Size of stack-allocated buffer passed to buferror(). */
+#define BUFERROR_BUF 64
+
+/*
+ * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be
+ * large enough for all possible uses within jemalloc.
+ */
+#define MALLOC_PRINTF_BUFSIZE 4096
+
+int buferror(int err, char *buf, size_t buflen);
+uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr,
+ int base);
+void malloc_write(const char *s);
+
+/*
+ * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
+ * point math.
+ */
+size_t malloc_vsnprintf(char *str, size_t size, const char *format,
+ va_list ap);
+size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
+ JEMALLOC_FORMAT_PRINTF(3, 4);
+/*
+ * The caller can set write_cb and cbopaque to null to choose to print with the
+ * je_malloc_message hook.
+ */
+void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+ const char *format, va_list ap);
+void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+ const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+
+static inline ssize_t
+malloc_write_fd(int fd, const void *buf, size_t count) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
+ /*
+ * Use syscall(2) rather than write(2) when possible in order to avoid
+ * the possibility of memory allocation within libc. This is necessary
+ * on FreeBSD; most operating systems do not have this problem though.
+ *
+ * syscall() returns long or int, depending on platform, so capture the
+ * result in the widest plausible type to avoid compiler warnings.
+ */
+ long result = syscall(SYS_write, fd, buf, count);
+#else
+ ssize_t result = (ssize_t)write(fd, buf,
+#ifdef _WIN32
+ (unsigned int)
+#endif
+ count);
+#endif
+ return (ssize_t)result;
+}
+
+static inline ssize_t
+malloc_read_fd(int fd, void *buf, size_t count) {
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
+ long result = syscall(SYS_read, fd, buf, count);
+#else
+ ssize_t result = read(fd, buf,
+#ifdef _WIN32
+ (unsigned int)
+#endif
+ count);
+#endif
+ return (ssize_t)result;
+}
+
+#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */
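For example, a libc-free diagnostic write to stderr would look like this (a sketch; STDERR_FILENO comes from unistd.h, or from the MSVC shim earlier in this patch):

    static const char msg[] = "<jemalloc>: initialized\n";
    malloc_write_fd(STDERR_FILENO, msg, sizeof(msg) - 1);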
diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h
deleted file mode 100644
index 3cfa78729..000000000
--- a/deps/jemalloc/include/jemalloc/internal/mb.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-void mb_write(void);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
-#ifdef __i386__
-/*
- * According to the Intel Architecture Software Developer's Manual, current
- * processors execute instructions in order from the perspective of other
- * processors in a multiprocessor system, but 1) Intel reserves the right to
- * change that, and 2) the compiler's optimizer could re-order instructions if
- * there weren't some form of barrier. Therefore, even if running on an
- * architecture that does not need memory barriers (everything through at least
- * i686), an "optimizer barrier" is necessary.
- */
-JEMALLOC_INLINE void
-mb_write(void)
-{
-
-# if 0
- /* This is a true memory barrier. */
- asm volatile ("pusha;"
- "xor %%eax,%%eax;"
- "cpuid;"
- "popa;"
- : /* Outputs. */
- : /* Inputs. */
- : "memory" /* Clobbers. */
- );
-#else
- /*
- * This is hopefully enough to keep the compiler from reordering
- * instructions around this one.
- */
- asm volatile ("nop;"
- : /* Outputs. */
- : /* Inputs. */
- : "memory" /* Clobbers. */
- );
-#endif
-}
-#elif (defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE void
-mb_write(void)
-{
-
- asm volatile ("sfence"
- : /* Outputs. */
- : /* Inputs. */
- : "memory" /* Clobbers. */
- );
-}
-#elif defined(__powerpc__)
-JEMALLOC_INLINE void
-mb_write(void)
-{
-
- asm volatile ("eieio"
- : /* Outputs. */
- : /* Inputs. */
- : "memory" /* Clobbers. */
- );
-}
-#elif defined(__sparc64__)
-JEMALLOC_INLINE void
-mb_write(void)
-{
-
- asm volatile ("membar #StoreStore"
- : /* Outputs. */
- : /* Inputs. */
- : "memory" /* Clobbers. */
- );
-}
-#elif defined(__tile__)
-JEMALLOC_INLINE void
-mb_write(void)
-{
-
- __sync_synchronize();
-}
-#else
-/*
- * This is much slower than a simple memory barrier, but the semantics of mutex
- * unlock make this work.
- */
-JEMALLOC_INLINE void
-mb_write(void)
-{
- malloc_mutex_t mtx;
-
- malloc_mutex_init(&mtx);
- malloc_mutex_lock(&mtx);
- malloc_mutex_unlock(&mtx);
-}
-#endif
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h
index de44e1435..6520c2512 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex.h
@@ -1,45 +1,123 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_MUTEX_H
+#define JEMALLOC_INTERNAL_MUTEX_H
-typedef struct malloc_mutex_s malloc_mutex_t;
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/tsd.h"
+#include "jemalloc/internal/witness.h"
+
+typedef enum {
+ /* Can only acquire one mutex of a given witness rank at a time. */
+ malloc_mutex_rank_exclusive,
+ /*
+ * Can acquire multiple mutexes of the same witness rank, but in
+ * address-ascending order only.
+ */
+ malloc_mutex_address_ordered
+} malloc_mutex_lock_order_t;
+typedef struct malloc_mutex_s malloc_mutex_t;
+struct malloc_mutex_s {
+ union {
+ struct {
+ /*
+ * prof_data is defined first to reduce cacheline
+ * bouncing: the data is not touched by the mutex holder
+ * during unlocking, while might be modified by
+ * contenders. Having it before the mutex itself could
+ * avoid prefetching a modified cacheline (for the
+ * unlocking thread).
+ */
+ mutex_prof_data_t prof_data;
#ifdef _WIN32
-# define MALLOC_MUTEX_INITIALIZER
+# if _WIN32_WINNT >= 0x0600
+ SRWLOCK lock;
+# else
+ CRITICAL_SECTION lock;
+# endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ os_unfair_lock lock;
#elif (defined(JEMALLOC_OSSPIN))
-# define MALLOC_MUTEX_INITIALIZER {0}
+ OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
-# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
+ pthread_mutex_t lock;
+ malloc_mutex_t *postponed_next;
#else
-# if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) && \
- defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
-# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
-# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
+ pthread_mutex_t lock;
+#endif
+ };
+ /*
+ * We only touch witness when configured w/ debug. However we
+ * keep the field in a union when !debug so that we don't have
+ * to pollute the code base with #ifdefs, while avoiding the
+ * memory cost.
+ */
+#if !defined(JEMALLOC_DEBUG)
+ witness_t witness;
+ malloc_mutex_lock_order_t lock_order;
+#endif
+ };
+
+#if defined(JEMALLOC_DEBUG)
+ witness_t witness;
+ malloc_mutex_lock_order_t lock_order;
+#endif
+};
+
+/*
+ * Based on benchmark results, a fixed spin with this number of retries works
+ * well for our critical sections.
+ */
+#define MALLOC_MUTEX_MAX_SPIN 250
+
+#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock))
# else
-# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
-# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock))
# endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
+#elif (defined(JEMALLOC_OSSPIN))
+# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
+#else
+# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock)
+# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock)
+# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0)
#endif
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
+#define LOCK_PROF_DATA_INITIALIZER \
+ {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \
+ ATOMIC_INIT(0), 0, NULL, 0}
-struct malloc_mutex_s {
#ifdef _WIN32
- CRITICAL_SECTION lock;
+# define MALLOC_MUTEX_INITIALIZER
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLock lock;
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
- pthread_mutex_t lock;
- malloc_mutex_t *postponed_next;
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#else
- pthread_mutex_t lock;
+# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
+# define MALLOC_MUTEX_INITIALIZER \
+ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
+ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#endif
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
#ifdef JEMALLOC_LAZY_LOCK
extern bool isthreaded;
@@ -48,52 +126,123 @@ extern bool isthreaded;
# define isthreaded true
#endif
-bool malloc_mutex_init(malloc_mutex_t *mutex);
-void malloc_mutex_prefork(malloc_mutex_t *mutex);
-void malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
-void malloc_mutex_postfork_child(malloc_mutex_t *mutex);
-bool mutex_boot(void);
+bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+ witness_rank_t rank, malloc_mutex_lock_order_t lock_order);
+void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex);
+bool malloc_mutex_boot(void);
+void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex);
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
+void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
-#ifndef JEMALLOC_ENABLE_INLINE
-void malloc_mutex_lock(malloc_mutex_t *mutex);
-void malloc_mutex_unlock(malloc_mutex_t *mutex);
-#endif
+static inline void
+malloc_mutex_lock_final(malloc_mutex_t *mutex) {
+ MALLOC_MUTEX_LOCK(mutex);
+}
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
-JEMALLOC_INLINE void
-malloc_mutex_lock(malloc_mutex_t *mutex)
-{
+static inline bool
+malloc_mutex_trylock_final(malloc_mutex_t *mutex) {
+ return MALLOC_MUTEX_TRYLOCK(mutex);
+}
+static inline void
+mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ if (config_stats) {
+ mutex_prof_data_t *data = &mutex->prof_data;
+ data->n_lock_ops++;
+ if (data->prev_owner != tsdn) {
+ data->prev_owner = tsdn;
+ data->n_owner_switches++;
+ }
+ }
+}
+
+/* Trylock: return false if the lock is successfully acquired. */
+static inline bool
+malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
-#ifdef _WIN32
- EnterCriticalSection(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLockLock(&mutex->lock);
-#else
- pthread_mutex_lock(&mutex->lock);
-#endif
+ if (malloc_mutex_trylock_final(mutex)) {
+ return true;
+ }
+ mutex_owner_stats_update(tsdn, mutex);
}
+ witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+
+ return false;
}
-JEMALLOC_INLINE void
-malloc_mutex_unlock(malloc_mutex_t *mutex)
-{
+/* Aggregate lock prof data. */
+static inline void
+malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) {
+ nstime_add(&sum->tot_wait_time, &data->tot_wait_time);
+ if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) {
+ nstime_copy(&sum->max_wait_time, &data->max_wait_time);
+ }
+
+ sum->n_wait_times += data->n_wait_times;
+ sum->n_spin_acquired += data->n_spin_acquired;
+
+ if (sum->max_n_thds < data->max_n_thds) {
+ sum->max_n_thds = data->max_n_thds;
+ }
+ uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds,
+ ATOMIC_RELAXED);
+ uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32(
+ &data->n_waiting_thds, ATOMIC_RELAXED);
+ atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds,
+ ATOMIC_RELAXED);
+ sum->n_owner_switches += data->n_owner_switches;
+ sum->n_lock_ops += data->n_lock_ops;
+}
+static inline void
+malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
-#ifdef _WIN32
- LeaveCriticalSection(&mutex->lock);
-#elif (defined(JEMALLOC_OSSPIN))
- OSSpinLockUnlock(&mutex->lock);
-#else
- pthread_mutex_unlock(&mutex->lock);
-#endif
+ if (malloc_mutex_trylock_final(mutex)) {
+ malloc_mutex_lock_slow(mutex);
+ }
+ mutex_owner_stats_update(tsdn, mutex);
}
+ witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+}
+
+static inline void
+malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+ if (isthreaded) {
+ MALLOC_MUTEX_UNLOCK(mutex);
+ }
+}
+
+static inline void
+malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+}
+
+static inline void
+malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
+}
+
+/* Copy the prof data from mutex for processing. */
+static inline void
+malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
+ malloc_mutex_t *mutex) {
+ mutex_prof_data_t *source = &mutex->prof_data;
+ /* Can only read holding the mutex. */
+ malloc_mutex_assert_owner(tsdn, mutex);
+
+ /*
+ * Not *really* allowed (we shouldn't be doing non-atomic loads of
+ * atomic data), but the mutex protection makes this safe, and writing
+ * a member-for-member copy is tedious for this situation.
+ */
+ *data = *source;
+ /* n_waiting_thds is not reported (modified w/o locking). */
+ atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
}
-#endif
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_MUTEX_H */
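
Worth noting about the reworked API above: per the comment in the hunk, malloc_mutex_trylock() returns false when the lock *was* acquired. A minimal caller-side sketch of that convention, assuming an initialized mutex and the tsdn_t handle the prototypes imply (the helper itself is hypothetical, not part of the patch):

    /* Sketch only: relies on the jemalloc-internal API shown in this hunk. */
    static bool
    try_update_counter(tsdn_t *tsdn, malloc_mutex_t *mtx, uint64_t *counter) {
        if (malloc_mutex_trylock(tsdn, mtx)) {
            return true;        /* true: lock NOT acquired; caller backs off */
        }
        (*counter)++;           /* critical section */
        malloc_mutex_unlock(tsdn, mtx);
        return false;           /* false: success, mirroring trylock */
    }
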
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_pool.h b/deps/jemalloc/include/jemalloc/internal/mutex_pool.h
new file mode 100644
index 000000000..726cece90
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/mutex_pool.h
@@ -0,0 +1,94 @@
+#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H
+#define JEMALLOC_INTERNAL_MUTEX_POOL_H
+
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/witness.h"
+
+/* We do mod reductions by this value, so it should be kept a power of 2. */
+#define MUTEX_POOL_SIZE 256
+
+typedef struct mutex_pool_s mutex_pool_t;
+struct mutex_pool_s {
+ malloc_mutex_t mutexes[MUTEX_POOL_SIZE];
+};
+
+bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank);
+
+/* Internal helper - not meant to be called outside this module. */
+static inline malloc_mutex_t *
+mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) {
+ size_t hash_result[2];
+ hash(&key, sizeof(key), 0xd50dcc1b, hash_result);
+ return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE];
+}
+
+static inline void
+mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) {
+ for (int i = 0; i < MUTEX_POOL_SIZE; i++) {
+ malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]);
+ }
+}
+
+/*
+ * Note that a mutex pool doesn't work exactly the way an embedded mutex would.
+ * You're not allowed to acquire mutexes in the pool one at a time. You have to
+ * acquire all the mutexes you'll need in a single function call, and then
+ * release them all in a single function call.
+ */
+
+static inline void
+mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+ mutex_pool_assert_not_held(tsdn, pool);
+
+ malloc_mutex_t *mutex = mutex_pool_mutex(pool, key);
+ malloc_mutex_lock(tsdn, mutex);
+}
+
+static inline void
+mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+ malloc_mutex_t *mutex = mutex_pool_mutex(pool, key);
+ malloc_mutex_unlock(tsdn, mutex);
+
+ mutex_pool_assert_not_held(tsdn, pool);
+}
+
+static inline void
+mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
+ uintptr_t key2) {
+ mutex_pool_assert_not_held(tsdn, pool);
+
+ malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1);
+ malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2);
+ if ((uintptr_t)mutex1 < (uintptr_t)mutex2) {
+ malloc_mutex_lock(tsdn, mutex1);
+ malloc_mutex_lock(tsdn, mutex2);
+ } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) {
+ malloc_mutex_lock(tsdn, mutex1);
+ } else {
+ malloc_mutex_lock(tsdn, mutex2);
+ malloc_mutex_lock(tsdn, mutex1);
+ }
+}
+
+static inline void
+mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
+ uintptr_t key2) {
+ malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1);
+ malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2);
+ if (mutex1 == mutex2) {
+ malloc_mutex_unlock(tsdn, mutex1);
+ } else {
+ malloc_mutex_unlock(tsdn, mutex1);
+ malloc_mutex_unlock(tsdn, mutex2);
+ }
+
+ mutex_pool_assert_not_held(tsdn, pool);
+}
+
+static inline void
+mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+ malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key));
+}
+
+#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */
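
The address-ascending order in mutex_pool_lock2() above is the classic deadlock-avoidance discipline: when two keys hash to distinct mutexes, every thread takes the lower-addressed mutex first, so no lock-order cycle can form. A self-contained sketch of the same idea using plain pthreads (names hypothetical, not part of jemalloc):

    #include <pthread.h>
    #include <stdint.h>

    /* Acquire two mutexes in a globally consistent (address) order. */
    static void
    lock_pair(pthread_mutex_t *a, pthread_mutex_t *b) {
        if (a == b) {
            pthread_mutex_lock(a);          /* same mutex: lock once */
        } else if ((uintptr_t)a < (uintptr_t)b) {
            pthread_mutex_lock(a);          /* lower address first */
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }
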
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
new file mode 100644
index 000000000..ce183d335
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
@@ -0,0 +1,99 @@
+#ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H
+#define JEMALLOC_INTERNAL_MUTEX_PROF_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/tsd_types.h"
+
+#define MUTEX_PROF_GLOBAL_MUTEXES \
+ OP(background_thread) \
+ OP(ctl) \
+ OP(prof)
+
+typedef enum {
+#define OP(mtx) global_prof_mutex_##mtx,
+ MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+ mutex_prof_num_global_mutexes
+} mutex_prof_global_ind_t;
+
+#define MUTEX_PROF_ARENA_MUTEXES \
+ OP(large) \
+ OP(extent_avail) \
+ OP(extents_dirty) \
+ OP(extents_muzzy) \
+ OP(extents_retained) \
+ OP(decay_dirty) \
+ OP(decay_muzzy) \
+ OP(base) \
+ OP(tcache_list)
+
+typedef enum {
+#define OP(mtx) arena_prof_mutex_##mtx,
+ MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+ mutex_prof_num_arena_mutexes
+} mutex_prof_arena_ind_t;
+
+#define MUTEX_PROF_UINT64_COUNTERS \
+ OP(num_ops, uint64_t, "n_lock_ops") \
+ OP(num_wait, uint64_t, "n_waiting") \
+ OP(num_spin_acq, uint64_t, "n_spin_acq") \
+ OP(num_owner_switch, uint64_t, "n_owner_switch") \
+ OP(total_wait_time, uint64_t, "total_wait_ns") \
+ OP(max_wait_time, uint64_t, "max_wait_ns")
+
+#define MUTEX_PROF_UINT32_COUNTERS \
+ OP(max_num_thds, uint32_t, "max_n_thds")
+
+#define MUTEX_PROF_COUNTERS \
+ MUTEX_PROF_UINT64_COUNTERS \
+ MUTEX_PROF_UINT32_COUNTERS
+
+#define OP(counter, type, human) mutex_counter_##counter,
+
+#define COUNTER_ENUM(counter_list, t) \
+ typedef enum { \
+ counter_list \
+ mutex_prof_num_##t##_counters \
+ } mutex_prof_##t##_counter_ind_t;
+
+COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t)
+COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t)
+
+#undef COUNTER_ENUM
+#undef OP
+
+typedef struct {
+ /*
+ * Counters touched on the slow path, i.e. when there is lock
+ * contention. We update them once we have the lock.
+ */
+ /* Total time (in nanoseconds) spent waiting on this mutex. */
+ nstime_t tot_wait_time;
+ /* Max time (in nanoseconds) spent on a single lock operation. */
+ nstime_t max_wait_time;
+ /* # of times a thread had to wait for this mutex (after spinning). */
+ uint64_t n_wait_times;
+ /* # of times the mutex was acquired through local spinning. */
+ uint64_t n_spin_acquired;
+ /* Max # of threads waiting for the mutex at the same time. */
+ uint32_t max_n_thds;
+ /* Current # of threads waiting on the lock. Atomically synced. */
+ atomic_u32_t n_waiting_thds;
+
+ /*
+ * Data touched on the fast path. These are modified right after we
+ * grab the lock, so it's placed closest to the end (i.e. right before
+ * the lock) so that we have a higher chance of them being on the same
+ * cacheline.
+ */
+ /* # of times the mutex holder is different from the previous one. */
+ uint64_t n_owner_switches;
+ /* Previous mutex holder, to facilitate n_owner_switches. */
+ tsdn_t *prev_owner;
+ /* # of lock() operations in total. */
+ uint64_t n_lock_ops;
+} mutex_prof_data_t;
+
+#endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */
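
The OP()/#undef OP pattern above is an X-macro: each mutex or counter list is written once and re-expanded into enum values here, and into counter fields and human-readable names elsewhere, so the lists can never drift out of sync. A tiny standalone illustration of the technique (all names hypothetical):

    #include <stdio.h>

    #define COLOR_LIST \
        OP(red)        \
        OP(green)      \
        OP(blue)

    /* Expansion 1: enum constants. */
    typedef enum {
    #define OP(c) color_##c,
        COLOR_LIST
    #undef OP
        color_count
    } color_t;

    /* Expansion 2: matching name strings, guaranteed in sync. */
    static const char *color_names[] = {
    #define OP(c) #c,
        COLOR_LIST
    #undef OP
    };

    int main(void) {
        for (int i = 0; i < color_count; i++) {
            printf("%d -> %s\n", i, color_names[i]);
        }
        return 0;
    }
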
diff --git a/deps/jemalloc/include/jemalloc/internal/nstime.h b/deps/jemalloc/include/jemalloc/internal/nstime.h
new file mode 100644
index 000000000..17c177c7f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/nstime.h
@@ -0,0 +1,34 @@
+#ifndef JEMALLOC_INTERNAL_NSTIME_H
+#define JEMALLOC_INTERNAL_NSTIME_H
+
+/* Maximum supported number of seconds (~584 years). */
+#define NSTIME_SEC_MAX KQU(18446744072)
+#define NSTIME_ZERO_INITIALIZER {0}
+
+typedef struct {
+ uint64_t ns;
+} nstime_t;
+
+void nstime_init(nstime_t *time, uint64_t ns);
+void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
+uint64_t nstime_ns(const nstime_t *time);
+uint64_t nstime_sec(const nstime_t *time);
+uint64_t nstime_msec(const nstime_t *time);
+uint64_t nstime_nsec(const nstime_t *time);
+void nstime_copy(nstime_t *time, const nstime_t *source);
+int nstime_compare(const nstime_t *a, const nstime_t *b);
+void nstime_add(nstime_t *time, const nstime_t *addend);
+void nstime_iadd(nstime_t *time, uint64_t addend);
+void nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
+void nstime_isubtract(nstime_t *time, uint64_t subtrahend);
+void nstime_imultiply(nstime_t *time, uint64_t multiplier);
+void nstime_idivide(nstime_t *time, uint64_t divisor);
+uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor);
+
+typedef bool (nstime_monotonic_t)(void);
+extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic;
+
+typedef bool (nstime_update_t)(nstime_t *);
+extern nstime_update_t *JET_MUTABLE nstime_update;
+
+#endif /* JEMALLOC_INTERNAL_NSTIME_H */
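
Since nstime_t is a single flat nanosecond counter, most of the operations declared above reduce to 64-bit integer arithmetic. A sketch of plausible semantics for two of them, assuming the one-field representation shown (not necessarily the exact jemalloc implementation):

    #include <stdint.h>

    typedef struct { uint64_t ns; } my_nstime_t;   /* mirrors nstime_t above */

    /* init2: pack seconds + nanoseconds into the flat counter. */
    static void
    my_nstime_init2(my_nstime_t *t, uint64_t sec, uint64_t nsec) {
        t->ns = sec * 1000000000ULL + nsec;
    }

    /* compare: plain integer comparison, returning -1, 0, or 1. */
    static int
    my_nstime_compare(const my_nstime_t *a, const my_nstime_t *b) {
        return (a->ns > b->ns) - (a->ns < b->ns);
    }
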
diff --git a/deps/jemalloc/include/jemalloc/internal/pages.h b/deps/jemalloc/include/jemalloc/internal/pages.h
new file mode 100644
index 000000000..7dae633af
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/pages.h
@@ -0,0 +1,88 @@
+#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H
+#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H
+
+/* Page size. LG_PAGE is determined by the configure script. */
+#ifdef PAGE_MASK
+# undef PAGE_MASK
+#endif
+#define PAGE ((size_t)(1U << LG_PAGE))
+#define PAGE_MASK ((size_t)(PAGE - 1))
+/* Return the page base address for the page containing address a. */
+#define PAGE_ADDR2BASE(a) \
+ ((void *)((uintptr_t)(a) & ~PAGE_MASK))
+/* Return the smallest pagesize multiple that is >= s. */
+#define PAGE_CEILING(s) \
+ (((s) + PAGE_MASK) & ~PAGE_MASK)
+
+/* Huge page size. LG_HUGEPAGE is determined by the configure script. */
+#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE))
+#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1))
+/* Return the huge page base address for the huge page containing address a. */
+#define HUGEPAGE_ADDR2BASE(a) \
+ ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))
+/* Return the smallest huge page size multiple that is >= s. */
+#define HUGEPAGE_CEILING(s) \
+ (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK)
+
+/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */
+#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE)
+# define PAGES_CAN_PURGE_LAZY
+#endif
+/*
+ * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported.
+ *
+ * The only supported way to hard-purge on Windows is to decommit and then
+ * re-commit, but doing so is racy, and if re-commit fails it's a pain to
+ * propagate the "poisoned" memory state. Since we typically decommit as the
+ * next step after purging on Windows anyway, there's no point in adding such
+ * complexity.
+ */
+#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \
+ defined(JEMALLOC_MAPS_COALESCE))
+# define PAGES_CAN_PURGE_FORCED
+#endif
+
+static const bool pages_can_purge_lazy =
+#ifdef PAGES_CAN_PURGE_LAZY
+ true
+#else
+ false
+#endif
+ ;
+static const bool pages_can_purge_forced =
+#ifdef PAGES_CAN_PURGE_FORCED
+ true
+#else
+ false
+#endif
+ ;
+
+typedef enum {
+ thp_mode_default = 0, /* Do not change hugepage settings. */
+ thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */
+ thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */
+
+ thp_mode_names_limit = 3, /* Used for option processing. */
+ thp_mode_not_supported = 3 /* No THP support detected. */
+} thp_mode_t;
+
+#define THP_MODE_DEFAULT thp_mode_default
+extern thp_mode_t opt_thp;
+extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */
+extern const char *thp_mode_names[];
+
+void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
+void pages_unmap(void *addr, size_t size);
+bool pages_commit(void *addr, size_t size);
+bool pages_decommit(void *addr, size_t size);
+bool pages_purge_lazy(void *addr, size_t size);
+bool pages_purge_forced(void *addr, size_t size);
+bool pages_huge(void *addr, size_t size);
+bool pages_nohuge(void *addr, size_t size);
+bool pages_dontdump(void *addr, size_t size);
+bool pages_dodump(void *addr, size_t size);
+bool pages_boot(void);
+void pages_set_thp_state(void *ptr, size_t size);
+
+#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */
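
The PAGE_*/HUGEPAGE_* macros above are standard power-of-two mask arithmetic. A quick standalone check of the rounding behavior, assuming a hypothetical 4 KiB page (LG_PAGE == 12; in the real build it is chosen by configure):

    #include <stdio.h>
    #include <stdint.h>

    #define LG_PAGE 12                              /* assumed page size */
    #define PAGE ((size_t)(1U << LG_PAGE))
    #define PAGE_MASK ((size_t)(PAGE - 1))
    #define PAGE_ADDR2BASE(a) ((void *)((uintptr_t)(a) & ~PAGE_MASK))
    #define PAGE_CEILING(s) (((s) + PAGE_MASK) & ~PAGE_MASK)

    int main(void) {
        printf("%zu\n", PAGE_CEILING((size_t)1));       /* 4096 */
        printf("%zu\n", PAGE_CEILING((size_t)4096));    /* 4096 (aligned) */
        printf("%zu\n", PAGE_CEILING((size_t)4097));    /* 8192 */
        printf("%p\n", PAGE_ADDR2BASE((void *)(uintptr_t)0x1234)); /* 0x1000 */
        return 0;
    }
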
diff --git a/deps/jemalloc/include/jemalloc/internal/ph.h b/deps/jemalloc/include/jemalloc/internal/ph.h
new file mode 100644
index 000000000..84d6778a9
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/ph.h
@@ -0,0 +1,391 @@
+/*
+ * A Pairing Heap implementation.
+ *
+ * "The Pairing Heap: A New Form of Self-Adjusting Heap"
+ * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
+ *
+ * With an auxiliary two-pass list, described in a follow-on paper.
+ *
+ * "Pairing Heaps: Experiments and Analysis"
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
+ *
+ *******************************************************************************
+ */
+
+#ifndef PH_H_
+#define PH_H_
+
+/* Node structure. */
+#define phn(a_type) \
+struct { \
+ a_type *phn_prev; \
+ a_type *phn_next; \
+ a_type *phn_lchild; \
+}
+
+/* Root structure. */
+#define ph(a_type) \
+struct { \
+ a_type *ph_root; \
+}
+
+/* Internal utility macros. */
+#define phn_lchild_get(a_type, a_field, a_phn) \
+ (a_phn->a_field.phn_lchild)
+#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \
+ a_phn->a_field.phn_lchild = a_lchild; \
+} while (0)
+
+#define phn_next_get(a_type, a_field, a_phn) \
+ (a_phn->a_field.phn_next)
+#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \
+ a_phn->a_field.phn_prev = a_prev; \
+} while (0)
+
+#define phn_prev_get(a_type, a_field, a_phn) \
+ (a_phn->a_field.phn_prev)
+#define phn_next_set(a_type, a_field, a_phn, a_next) do { \
+ a_phn->a_field.phn_next = a_next; \
+} while (0)
+
+#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \
+ a_type *phn0child; \
+ \
+ assert(a_phn0 != NULL); \
+ assert(a_phn1 != NULL); \
+ assert(a_cmp(a_phn0, a_phn1) <= 0); \
+ \
+ phn_prev_set(a_type, a_field, a_phn1, a_phn0); \
+ phn0child = phn_lchild_get(a_type, a_field, a_phn0); \
+ phn_next_set(a_type, a_field, a_phn1, phn0child); \
+ if (phn0child != NULL) { \
+ phn_prev_set(a_type, a_field, phn0child, a_phn1); \
+ } \
+ phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \
+} while (0)
+
+#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \
+ if (a_phn0 == NULL) { \
+ r_phn = a_phn1; \
+ } else if (a_phn1 == NULL) { \
+ r_phn = a_phn0; \
+ } else if (a_cmp(a_phn0, a_phn1) < 0) { \
+ phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \
+ a_cmp); \
+ r_phn = a_phn0; \
+ } else { \
+ phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \
+ a_cmp); \
+ r_phn = a_phn1; \
+ } \
+} while (0)
+
+#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \
+ a_type *head = NULL; \
+ a_type *tail = NULL; \
+ a_type *phn0 = a_phn; \
+ a_type *phn1 = phn_next_get(a_type, a_field, phn0); \
+ \
+ /* \
+ * Multipass merge, wherein the first two elements of a FIFO \
+ * are repeatedly merged, and each result is appended to the \
+ * singly linked FIFO, until the FIFO contains only a single \
+ * element. We start with a sibling list but no reference to \
+ * its tail, so we do a single pass over the sibling list to \
+ * populate the FIFO. \
+ */ \
+ if (phn1 != NULL) { \
+ a_type *phnrest = phn_next_get(a_type, a_field, phn1); \
+ if (phnrest != NULL) { \
+ phn_prev_set(a_type, a_field, phnrest, NULL); \
+ } \
+ phn_prev_set(a_type, a_field, phn0, NULL); \
+ phn_next_set(a_type, a_field, phn0, NULL); \
+ phn_prev_set(a_type, a_field, phn1, NULL); \
+ phn_next_set(a_type, a_field, phn1, NULL); \
+ phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \
+ head = tail = phn0; \
+ phn0 = phnrest; \
+ while (phn0 != NULL) { \
+ phn1 = phn_next_get(a_type, a_field, phn0); \
+ if (phn1 != NULL) { \
+ phnrest = phn_next_get(a_type, a_field, \
+ phn1); \
+ if (phnrest != NULL) { \
+ phn_prev_set(a_type, a_field, \
+ phnrest, NULL); \
+ } \
+ phn_prev_set(a_type, a_field, phn0, \
+ NULL); \
+ phn_next_set(a_type, a_field, phn0, \
+ NULL); \
+ phn_prev_set(a_type, a_field, phn1, \
+ NULL); \
+ phn_next_set(a_type, a_field, phn1, \
+ NULL); \
+ phn_merge(a_type, a_field, phn0, phn1, \
+ a_cmp, phn0); \
+ phn_next_set(a_type, a_field, tail, \
+ phn0); \
+ tail = phn0; \
+ phn0 = phnrest; \
+ } else { \
+ phn_next_set(a_type, a_field, tail, \
+ phn0); \
+ tail = phn0; \
+ phn0 = NULL; \
+ } \
+ } \
+ phn0 = head; \
+ phn1 = phn_next_get(a_type, a_field, phn0); \
+ if (phn1 != NULL) { \
+ while (true) { \
+ head = phn_next_get(a_type, a_field, \
+ phn1); \
+ assert(phn_prev_get(a_type, a_field, \
+ phn0) == NULL); \
+ phn_next_set(a_type, a_field, phn0, \
+ NULL); \
+ assert(phn_prev_get(a_type, a_field, \
+ phn1) == NULL); \
+ phn_next_set(a_type, a_field, phn1, \
+ NULL); \
+ phn_merge(a_type, a_field, phn0, phn1, \
+ a_cmp, phn0); \
+ if (head == NULL) { \
+ break; \
+ } \
+ phn_next_set(a_type, a_field, tail, \
+ phn0); \
+ tail = phn0; \
+ phn0 = head; \
+ phn1 = phn_next_get(a_type, a_field, \
+ phn0); \
+ } \
+ } \
+ } \
+ r_phn = phn0; \
+} while (0)
+
+#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \
+ a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \
+ if (phn != NULL) { \
+ phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \
+ phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \
+ phn_prev_set(a_type, a_field, phn, NULL); \
+ ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \
+ assert(phn_next_get(a_type, a_field, phn) == NULL); \
+ phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \
+ a_ph->ph_root); \
+ } \
+} while (0)
+
+#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \
+ a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \
+ if (lchild == NULL) { \
+ r_phn = NULL; \
+ } else { \
+ ph_merge_siblings(a_type, a_field, lchild, a_cmp, \
+ r_phn); \
+ } \
+} while (0)
+
+/*
+ * The ph_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to ph_gen().
+ */
+#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \
+a_attr void a_prefix##new(a_ph_type *ph); \
+a_attr bool a_prefix##empty(a_ph_type *ph); \
+a_attr a_type *a_prefix##first(a_ph_type *ph); \
+a_attr a_type *a_prefix##any(a_ph_type *ph); \
+a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \
+a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \
+a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \
+a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn);
+
+/*
+ * The ph_gen() macro generates a type-specific pairing heap implementation,
+ * based on the above cpp macros.
+ */
+#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \
+a_attr void \
+a_prefix##new(a_ph_type *ph) { \
+ memset(ph, 0, sizeof(ph(a_type))); \
+} \
+a_attr bool \
+a_prefix##empty(a_ph_type *ph) { \
+ return (ph->ph_root == NULL); \
+} \
+a_attr a_type * \
+a_prefix##first(a_ph_type *ph) { \
+ if (ph->ph_root == NULL) { \
+ return NULL; \
+ } \
+ ph_merge_aux(a_type, a_field, ph, a_cmp); \
+ return ph->ph_root; \
+} \
+a_attr a_type * \
+a_prefix##any(a_ph_type *ph) { \
+ if (ph->ph_root == NULL) { \
+ return NULL; \
+ } \
+ a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \
+ if (aux != NULL) { \
+ return aux; \
+ } \
+ return ph->ph_root; \
+} \
+a_attr void \
+a_prefix##insert(a_ph_type *ph, a_type *phn) { \
+ memset(&phn->a_field, 0, sizeof(phn(a_type))); \
+ \
+ /* \
+ * Treat the root as an aux list during insertion, and lazily \
+ * merge during a_prefix##remove_first(). For elements that \
+ * are inserted, then removed via a_prefix##remove() before the \
+ * aux list is ever processed, this makes insert/remove \
+ * constant-time, whereas eager merging would make insert \
+ * O(log n). \
+ */ \
+ if (ph->ph_root == NULL) { \
+ ph->ph_root = phn; \
+ } else { \
+ phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \
+ a_field, ph->ph_root)); \
+ if (phn_next_get(a_type, a_field, ph->ph_root) != \
+ NULL) { \
+ phn_prev_set(a_type, a_field, \
+ phn_next_get(a_type, a_field, ph->ph_root), \
+ phn); \
+ } \
+ phn_prev_set(a_type, a_field, phn, ph->ph_root); \
+ phn_next_set(a_type, a_field, ph->ph_root, phn); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##remove_first(a_ph_type *ph) { \
+ a_type *ret; \
+ \
+ if (ph->ph_root == NULL) { \
+ return NULL; \
+ } \
+ ph_merge_aux(a_type, a_field, ph, a_cmp); \
+ \
+ ret = ph->ph_root; \
+ \
+ ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \
+ ph->ph_root); \
+ \
+ return ret; \
+} \
+a_attr a_type * \
+a_prefix##remove_any(a_ph_type *ph) { \
+ /* \
+ * Remove the most recently inserted aux list element, or the \
+ * root if the aux list is empty. This has the effect of \
+ * behaving as a LIFO (and insertion/removal is therefore \
+ * constant-time) if a_prefix##[remove_]first() are never \
+ * called. \
+ */ \
+ if (ph->ph_root == NULL) { \
+ return NULL; \
+ } \
+ a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \
+ if (ret != NULL) { \
+ a_type *aux = phn_next_get(a_type, a_field, ret); \
+ phn_next_set(a_type, a_field, ph->ph_root, aux); \
+ if (aux != NULL) { \
+ phn_prev_set(a_type, a_field, aux, \
+ ph->ph_root); \
+ } \
+ return ret; \
+ } \
+ ret = ph->ph_root; \
+ ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \
+ ph->ph_root); \
+ return ret; \
+} \
+a_attr void \
+a_prefix##remove(a_ph_type *ph, a_type *phn) { \
+ a_type *replace, *parent; \
+ \
+ if (ph->ph_root == phn) { \
+ /* \
+ * We can delete from aux list without merging it, but \
+ * we need to merge if we are dealing with the root \
+ * node and it has children. \
+ */ \
+ if (phn_lchild_get(a_type, a_field, phn) == NULL) { \
+ ph->ph_root = phn_next_get(a_type, a_field, \
+ phn); \
+ if (ph->ph_root != NULL) { \
+ phn_prev_set(a_type, a_field, \
+ ph->ph_root, NULL); \
+ } \
+ return; \
+ } \
+ ph_merge_aux(a_type, a_field, ph, a_cmp); \
+ if (ph->ph_root == phn) { \
+ ph_merge_children(a_type, a_field, ph->ph_root, \
+ a_cmp, ph->ph_root); \
+ return; \
+ } \
+ } \
+ \
+ /* Get parent (if phn is leftmost child) before mutating. */ \
+ if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \
+ if (phn_lchild_get(a_type, a_field, parent) != phn) { \
+ parent = NULL; \
+ } \
+ } \
+ /* Find a possible replacement node, and link to parent. */ \
+ ph_merge_children(a_type, a_field, phn, a_cmp, replace); \
+ /* Set next/prev for sibling linked list. */ \
+ if (replace != NULL) { \
+ if (parent != NULL) { \
+ phn_prev_set(a_type, a_field, replace, parent); \
+ phn_lchild_set(a_type, a_field, parent, \
+ replace); \
+ } else { \
+ phn_prev_set(a_type, a_field, replace, \
+ phn_prev_get(a_type, a_field, phn)); \
+ if (phn_prev_get(a_type, a_field, phn) != \
+ NULL) { \
+ phn_next_set(a_type, a_field, \
+ phn_prev_get(a_type, a_field, phn), \
+ replace); \
+ } \
+ } \
+ phn_next_set(a_type, a_field, replace, \
+ phn_next_get(a_type, a_field, phn)); \
+ if (phn_next_get(a_type, a_field, phn) != NULL) { \
+ phn_prev_set(a_type, a_field, \
+ phn_next_get(a_type, a_field, phn), \
+ replace); \
+ } \
+ } else { \
+ if (parent != NULL) { \
+ a_type *next = phn_next_get(a_type, a_field, \
+ phn); \
+ phn_lchild_set(a_type, a_field, parent, next); \
+ if (next != NULL) { \
+ phn_prev_set(a_type, a_field, next, \
+ parent); \
+ } \
+ } else { \
+ assert(phn_prev_get(a_type, a_field, phn) != \
+ NULL); \
+ phn_next_set(a_type, a_field, \
+ phn_prev_get(a_type, a_field, phn), \
+ phn_next_get(a_type, a_field, phn)); \
+ } \
+ if (phn_next_get(a_type, a_field, phn) != NULL) { \
+ phn_prev_set(a_type, a_field, \
+ phn_next_get(a_type, a_field, phn), \
+ phn_prev_get(a_type, a_field, phn)); \
+ } \
+ } \
+}
+
+#endif /* PH_H_ */
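
The intended use of this header is to instantiate ph_proto()/ph_gen() once per node type, with the phn() linkage embedded in the node. A minimal sketch of such an instantiation (the node type and comparator are hypothetical; the generated static functions would normally be referenced by real call sites):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    /* plus the ph.h macros defined above */

    typedef struct node_s node_t;
    struct node_s {
        uint64_t key;
        phn(node_t) link;       /* embedded pairing-heap linkage */
    };

    static int
    node_cmp(const node_t *a, const node_t *b) {
        return (a->key > b->key) - (a->key < b->key);
    }

    typedef ph(node_t) node_heap_t;
    ph_gen(static, node_heap_, node_heap_t, node_t, link, node_cmp)

    /* Usage: node_heap_new(&heap); node_heap_insert(&heap, &n);
     * node_t *min = node_heap_remove_first(&heap); */
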
diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.sh b/deps/jemalloc/include/jemalloc/internal/private_namespace.sh
index cd25eb306..6ef1346a3 100755
--- a/deps/jemalloc/include/jemalloc/internal/private_namespace.sh
+++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-for symbol in `cat $1` ; do
- echo "#define ${symbol} JEMALLOC_N(${symbol})"
+for symbol in `cat "$@"` ; do
+ echo "#define ${symbol} JEMALLOC_N(${symbol})"
done
diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.sh b/deps/jemalloc/include/jemalloc/internal/private_symbols.sh
new file mode 100755
index 000000000..442a259fd
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/private_symbols.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+#
+# Generate private_symbols[_jet].awk.
+#
+# Usage: private_symbols.sh <sym_prefix> <sym>*
+#
+# <sym_prefix> is typically "" or "_".
+
+sym_prefix=$1
+shift
+
+cat <<EOF
+#!/usr/bin/env awk -f
+
+BEGIN {
+ sym_prefix = "${sym_prefix}"
+ split("\\
+EOF
+
+for public_sym in "$@" ; do
+ cat <<EOF
+ ${sym_prefix}${public_sym} \\
+EOF
+done
+
+cat <<"EOF"
+ ", exported_symbol_names)
+ # Store exported symbol names as keys in exported_symbols.
+ for (i in exported_symbol_names) {
+ exported_symbols[exported_symbol_names[i]] = 1
+ }
+}
+
+# Process 'nm -a <c_source.o>' output.
+#
+# Handle lines like:
+# 0000000000000008 D opt_junk
+# 0000000000007574 T malloc_initialized
+(NF == 3 && $2 ~ /^[ABCDGRSTVW]$/ && !($3 in exported_symbols) && $3 ~ /^[A-Za-z0-9_]+$/) {
+ print substr($3, 1+length(sym_prefix), length($3)-length(sym_prefix))
+}
+
+# Process 'dumpbin /SYMBOLS <c_source.obj>' output.
+#
+# Handle lines like:
+# 353 00008098 SECT4 notype External | opt_junk
+# 3F1 00000000 SECT7 notype () External | malloc_initialized
+($3 ~ /^SECT[0-9]+/ && $(NF-2) == "External" && !($NF in exported_symbols)) {
+ print $NF
+}
+EOF
diff --git a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt b/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
deleted file mode 100644
index 93516d242..000000000
--- a/deps/jemalloc/include/jemalloc/internal/private_symbols.txt
+++ /dev/null
@@ -1,413 +0,0 @@
-a0calloc
-a0free
-a0malloc
-arena_alloc_junk_small
-arena_bin_index
-arena_bin_info
-arena_boot
-arena_dalloc
-arena_dalloc_bin
-arena_dalloc_bin_locked
-arena_dalloc_junk_large
-arena_dalloc_junk_small
-arena_dalloc_large
-arena_dalloc_large_locked
-arena_dalloc_small
-arena_dss_prec_get
-arena_dss_prec_set
-arena_malloc
-arena_malloc_large
-arena_malloc_small
-arena_mapbits_allocated_get
-arena_mapbits_binind_get
-arena_mapbits_dirty_get
-arena_mapbits_get
-arena_mapbits_large_binind_set
-arena_mapbits_large_get
-arena_mapbits_large_set
-arena_mapbits_large_size_get
-arena_mapbits_small_runind_get
-arena_mapbits_small_set
-arena_mapbits_unallocated_set
-arena_mapbits_unallocated_size_get
-arena_mapbits_unallocated_size_set
-arena_mapbits_unzeroed_get
-arena_mapbits_unzeroed_set
-arena_mapbitsp_get
-arena_mapbitsp_read
-arena_mapbitsp_write
-arena_mapp_get
-arena_maxclass
-arena_new
-arena_palloc
-arena_postfork_child
-arena_postfork_parent
-arena_prefork
-arena_prof_accum
-arena_prof_accum_impl
-arena_prof_accum_locked
-arena_prof_ctx_get
-arena_prof_ctx_set
-arena_prof_promoted
-arena_ptr_small_binind_get
-arena_purge_all
-arena_quarantine_junk_small
-arena_ralloc
-arena_ralloc_junk_large
-arena_ralloc_no_move
-arena_redzone_corruption
-arena_run_regind
-arena_salloc
-arena_stats_merge
-arena_tcache_fill_small
-arenas
-arenas_booted
-arenas_cleanup
-arenas_extend
-arenas_initialized
-arenas_lock
-arenas_tls
-arenas_tsd
-arenas_tsd_boot
-arenas_tsd_cleanup_wrapper
-arenas_tsd_get
-arenas_tsd_get_wrapper
-arenas_tsd_init_head
-arenas_tsd_set
-atomic_add_u
-atomic_add_uint32
-atomic_add_uint64
-atomic_add_z
-atomic_sub_u
-atomic_sub_uint32
-atomic_sub_uint64
-atomic_sub_z
-base_alloc
-base_boot
-base_calloc
-base_node_alloc
-base_node_dealloc
-base_postfork_child
-base_postfork_parent
-base_prefork
-bitmap_full
-bitmap_get
-bitmap_info_init
-bitmap_info_ngroups
-bitmap_init
-bitmap_set
-bitmap_sfu
-bitmap_size
-bitmap_unset
-bt_init
-buferror
-choose_arena
-choose_arena_hard
-chunk_alloc
-chunk_alloc_dss
-chunk_alloc_mmap
-chunk_boot
-chunk_dealloc
-chunk_dealloc_mmap
-chunk_dss_boot
-chunk_dss_postfork_child
-chunk_dss_postfork_parent
-chunk_dss_prec_get
-chunk_dss_prec_set
-chunk_dss_prefork
-chunk_in_dss
-chunk_npages
-chunk_postfork_child
-chunk_postfork_parent
-chunk_prefork
-chunk_unmap
-chunks_mtx
-chunks_rtree
-chunksize
-chunksize_mask
-ckh_bucket_search
-ckh_count
-ckh_delete
-ckh_evict_reloc_insert
-ckh_insert
-ckh_isearch
-ckh_iter
-ckh_new
-ckh_pointer_hash
-ckh_pointer_keycomp
-ckh_rebuild
-ckh_remove
-ckh_search
-ckh_string_hash
-ckh_string_keycomp
-ckh_try_bucket_insert
-ckh_try_insert
-ctl_boot
-ctl_bymib
-ctl_byname
-ctl_nametomib
-ctl_postfork_child
-ctl_postfork_parent
-ctl_prefork
-dss_prec_names
-extent_tree_ad_first
-extent_tree_ad_insert
-extent_tree_ad_iter
-extent_tree_ad_iter_recurse
-extent_tree_ad_iter_start
-extent_tree_ad_last
-extent_tree_ad_new
-extent_tree_ad_next
-extent_tree_ad_nsearch
-extent_tree_ad_prev
-extent_tree_ad_psearch
-extent_tree_ad_remove
-extent_tree_ad_reverse_iter
-extent_tree_ad_reverse_iter_recurse
-extent_tree_ad_reverse_iter_start
-extent_tree_ad_search
-extent_tree_szad_first
-extent_tree_szad_insert
-extent_tree_szad_iter
-extent_tree_szad_iter_recurse
-extent_tree_szad_iter_start
-extent_tree_szad_last
-extent_tree_szad_new
-extent_tree_szad_next
-extent_tree_szad_nsearch
-extent_tree_szad_prev
-extent_tree_szad_psearch
-extent_tree_szad_remove
-extent_tree_szad_reverse_iter
-extent_tree_szad_reverse_iter_recurse
-extent_tree_szad_reverse_iter_start
-extent_tree_szad_search
-get_errno
-hash
-hash_fmix_32
-hash_fmix_64
-hash_get_block_32
-hash_get_block_64
-hash_rotl_32
-hash_rotl_64
-hash_x64_128
-hash_x86_128
-hash_x86_32
-huge_allocated
-huge_boot
-huge_dalloc
-huge_dalloc_junk
-huge_dss_prec_get
-huge_malloc
-huge_mtx
-huge_ndalloc
-huge_nmalloc
-huge_palloc
-huge_postfork_child
-huge_postfork_parent
-huge_prefork
-huge_prof_ctx_get
-huge_prof_ctx_set
-huge_ralloc
-huge_ralloc_no_move
-huge_salloc
-iallocm
-icalloc
-icalloct
-idalloc
-idalloct
-imalloc
-imalloct
-ipalloc
-ipalloct
-iqalloc
-iqalloct
-iralloc
-iralloct
-iralloct_realign
-isalloc
-isthreaded
-ivsalloc
-ixalloc
-jemalloc_postfork_child
-jemalloc_postfork_parent
-jemalloc_prefork
-malloc_cprintf
-malloc_mutex_init
-malloc_mutex_lock
-malloc_mutex_postfork_child
-malloc_mutex_postfork_parent
-malloc_mutex_prefork
-malloc_mutex_unlock
-malloc_printf
-malloc_snprintf
-malloc_strtoumax
-malloc_tsd_boot
-malloc_tsd_cleanup_register
-malloc_tsd_dalloc
-malloc_tsd_malloc
-malloc_tsd_no_cleanup
-malloc_vcprintf
-malloc_vsnprintf
-malloc_write
-map_bias
-mb_write
-mutex_boot
-narenas_auto
-narenas_total
-narenas_total_get
-ncpus
-nhbins
-opt_abort
-opt_dss
-opt_junk
-opt_lg_chunk
-opt_lg_dirty_mult
-opt_lg_prof_interval
-opt_lg_prof_sample
-opt_lg_tcache_max
-opt_narenas
-opt_prof
-opt_prof_accum
-opt_prof_active
-opt_prof_final
-opt_prof_gdump
-opt_prof_leak
-opt_prof_prefix
-opt_quarantine
-opt_redzone
-opt_stats_print
-opt_tcache
-opt_utrace
-opt_valgrind
-opt_xmalloc
-opt_zero
-p2rz
-pages_purge
-pow2_ceil
-prof_backtrace
-prof_boot0
-prof_boot1
-prof_boot2
-prof_bt_count
-prof_ctx_get
-prof_ctx_set
-prof_dump_open
-prof_free
-prof_gdump
-prof_idump
-prof_interval
-prof_lookup
-prof_malloc
-prof_mdump
-prof_postfork_child
-prof_postfork_parent
-prof_prefork
-prof_promote
-prof_realloc
-prof_sample_accum_update
-prof_sample_threshold_update
-prof_tdata_booted
-prof_tdata_cleanup
-prof_tdata_get
-prof_tdata_init
-prof_tdata_initialized
-prof_tdata_tls
-prof_tdata_tsd
-prof_tdata_tsd_boot
-prof_tdata_tsd_cleanup_wrapper
-prof_tdata_tsd_get
-prof_tdata_tsd_get_wrapper
-prof_tdata_tsd_init_head
-prof_tdata_tsd_set
-quarantine
-quarantine_alloc_hook
-quarantine_boot
-quarantine_booted
-quarantine_cleanup
-quarantine_init
-quarantine_tls
-quarantine_tsd
-quarantine_tsd_boot
-quarantine_tsd_cleanup_wrapper
-quarantine_tsd_get
-quarantine_tsd_get_wrapper
-quarantine_tsd_init_head
-quarantine_tsd_set
-register_zone
-rtree_delete
-rtree_get
-rtree_get_locked
-rtree_new
-rtree_postfork_child
-rtree_postfork_parent
-rtree_prefork
-rtree_set
-s2u
-sa2u
-set_errno
-small_size2bin
-stats_cactive
-stats_cactive_add
-stats_cactive_get
-stats_cactive_sub
-stats_chunks
-stats_print
-tcache_alloc_easy
-tcache_alloc_large
-tcache_alloc_small
-tcache_alloc_small_hard
-tcache_arena_associate
-tcache_arena_dissociate
-tcache_bin_flush_large
-tcache_bin_flush_small
-tcache_bin_info
-tcache_boot0
-tcache_boot1
-tcache_booted
-tcache_create
-tcache_dalloc_large
-tcache_dalloc_small
-tcache_destroy
-tcache_enabled_booted
-tcache_enabled_get
-tcache_enabled_initialized
-tcache_enabled_set
-tcache_enabled_tls
-tcache_enabled_tsd
-tcache_enabled_tsd_boot
-tcache_enabled_tsd_cleanup_wrapper
-tcache_enabled_tsd_get
-tcache_enabled_tsd_get_wrapper
-tcache_enabled_tsd_init_head
-tcache_enabled_tsd_set
-tcache_event
-tcache_event_hard
-tcache_flush
-tcache_get
-tcache_initialized
-tcache_maxclass
-tcache_salloc
-tcache_stats_merge
-tcache_thread_cleanup
-tcache_tls
-tcache_tsd
-tcache_tsd_boot
-tcache_tsd_cleanup_wrapper
-tcache_tsd_get
-tcache_tsd_get_wrapper
-tcache_tsd_init_head
-tcache_tsd_set
-thread_allocated_booted
-thread_allocated_initialized
-thread_allocated_tls
-thread_allocated_tsd
-thread_allocated_tsd_boot
-thread_allocated_tsd_cleanup_wrapper
-thread_allocated_tsd_get
-thread_allocated_tsd_get_wrapper
-thread_allocated_tsd_init_head
-thread_allocated_tsd_set
-tsd_init_check_recursion
-tsd_init_finish
-u2rz
diff --git a/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh b/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh
deleted file mode 100755
index 23fed8e80..000000000
--- a/deps/jemalloc/include/jemalloc/internal/private_unnamespace.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-for symbol in `cat $1` ; do
- echo "#undef ${symbol}"
-done
diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h
index 7b2b06512..15cc2d18f 100644
--- a/deps/jemalloc/include/jemalloc/internal/prng.h
+++ b/deps/jemalloc/include/jemalloc/internal/prng.h
@@ -1,5 +1,8 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_PRNG_H
+#define JEMALLOC_INTERNAL_PRNG_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bit_util.h"
/*
* Simple linear congruential pseudo-random number generator:
@@ -15,46 +18,168 @@
* See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
*
* This choice of m has the disadvantage that the quality of the bits is
- * proportional to bit position. For example. the lowest bit has a cycle of 2,
+ * proportional to bit position. For example, the lowest bit has a cycle of 2,
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
- *
- * Macro parameters:
- * uint32_t r : Result.
- * unsigned lg_range : (0..32], number of least significant bits to return.
- * uint32_t state : Seed value.
- * const uint32_t a, c : See above discussion.
*/
-#define prng32(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 32); \
- \
- r = (state * (a)) + (c); \
- state = r; \
- r >>= (32 - lg_range); \
-} while (false)
-
-/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
-#define prng64(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 64); \
- \
- r = (state * (a)) + (c); \
- state = r; \
- r >>= (64 - lg_range); \
-} while (false)
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-#endif /* JEMALLOC_H_EXTERNS */
+/* INTERNAL DEFINITIONS -- IGNORE */
/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
+#define PRNG_A_32 UINT32_C(1103515241)
+#define PRNG_C_32 UINT32_C(12347)
+
+#define PRNG_A_64 UINT64_C(6364136223846793005)
+#define PRNG_C_64 UINT64_C(1442695040888963407)
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_state_next_u32(uint32_t state) {
+ return (state * PRNG_A_32) + PRNG_C_32;
+}
+
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_state_next_u64(uint64_t state) {
+ return (state * PRNG_A_64) + PRNG_C_64;
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_state_next_zu(size_t state) {
+#if LG_SIZEOF_PTR == 2
+ return (state * PRNG_A_32) + PRNG_C_32;
+#elif LG_SIZEOF_PTR == 3
+ return (state * PRNG_A_64) + PRNG_C_64;
+#else
+#error Unsupported pointer size
+#endif
+}
-#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
+/* BEGIN PUBLIC API */
+/******************************************************************************/
+
+/*
+ * The prng_lg_range functions give a uniform int in the half-open range [0,
+ * 2**lg_range). If atomic is true, they do so safely from multiple threads.
+ * Multithreaded 64-bit prngs aren't supported.
+ */
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) {
+ uint32_t ret, state0, state1;
+
+ assert(lg_range > 0);
+ assert(lg_range <= 32);
+
+ state0 = atomic_load_u32(state, ATOMIC_RELAXED);
+
+ if (atomic) {
+ do {
+ state1 = prng_state_next_u32(state0);
+ } while (!atomic_compare_exchange_weak_u32(state, &state0,
+ state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+ } else {
+ state1 = prng_state_next_u32(state0);
+ atomic_store_u32(state, state1, ATOMIC_RELAXED);
+ }
+ ret = state1 >> (32 - lg_range);
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range_u64(uint64_t *state, unsigned lg_range) {
+ uint64_t ret, state1;
+
+ assert(lg_range > 0);
+ assert(lg_range <= 64);
+
+ state1 = prng_state_next_u64(*state);
+ *state = state1;
+ ret = state1 >> (64 - lg_range);
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
+ size_t ret, state0, state1;
+
+ assert(lg_range > 0);
+ assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
+
+ state0 = atomic_load_zu(state, ATOMIC_RELAXED);
+
+ if (atomic) {
+ do {
+ state1 = prng_state_next_zu(state0);
+ } while (!atomic_compare_exchange_weak_zu(state, &state0,
+ state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+ } else {
+ state1 = prng_state_next_zu(state0);
+ atomic_store_zu(state, state1, ATOMIC_RELAXED);
+ }
+ ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+
+ return ret;
+}
+
+/*
+ * The prng_range functions behave like the prng_lg_range functions, but
+ * return a result in [0, range) instead of [0, 2**lg_range).
+ */
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
+ uint32_t ret;
+ unsigned lg_range;
+
+ assert(range > 1);
+
+ /* Compute the ceiling of lg(range). */
+ lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+
+ /* Generate a result in [0..range) via repeated trial. */
+ do {
+ ret = prng_lg_range_u32(state, lg_range, atomic);
+ } while (ret >= range);
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_range_u64(uint64_t *state, uint64_t range) {
+ uint64_t ret;
+ unsigned lg_range;
+
+ assert(range > 1);
+
+ /* Compute the ceiling of lg(range). */
+ lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+ /* Generate a result in [0..range) via repeated trial. */
+ do {
+ ret = prng_lg_range_u64(state, lg_range);
+ } while (ret >= range);
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) {
+ size_t ret;
+ unsigned lg_range;
+
+ assert(range > 1);
+
+ /* Compute the ceiling of lg(range). */
+ lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+ /* Generate a result in [0..range) via repeated trial. */
+ do {
+ ret = prng_lg_range_zu(state, lg_range, atomic);
+ } while (ret >= range);
+
+ return ret;
+}
+
+#endif /* JEMALLOC_INTERNAL_PRNG_H */
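
The generator itself is just state = state * a + c, with results taken from the high bits because, as the header comment notes, the low bits of this LCG have short cycles. A standalone sketch of the non-atomic 64-bit path, reusing the constants from the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    #define PRNG_A_64 UINT64_C(6364136223846793005)
    #define PRNG_C_64 UINT64_C(1442695040888963407)

    /* Mirrors prng_lg_range_u64(): step the LCG, keep the top lg_range bits. */
    static uint64_t
    my_prng_lg_range_u64(uint64_t *state, unsigned lg_range) {
        *state = (*state * PRNG_A_64) + PRNG_C_64;
        return *state >> (64 - lg_range);
    }

    int main(void) {
        uint64_t state = 42;                /* arbitrary seed */
        for (int i = 0; i < 4; i++) {
            printf("%llu\n",
                (unsigned long long)my_prng_lg_range_u64(&state, 8));
        }
        return 0;
    }
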
diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h
deleted file mode 100644
index 6f162d21e..000000000
--- a/deps/jemalloc/include/jemalloc/internal/prof.h
+++ /dev/null
@@ -1,613 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct prof_bt_s prof_bt_t;
-typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
-typedef struct prof_tdata_s prof_tdata_t;
-
-/* Option defaults. */
-#ifdef JEMALLOC_PROF
-# define PROF_PREFIX_DEFAULT "jeprof"
-#else
-# define PROF_PREFIX_DEFAULT ""
-#endif
-#define LG_PROF_SAMPLE_DEFAULT 19
-#define LG_PROF_INTERVAL_DEFAULT -1
-
-/*
- * Hard limit on stack backtrace depth. The version of prof_backtrace() that
- * is based on __builtin_return_address() necessarily has a hard-coded number
- * of backtrace frame handlers, and should be kept in sync with this setting.
- */
-#define PROF_BT_MAX 128
-
-/* Maximum number of backtraces to store in each per thread LRU cache. */
-#define PROF_TCMAX 1024
-
-/* Initial hash table size. */
-#define PROF_CKH_MINITEMS 64
-
-/* Size of memory buffer to use when writing dump files. */
-#define PROF_DUMP_BUFSIZE 65536
-
-/* Size of stack-allocated buffer used by prof_printf(). */
-#define PROF_PRINTF_BUFSIZE 128
-
-/*
- * Number of mutexes shared among all ctx's. No space is allocated for these
- * unless profiling is enabled, so it's okay to over-provision.
- */
-#define PROF_NCTX_LOCKS 1024
-
-/*
- * prof_tdata pointers close to NULL are used to encode state information that
- * is used for cleaning up during thread shutdown.
- */
-#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
-#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
-#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-struct prof_bt_s {
- /* Backtrace, stored as len program counters. */
- void **vec;
- unsigned len;
-};
-
-#ifdef JEMALLOC_PROF_LIBGCC
-/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
-typedef struct {
- prof_bt_t *bt;
- unsigned nignore;
- unsigned max;
-} prof_unwind_data_t;
-#endif
-
-struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
- uint64_t accumobjs;
- uint64_t accumbytes;
-};
-
-struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's cnts_ql. */
- ql_elm(prof_thr_cnt_t) cnts_link;
-
- /* Linkage into thread's LRU. */
- ql_elm(prof_thr_cnt_t) lru_link;
-
- /*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not cached in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's cnts_ql.
- */
- prof_ctx_t *ctx;
-
- /*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
- * The reader must assure 1) that the epoch is even while it reads the
- * counters, and 2) that the epoch doesn't change between the time it
- * starts and finishes reading the counters.
- */
- unsigned epoch;
-
- /* Profiling counters. */
- prof_cnt_t cnts;
-};
-
-struct prof_ctx_s {
- /* Associated backtrace. */
- prof_bt_t *bt;
-
- /* Protects nlimbo, cnt_merged, and cnts_ql. */
- malloc_mutex_t *lock;
-
- /*
- * Number of threads that currently cause this ctx to be in a state of
- * limbo due to one of:
- * - Initializing per thread counters associated with this ctx.
- * - Preparing to destroy this ctx.
- * - Dumping a heap profile that includes this ctx.
- * nlimbo must be 1 (single destroyer) in order to safely destroy the
- * ctx.
- */
- unsigned nlimbo;
-
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
- /*
- * List of profile counters, one for each thread that has allocated in
- * this context.
- */
- ql_head(prof_thr_cnt_t) cnts_ql;
-
- /* Linkage for list of contexts to be dumped. */
- ql_elm(prof_ctx_t) dump_link;
-};
-typedef ql_head(prof_ctx_t) prof_ctx_list_t;
-
-struct prof_tdata_s {
- /*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
- * cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no
- * others will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
- */
- ckh_t bt2cnt;
-
- /* LRU for contents of bt2cnt. */
- ql_head(prof_thr_cnt_t) lru_ql;
-
- /* Backtrace vector, used for calls to prof_backtrace(). */
- void **vec;
-
- /* Sampling state. */
- uint64_t prng_state;
- uint64_t threshold;
- uint64_t accum;
-
- /* State used to avoid dumping while operating on prof internals. */
- bool enq;
- bool enq_idump;
- bool enq_gdump;
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern bool opt_prof;
-/*
- * Even if opt_prof is true, sampling can be temporarily disabled by setting
- * opt_prof_active to false. No locking is used when updating opt_prof_active,
- * so there are no guarantees regarding how long it will take for all threads
- * to notice state changes.
- */
-extern bool opt_prof_active;
-extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
-extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool opt_prof_gdump; /* High-water memory dumping. */
-extern bool opt_prof_final; /* Final profile dumping. */
-extern bool opt_prof_leak; /* Dump leak summary at exit. */
-extern bool opt_prof_accum; /* Report cumulative bytes. */
-extern char opt_prof_prefix[
- /* Minimize memory bloat for non-prof builds. */
-#ifdef JEMALLOC_PROF
- PATH_MAX +
-#endif
- 1];
-
-/*
- * Profile dump interval, measured in bytes allocated. Each arena triggers a
- * profile dump when it reaches this threshold. The effect is that the
- * interval between profile dumps averages prof_interval, though the actual
- * interval between dumps will tend to be sporadic, and the interval will be a
- * maximum of approximately (prof_interval * narenas).
- */
-extern uint64_t prof_interval;
-
-/*
- * If true, promote small sampled objects to large objects, since small run
- * headers do not have embedded profile context pointers.
- */
-extern bool prof_promote;
-
-void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt, unsigned nignore);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
-#ifdef JEMALLOC_JET
-size_t prof_bt_count(void);
-typedef int (prof_dump_open_t)(bool, const char *);
-extern prof_dump_open_t *prof_dump_open;
-#endif
-void prof_idump(void);
-bool prof_mdump(const char *filename);
-void prof_gdump(void);
-prof_tdata_t *prof_tdata_init(void);
-void prof_tdata_cleanup(void *arg);
-void prof_boot0(void);
-void prof_boot1(void);
-bool prof_boot2(void);
-void prof_prefork(void);
-void prof_postfork_parent(void);
-void prof_postfork_child(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#define PROF_ALLOC_PREP(nignore, size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- prof_tdata = prof_tdata_get(true); \
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
- if (prof_tdata != NULL) \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- else \
- ret = NULL; \
- break; \
- } \
- \
- if (opt_prof_active == false) { \
- /* Sampling is currently inactive, so avoid sampling. */\
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else if (opt_lg_prof_sample == 0) { \
- /* Don't bother with sampling logic, since sampling */\
- /* interval is 1. */\
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else { \
- if (prof_tdata->threshold == 0) { \
- /* Initialize. Seed the prng differently for */\
- /* each thread. */\
- prof_tdata->prng_state = \
- (uint64_t)(uintptr_t)&size; \
- prof_sample_threshold_update(prof_tdata); \
- } \
- \
- /* Determine whether to capture a backtrace based on */\
- /* whether size is enough for prof_accum to reach */\
- /* prof_tdata->threshold. However, delay updating */\
- /* these variables until prof_{m,re}alloc(), because */\
- /* we don't know for sure that the allocation will */\
- /* succeed. */\
- /* */\
- /* Use subtraction rather than addition to avoid */\
- /* potential integer overflow. */\
- if (size >= prof_tdata->threshold - \
- prof_tdata->accum) { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore); \
- ret = prof_lookup(&bt); \
- } else \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } \
-} while (0)
-
-#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
-
-prof_tdata_t *prof_tdata_get(bool create);
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
-bool prof_sample_accum_update(size_t size);
-void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-malloc_tsd_externs(prof_tdata, prof_tdata_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
- prof_tdata_cleanup)
-
-JEMALLOC_INLINE prof_tdata_t *
-prof_tdata_get(bool create)
-{
- prof_tdata_t *prof_tdata;
-
- cassert(config_prof);
-
- prof_tdata = *prof_tdata_tsd_get();
- if (create && prof_tdata == NULL)
- prof_tdata = prof_tdata_init();
-
- return (prof_tdata);
-}
-
-JEMALLOC_INLINE void
-prof_sample_threshold_update(prof_tdata_t *prof_tdata)
-{
- /*
- * The body of this function is compiled out unless heap profiling is
- * enabled, so that it is possible to compile jemalloc with floating
- * point support completely disabled. Avoiding floating point code is
- * important on memory-constrained systems, but it also enables a
- * workaround for versions of glibc that don't properly save/restore
- * floating point registers during dynamic lazy symbol loading (which
- * internally calls into whatever malloc implementation happens to be
- * integrated into the application). Note that some compilers (e.g.
- * gcc 4.8) may use floating point registers for fast memory moves, so
- * jemalloc must be compiled with such optimizations disabled (e.g.
- * -mno-sse) in order for the workaround to be complete.
- */
-#ifdef JEMALLOC_PROF
- uint64_t r;
- double u;
-
- cassert(config_prof);
-
- /*
- * Compute sample threshold as a geometrically distributed random
- * variable with mean (2^opt_lg_prof_sample).
- *
- * __ __
- * | log(u) | 1
- * prof_tdata->threshold = | -------- |, where p = -------------------
- * | log(1-p) | opt_lg_prof_sample
- * 2
- *
- * For more information on the math, see:
- *
- * Non-Uniform Random Variate Generation
- * Luc Devroye
- * Springer-Verlag, New York, 1986
- * pp 500
- * (http://luc.devroye.org/rnbookindex.html)
- */
- prng64(r, 53, prof_tdata->prng_state,
- UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
- u = (double)r * (1.0/9007199254740992.0L);
- prof_tdata->threshold = (uint64_t)(log(u) /
- log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
- + (uint64_t)1U;
-#endif
-}
-
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
-
- cassert(config_prof);
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
-}
-
-JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
-{
- arena_chunk_t *chunk;
-
- cassert(config_prof);
- assert(ptr != NULL);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- arena_prof_ctx_set(ptr, usize, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
-}
-
-JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size)
-{
- prof_tdata_t *prof_tdata;
-
- cassert(config_prof);
- /* Sampling logic is unnecessary if the interval is 1. */
- assert(opt_lg_prof_sample != 0);
-
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- return (true);
-
- /* Take care to avoid integer overflow. */
- if (size >= prof_tdata->threshold - prof_tdata->accum) {
- prof_tdata->accum -= (prof_tdata->threshold - size);
- /* Compute new sample threshold. */
- prof_sample_threshold_update(prof_tdata);
- while (prof_tdata->accum >= prof_tdata->threshold) {
- prof_tdata->accum -= prof_tdata->threshold;
- prof_sample_threshold_update(prof_tdata);
- }
- return (false);
- } else {
- prof_tdata->accum += size;
- return (true);
- }
-}
-
-JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
-{
-
- cassert(config_prof);
- assert(ptr != NULL);
- assert(usize == isalloc(ptr, true));
-
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the usize passed to
- * PROF_ALLOC_PREP() and prof_malloc().
- */
- assert((uintptr_t)cnt == (uintptr_t)1U);
- }
- }
-
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
-}
-
-JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx)
-{
- prof_thr_cnt_t *told_cnt;
-
- cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
-
- if (ptr != NULL) {
- assert(usize == isalloc(ptr, true));
- if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(usize)) {
- /*
- * Don't sample. The usize passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual usize was insufficient to cross
- * the sample threshold.
- */
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- }
- }
-
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
- /*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_usize;
- malloc_mutex_unlock(old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, usize, cnt->ctx);
- cnt->epoch++;
- } else if (ptr != NULL)
- prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_usize;
- }
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
-}
-
-JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
-{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
-
- cassert(config_prof);
-
- if ((uintptr_t)ctx > (uintptr_t)1) {
- prof_thr_cnt_t *tcnt;
- assert(size == isalloc(ptr, true));
- tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(ctx->lock);
- }
- }
-}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
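The deleted prof_sample_threshold_update() above draws the next sample threshold as a geometric variate: threshold = ceil(log(u) / log(1 - p)) with p = 2^(-opt_lg_prof_sample), per the Devroye reference in the comment. For illustration, a minimal standalone sketch of that draw (function name hypothetical; 9007199254740992 is 2^53, matching the 53 bits requested from the PRNG, and u is assumed uniform in (0, 1)):

    #include <math.h>
    #include <stdint.h>

    /*
     * Geometric threshold draw, as in the deleted inline above.  With
     * lg_sample = 19 the mean is 2^19 = 524288 bytes between samples.
     * Assumes 0 < u < 1 and lg_sample < 64.
     */
    static uint64_t
    sample_threshold(double u, unsigned lg_sample) {
        double p = 1.0 / (double)((uint64_t)1U << lg_sample);
        /* log(u) and log(1-p) are both negative; the ratio is >= 0. */
        return (uint64_t)(log(u) / log(1.0 - p)) + 1;
    }

Since the truncating cast plus 1 equals the ceiling for non-integral arguments, this matches the bracketed formula in the deleted comment.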
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_externs.h b/deps/jemalloc/include/jemalloc/internal/prof_externs.h
new file mode 100644
index 000000000..04348696f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prof_externs.h
@@ -0,0 +1,92 @@
+#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
+#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
+
+#include "jemalloc/internal/mutex.h"
+
+extern malloc_mutex_t bt2gctx_mtx;
+
+extern bool opt_prof;
+extern bool opt_prof_active;
+extern bool opt_prof_thread_active_init;
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool opt_prof_gdump; /* High-water memory dumping. */
+extern bool opt_prof_final; /* Final profile dumping. */
+extern bool opt_prof_leak; /* Dump leak summary at exit. */
+extern bool opt_prof_accum; /* Report cumulative bytes. */
+extern char opt_prof_prefix[
+ /* Minimize memory bloat for non-prof builds. */
+#ifdef JEMALLOC_PROF
+ PATH_MAX +
+#endif
+ 1];
+
+/* Accessed via prof_active_[gs]et{_unlocked,}(). */
+extern bool prof_active;
+
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool prof_gdump_val;
+
+/*
+ * Profile dump interval, measured in bytes allocated. Each arena triggers a
+ * profile dump when it reaches this threshold. The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t prof_interval;
+
+/*
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
+ */
+extern size_t lg_prof_sample;
+
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
+void bt_init(prof_bt_t *bt, void **vec);
+void prof_backtrace(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
+#ifdef JEMALLOC_JET
+size_t prof_tdata_count(void);
+size_t prof_bt_count(void);
+#endif
+typedef int (prof_dump_open_t)(bool, const char *);
+extern prof_dump_open_t *JET_MUTABLE prof_dump_open;
+
+typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
+extern prof_dump_header_t *JET_MUTABLE prof_dump_header;
+#ifdef JEMALLOC_JET
+void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
+ uint64_t *accumbytes);
+#endif
+bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
+void prof_idump(tsdn_t *tsdn);
+bool prof_mdump(tsd_t *tsd, const char *filename);
+void prof_gdump(tsdn_t *tsdn);
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tdata_cleanup(tsd_t *tsd);
+bool prof_active_get(tsdn_t *tsdn);
+bool prof_active_set(tsdn_t *tsdn, bool active);
+const char *prof_thread_name_get(tsd_t *tsd);
+int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool prof_thread_active_get(tsd_t *tsd);
+bool prof_thread_active_set(tsd_t *tsd, bool active);
+bool prof_thread_active_init_get(tsdn_t *tsdn);
+bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool prof_gdump_get(tsdn_t *tsdn);
+bool prof_gdump_set(tsdn_t *tsdn, bool active);
+void prof_boot0(void);
+void prof_boot1(void);
+bool prof_boot2(tsd_t *tsd);
+void prof_prefork0(tsdn_t *tsdn);
+void prof_prefork1(tsdn_t *tsdn);
+void prof_postfork_parent(tsdn_t *tsdn);
+void prof_postfork_child(tsdn_t *tsdn);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
+
+#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
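For concreteness, prof_interval is derived from opt_lg_prof_interval at boot; a sketch consistent with the comments above (not a copy of the actual boot code):

    /* lg_prof_interval = 30 gives a ~1 GiB average dump interval. */
    uint64_t prof_interval = (opt_lg_prof_interval >= 0)
        ? ((uint64_t)1U << opt_lg_prof_interval)
        : 0;    /* Negative lg value disables interval-triggered dumps. */

Per the comment above, with e.g. 8 arenas the worst-case gap between interval-triggered dumps approaches prof_interval * 8.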
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
new file mode 100644
index 000000000..a6efb4851
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
@@ -0,0 +1,83 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
+
+#include "jemalloc/internal/mutex.h"
+
+static inline bool
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+ cassert(config_prof);
+
+ bool overflow;
+ uint64_t a0, a1;
+
+ /*
+ * If the application allocates fast enough (and/or if idump is slow
+ * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+ * idump trigger coalescing. This is an intentional mechanism that
+ * avoids rate-limiting allocation.
+ */
+#ifdef JEMALLOC_ATOMIC_U64
+ a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
+ do {
+ a1 = a0 + accumbytes;
+ assert(a1 >= a0);
+ overflow = (a1 >= prof_interval);
+ if (overflow) {
+ a1 %= prof_interval;
+ }
+ } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
+ a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+#else
+ malloc_mutex_lock(tsdn, &prof_accum->mtx);
+ a0 = prof_accum->accumbytes;
+ a1 = a0 + accumbytes;
+ overflow = (a1 >= prof_interval);
+ if (overflow) {
+ a1 %= prof_interval;
+ }
+ prof_accum->accumbytes = a1;
+ malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+ return overflow;
+}
+
+static inline void
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+ cassert(config_prof);
+
+ /*
+ * Cancel out as much of the excessive prof_accumbytes increase as
+ * possible without underflowing. Interval-triggered dumps occur
+ * slightly more often than intended as a result of incomplete
+ * canceling.
+ */
+ uint64_t a0, a1;
+#ifdef JEMALLOC_ATOMIC_U64
+ a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
+ do {
+ a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
+ usize) : 0;
+ } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
+ a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+#else
+ malloc_mutex_lock(tsdn, &prof_accum->mtx);
+ a0 = prof_accum->accumbytes;
+ a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
+ 0;
+ prof_accum->accumbytes = a1;
+ malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void) {
+ /*
+ * Even if opt_prof is true, sampling can be temporarily disabled by
+ * setting prof_active to false. No locking is used when reading
+ * prof_active in the fast path, so there are no guarantees regarding
+ * how long it will take for all threads to notice state changes.
+ */
+ return prof_active;
+}
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
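prof_accum_add() above wraps the running byte count modulo prof_interval inside a compare-and-swap loop, so concurrent threads can accumulate without a lock and multiple trigger events coalesce rather than rate-limiting allocation. A self-contained C11 sketch of the same pattern (illustrative only; names hypothetical, interval assumed > 0):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Returns true when the accumulated count crosses `interval`. */
    static bool
    accum_add(_Atomic uint64_t *acc, uint64_t n, uint64_t interval) {
        uint64_t a0 = atomic_load_explicit(acc, memory_order_relaxed);
        uint64_t a1;
        bool overflow;
        do {
            a1 = a0 + n;
            overflow = (a1 >= interval);
            if (overflow) {
                a1 %= interval;  /* Coalesce multiple trigger events. */
            }
        } while (!atomic_compare_exchange_weak_explicit(acc, &a0, a1,
            memory_order_relaxed, memory_order_relaxed));
        return overflow;
    }

On a failed exchange, a0 is reloaded with the current value and the loop retries, so exactly one thread observes each interval crossing.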
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
new file mode 100644
index 000000000..6ff465ad7
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
@@ -0,0 +1,206 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
+
+#include "jemalloc/internal/sz.h"
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void) {
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return prof_gdump_val;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create) {
+ prof_tdata_t *tdata;
+
+ cassert(config_prof);
+
+ tdata = tsd_prof_tdata_get(tsd);
+ if (create) {
+ if (unlikely(tdata == NULL)) {
+ if (tsd_nominal(tsd)) {
+ tdata = prof_tdata_init(tsd);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ } else if (unlikely(tdata->expired)) {
+ tdata = prof_tdata_reinit(tsd, tdata);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ assert(tdata == NULL || tdata->attached);
+ }
+
+ return tdata;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ return arena_prof_tctx_get(tsdn, ptr, alloc_ctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+ alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_tctx_reset(tsdn, ptr, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+ prof_tdata_t **tdata_out) {
+ prof_tdata_t *tdata;
+
+ cassert(config_prof);
+
+ tdata = prof_tdata_get(tsd, true);
+ if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
+ tdata = NULL;
+ }
+
+ if (tdata_out != NULL) {
+ *tdata_out = tdata;
+ }
+
+ if (unlikely(tdata == NULL)) {
+ return true;
+ }
+
+ if (likely(tdata->bytes_until_sample >= usize)) {
+ if (update) {
+ tdata->bytes_until_sample -= usize;
+ }
+ return true;
+ } else {
+ if (tsd_reentrancy_level_get(tsd) > 0) {
+ return true;
+ }
+ /* Compute new sample threshold. */
+ if (update) {
+ prof_sample_threshold_update(tdata);
+ }
+ return !tdata->active;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
+ prof_tctx_t *ret;
+ prof_tdata_t *tdata;
+ prof_bt_t bt;
+
+ assert(usize == sz_s2u(usize));
+
+ if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+ &tdata))) {
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ } else {
+ bt_init(&bt, tdata->vec);
+ prof_backtrace(&bt);
+ ret = prof_lookup(tsd, &bt);
+ }
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx,
+ prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+ assert(usize == isalloc(tsdn, ptr));
+
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+ prof_malloc_sample_object(tsdn, ptr, usize, tctx);
+ } else {
+ prof_tctx_set(tsdn, ptr, usize, alloc_ctx,
+ (prof_tctx_t *)(uintptr_t)1U);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
+ bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+ prof_tctx_t *old_tctx) {
+ bool sampled, old_sampled, moved;
+
+ cassert(config_prof);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
+
+ if (prof_active && !updated && ptr != NULL) {
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+ if (prof_sample_accum_update(tsd, usize, true, NULL)) {
+ /*
+ * Don't sample. The usize passed to prof_alloc_prep()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
+ */
+ prof_alloc_rollback(tsd, tctx, true);
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
+ }
+ }
+
+ sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+ old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+ moved = (ptr != old_ptr);
+
+ if (unlikely(sampled)) {
+ prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
+ } else if (moved) {
+ prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL,
+ (prof_tctx_t *)(uintptr_t)1U);
+ } else if (unlikely(old_sampled)) {
+ /*
+ * prof_tctx_set() would work for the !moved case as well, but
+ * prof_tctx_reset() is slightly cheaper, and the proper thing
+ * to do here in the presence of explicit knowledge re: moved
+ * state.
+ */
+ prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx);
+ } else {
+ assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) ==
+ (uintptr_t)1U);
+ }
+
+ /*
+ * The prof_free_sampled_object() call must come after the
+ * prof_malloc_sample_object() call, because tctx and old_tctx may be
+ * the same, in which case reversing the call order could cause the tctx
+ * to be prematurely destroyed as a side effect of momentarily zeroed
+ * counters.
+ */
+ if (unlikely(old_sampled)) {
+ prof_free_sampled_object(tsd, old_usize, old_tctx);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
+ prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+
+ cassert(config_prof);
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+ prof_free_sampled_object(tsd, usize, tctx);
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */
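prof_sample_accum_update() above is a countdown sampler: each allocation of usize bytes decrements bytes_until_sample, and a sample is taken once the countdown is exhausted, at which point the threshold is re-armed. A simplified single-threaded sketch of the decision (names hypothetical; the real code keeps the countdown in per-thread tdata):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical re-arm, e.g. the geometric draw sketched earlier. */
    extern uint64_t next_threshold(void);

    static uint64_t bytes_until_sample;

    /* Returns true if the allocation of usize bytes should be sampled. */
    static bool
    should_sample(size_t usize) {
        if (bytes_until_sample >= usize) {
            bytes_until_sample -= usize;  /* Fast path: keep counting down. */
            return false;
        }
        bytes_until_sample = next_threshold();  /* Re-arm the countdown. */
        return true;
    }

Note the inverted return convention in the real inline above: it returns true when sampling should *not* occur, which keeps the unsampled path on the likely() branch.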
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_structs.h b/deps/jemalloc/include/jemalloc/internal/prof_structs.h
new file mode 100644
index 000000000..0d58ae100
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prof_structs.h
@@ -0,0 +1,201 @@
+#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H
+#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
+
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/rb.h"
+
+struct prof_bt_s {
+ /* Backtrace, stored as len program counters. */
+ void **vec;
+ unsigned len;
+};
+
+#ifdef JEMALLOC_PROF_LIBGCC
+/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
+typedef struct {
+ prof_bt_t *bt;
+ unsigned max;
+} prof_unwind_data_t;
+#endif
+
+struct prof_accum_s {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_t mtx;
+ uint64_t accumbytes;
+#else
+ atomic_u64_t accumbytes;
+#endif
+};
+
+struct prof_cnt_s {
+ /* Profiling counters. */
+ uint64_t curobjs;
+ uint64_t curbytes;
+ uint64_t accumobjs;
+ uint64_t accumbytes;
+};
+
+typedef enum {
+ prof_tctx_state_initializing,
+ prof_tctx_state_nominal,
+ prof_tctx_state_dumping,
+ prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
+
+struct prof_tctx_s {
+ /* Thread data for thread that performed the allocation. */
+ prof_tdata_t *tdata;
+
+ /*
+ * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+ * defunct during teardown.
+ */
+ uint64_t thr_uid;
+ uint64_t thr_discrim;
+
+ /* Profiling counters, protected by tdata->lock. */
+ prof_cnt_t cnts;
+
+ /* Associated global context. */
+ prof_gctx_t *gctx;
+
+ /*
+ * UID that distinguishes multiple tctx's created by the same thread,
+ * but coexisting in gctx->tctxs. There are two ways that such
+ * coexistence can occur:
+ * - A dumper thread can cause a tctx to be retained in the purgatory
+ * state.
+ * - Although a single "producer" thread must create all tctx's which
+ * share the same thr_uid, multiple "consumers" can each concurrently
+ * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
+ * gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+ * threshold can be hit again before the first consumer finishes
+ * executing prof_tctx_destroy().
+ */
+ uint64_t tctx_uid;
+
+ /* Linkage into gctx's tctxs. */
+ rb_node(prof_tctx_t) tctx_link;
+
+ /*
+ * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+ * sample vs destroy race.
+ */
+ bool prepared;
+
+ /* Current dump-related state, protected by gctx->lock. */
+ prof_tctx_state_t state;
+
+ /*
+ * Copy of cnts snapshotted during early dump phase, protected by
+ * dump_mtx.
+ */
+ prof_cnt_t dump_cnts;
+};
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
+
+struct prof_gctx_s {
+ /* Protects nlimbo, cnt_summed, and tctxs. */
+ malloc_mutex_t *lock;
+
+ /*
+ * Number of threads that currently cause this gctx to be in a state of
+ * limbo due to one of:
+ * - Initializing this gctx.
+ * - Initializing per thread counters associated with this gctx.
+ * - Preparing to destroy this gctx.
+ * - Dumping a heap profile that includes this gctx.
+ * nlimbo must be 1 (single destroyer) in order to safely destroy the
+ * gctx.
+ */
+ unsigned nlimbo;
+
+ /*
+ * Tree of profile counters, one for each thread that has allocated in
+ * this context.
+ */
+ prof_tctx_tree_t tctxs;
+
+ /* Linkage for tree of contexts to be dumped. */
+ rb_node(prof_gctx_t) dump_link;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Associated backtrace. */
+ prof_bt_t bt;
+
+ /* Backtrace vector, variable size, referred to by bt. */
+ void *vec[1];
+};
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
+
+struct prof_tdata_s {
+ malloc_mutex_t *lock;
+
+ /* Monotonically increasing unique thread identifier. */
+ uint64_t thr_uid;
+
+ /*
+ * Monotonically increasing discriminator among tdata structures
+ * associated with the same thr_uid.
+ */
+ uint64_t thr_discrim;
+
+ /* Included in heap profile dumps if non-NULL. */
+ char *thread_name;
+
+ bool attached;
+ bool expired;
+
+ rb_node(prof_tdata_t) tdata_link;
+
+ /*
+ * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
+ * necessary when incrementing this field, because only one thread ever
+ * does so.
+ */
+ uint64_t tctx_uid_next;
+
+ /*
+ * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
+ * backtraces for which it has non-zero allocation/deallocation counters
+ * associated with thread-specific prof_tctx_t objects. Other threads
+ * may write to prof_tctx_t contents when freeing associated objects.
+ */
+ ckh_t bt2tctx;
+
+ /* Sampling state. */
+ uint64_t prng_state;
+ uint64_t bytes_until_sample;
+
+ /* State used to avoid dumping while operating on prof internals. */
+ bool enq;
+ bool enq_idump;
+ bool enq_gdump;
+
+ /*
+ * Set to true during an early dump phase for tdata's which are
+ * currently being dumped. New threads' tdata's have this initialized
+ * to false so that they aren't accidentally included in later dump
+ * phases.
+ */
+ bool dumping;
+
+ /*
+ * True if profiling is active for this tdata's thread
+ * (thread.prof.active mallctl).
+ */
+ bool active;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Backtrace vector, used for calls to prof_backtrace(). */
+ void *vec[PROF_BT_MAX];
+};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
+
+#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */
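The tctx_uid_next comment above relies on single-writer semantics: only the thread that owns a tdata ever creates tctx's sharing its thr_uid, so the counter needs no lock even though readers may observe it. A sketch of the intended assignment (hypothetical helper name):

    /* Called only by the thread that owns `tdata`. */
    static uint64_t
    tctx_uid_assign(prof_tdata_t *tdata) {
        return tdata->tctx_uid_next++;  /* No lock: single writer. */
    }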
diff --git a/deps/jemalloc/include/jemalloc/internal/prof_types.h b/deps/jemalloc/include/jemalloc/internal/prof_types.h
new file mode 100644
index 000000000..1eff995ec
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prof_types.h
@@ -0,0 +1,56 @@
+#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H
+#define JEMALLOC_INTERNAL_PROF_TYPES_H
+
+typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_accum_s prof_accum_t;
+typedef struct prof_cnt_s prof_cnt_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
+typedef struct prof_tdata_s prof_tdata_t;
+
+/* Option defaults. */
+#ifdef JEMALLOC_PROF
+# define PROF_PREFIX_DEFAULT "jeprof"
+#else
+# define PROF_PREFIX_DEFAULT ""
+#endif
+#define LG_PROF_SAMPLE_DEFAULT 19
+#define LG_PROF_INTERVAL_DEFAULT -1
+
+/*
+ * Hard limit on stack backtrace depth. The version of prof_backtrace() that
+ * is based on __builtin_return_address() necessarily has a hard-coded number
+ * of backtrace frame handlers, and should be kept in sync with this setting.
+ */
+#define PROF_BT_MAX 128
+
+/* Initial hash table size. */
+#define PROF_CKH_MINITEMS 64
+
+/* Size of memory buffer to use when writing dump files. */
+#define PROF_DUMP_BUFSIZE 65536
+
+/* Size of stack-allocated buffer used by prof_printf(). */
+#define PROF_PRINTF_BUFSIZE 128
+
+/*
+ * Number of mutexes shared among all gctx's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NCTX_LOCKS 1024
+
+/*
+ * Number of mutexes shared among all tdata's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS 256
+
+/*
+ * prof_tdata pointers close to NULL are used to encode state information that
+ * is used for cleaning up during thread shutdown.
+ */
+#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
+#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
+#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
+
+#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
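Two concrete consequences of the definitions above: LG_PROF_SAMPLE_DEFAULT of 19 yields a mean of 2^19 = 524288 bytes (512 KiB) between samples, and the PROF_TDATA_STATE_* sentinels let callers distinguish real tdata pointers from shutdown states with a single unsigned comparison, as prof_sample_accum_update() in prof_inlines_b.h does:

    prof_tdata_t *tdata = tsd_prof_tdata_get(tsd);  /* May be a sentinel. */
    if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
        /* NULL, REINCARNATED, or PURGATORY: no usable tdata. */
    } else {
        /* A real, dereferenceable prof_tdata_t. */
    }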
diff --git a/deps/jemalloc/include/jemalloc/internal/public_namespace.sh b/deps/jemalloc/include/jemalloc/internal/public_namespace.sh
index 362109f71..4d415ba01 100755
--- a/deps/jemalloc/include/jemalloc/internal/public_namespace.sh
+++ b/deps/jemalloc/include/jemalloc/internal/public_namespace.sh
@@ -2,5 +2,5 @@
for nm in `cat $1` ; do
n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'`
- echo "#define je_${n} JEMALLOC_N(${n})"
+ echo "#define je_${n} JEMALLOC_N(${n})"
done
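For reference, given a symbols file whose lines start with names such as malloc:..., the loop above emits namespace-mangling defines of the form (example output):

    #define je_malloc JEMALLOC_N(malloc)

The change itself only swaps spacing inside the emitted string; the script's behavior is otherwise unchanged.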
diff --git a/deps/jemalloc/include/jemalloc/internal/ql.h b/deps/jemalloc/include/jemalloc/internal/ql.h
index f70c5f6f3..802904077 100644
--- a/deps/jemalloc/include/jemalloc/internal/ql.h
+++ b/deps/jemalloc/include/jemalloc/internal/ql.h
@@ -1,61 +1,64 @@
-/*
- * List definitions.
- */
-#define ql_head(a_type) \
+#ifndef JEMALLOC_INTERNAL_QL_H
+#define JEMALLOC_INTERNAL_QL_H
+
+#include "jemalloc/internal/qr.h"
+
+/* List definitions. */
+#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
}
-#define ql_head_initializer(a_head) {NULL}
+#define ql_head_initializer(a_head) {NULL}
-#define ql_elm(a_type) qr(a_type)
+#define ql_elm(a_type) qr(a_type)
/* List functions. */
-#define ql_new(a_head) do { \
+#define ql_new(a_head) do { \
(a_head)->qlh_first = NULL; \
} while (0)
-#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
+#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
-#define ql_first(a_head) ((a_head)->qlh_first)
+#define ql_first(a_head) ((a_head)->qlh_first)
-#define ql_last(a_head, a_field) \
+#define ql_last(a_head, a_field) \
((ql_first(a_head) != NULL) \
? qr_prev(ql_first(a_head), a_field) : NULL)
-#define ql_next(a_head, a_elm, a_field) \
+#define ql_next(a_head, a_elm, a_field) \
((ql_last(a_head, a_field) != (a_elm)) \
? qr_next((a_elm), a_field) : NULL)
-#define ql_prev(a_head, a_elm, a_field) \
+#define ql_prev(a_head, a_elm, a_field) \
((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \
: NULL)
-#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
+#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
qr_before_insert((a_qlelm), (a_elm), a_field); \
if (ql_first(a_head) == (a_qlelm)) { \
ql_first(a_head) = (a_elm); \
} \
} while (0)
-#define ql_after_insert(a_qlelm, a_elm, a_field) \
+#define ql_after_insert(a_qlelm, a_elm, a_field) \
qr_after_insert((a_qlelm), (a_elm), a_field)
-#define ql_head_insert(a_head, a_elm, a_field) do { \
+#define ql_head_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = (a_elm); \
} while (0)
-#define ql_tail_insert(a_head, a_elm, a_field) do { \
+#define ql_tail_insert(a_head, a_elm, a_field) do { \
if (ql_first(a_head) != NULL) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = qr_next((a_elm), a_field); \
} while (0)
-#define ql_remove(a_head, a_elm, a_field) do { \
+#define ql_remove(a_head, a_elm, a_field) do { \
if (ql_first(a_head) == (a_elm)) { \
ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
} \
@@ -66,18 +69,20 @@ struct { \
} \
} while (0)
-#define ql_head_remove(a_head, a_type, a_field) do { \
+#define ql_head_remove(a_head, a_type, a_field) do { \
a_type *t = ql_first(a_head); \
ql_remove((a_head), t, a_field); \
} while (0)
-#define ql_tail_remove(a_head, a_type, a_field) do { \
+#define ql_tail_remove(a_head, a_type, a_field) do { \
a_type *t = ql_last(a_head, a_field); \
ql_remove((a_head), t, a_field); \
} while (0)
-#define ql_foreach(a_var, a_head, a_field) \
+#define ql_foreach(a_var, a_head, a_field) \
qr_foreach((a_var), ql_first(a_head), a_field)
-#define ql_reverse_foreach(a_var, a_head, a_field) \
+#define ql_reverse_foreach(a_var, a_head, a_field) \
qr_reverse_foreach((a_var), ql_first(a_head), a_field)
+
+#endif /* JEMALLOC_INTERNAL_QL_H */
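A minimal usage sketch of the ql macros above, which implement an intrusive doubly linked list over the qr ring primitives (type and field names hypothetical):

    typedef struct widget_s widget_t;
    struct widget_s {
        int id;
        ql_elm(widget_t) link;  /* Intrusive list linkage. */
    };

    ql_head(widget_t) widgets = ql_head_initializer(widgets);

    void
    widgets_demo(widget_t *w) {
        ql_elm_new(w, link);                /* Initialize linkage. */
        ql_tail_insert(&widgets, w, link);  /* Append. */
        widget_t *it;
        ql_foreach(it, &widgets, link) {
            /* Visit each element in insertion order. */
        }
        ql_remove(&widgets, w, link);
    }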
diff --git a/deps/jemalloc/include/jemalloc/internal/qr.h b/deps/jemalloc/include/jemalloc/internal/qr.h
index 602944b9b..1e1056b38 100644
--- a/deps/jemalloc/include/jemalloc/internal/qr.h
+++ b/deps/jemalloc/include/jemalloc/internal/qr.h
@@ -1,38 +1,39 @@
+#ifndef JEMALLOC_INTERNAL_QR_H
+#define JEMALLOC_INTERNAL_QR_H
+
/* Ring definitions. */
-#define qr(a_type) \
+#define qr(a_type) \
struct { \
a_type *qre_next; \
a_type *qre_prev; \
}
/* Ring functions. */
-#define qr_new(a_qr, a_field) do { \
+#define qr_new(a_qr, a_field) do { \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
-#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
+#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
-#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
+#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
-#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
+#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
(a_qr)->a_field.qre_next = (a_qrelm); \
(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
(a_qrelm)->a_field.qre_prev = (a_qr); \
} while (0)
-#define qr_after_insert(a_qrelm, a_qr, a_field) \
- do \
- { \
+#define qr_after_insert(a_qrelm, a_qr, a_field) do { \
(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
(a_qr)->a_field.qre_prev = (a_qrelm); \
(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
(a_qrelm)->a_field.qre_next = (a_qr); \
- } while (0)
+} while (0)
-#define qr_meld(a_qr_a, a_qr_b, a_field) do { \
- void *t; \
+#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \
+ a_type *t; \
(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
t = (a_qr_a)->a_field.qre_prev; \
@@ -40,12 +41,14 @@ struct { \
(a_qr_b)->a_field.qre_prev = t; \
} while (0)
-/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
- * have two copies of the code. */
-#define qr_split(a_qr_a, a_qr_b, a_field) \
- qr_meld((a_qr_a), (a_qr_b), a_field)
+/*
+ * qr_meld() and qr_split() are functionally equivalent, so there's no need to
+ * have two copies of the code.
+ */
+#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
+ qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
-#define qr_remove(a_qr, a_field) do { \
+#define qr_remove(a_qr, a_field) do { \
(a_qr)->a_field.qre_prev->a_field.qre_next \
= (a_qr)->a_field.qre_next; \
(a_qr)->a_field.qre_next->a_field.qre_prev \
@@ -54,14 +57,16 @@ struct { \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
-#define qr_foreach(var, a_qr, a_field) \
+#define qr_foreach(var, a_qr, a_field) \
for ((var) = (a_qr); \
(var) != NULL; \
(var) = (((var)->a_field.qre_next != (a_qr)) \
? (var)->a_field.qre_next : NULL))
-#define qr_reverse_foreach(var, a_qr, a_field) \
+#define qr_reverse_foreach(var, a_qr, a_field) \
for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
(var) != NULL; \
(var) = (((var) != (a_qr)) \
? (var)->a_field.qre_prev : NULL))
+
+#endif /* JEMALLOC_INTERNAL_QR_H */
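Note the signature change above: qr_meld() and qr_split() now take the element type so the temporary used during the pointer swap is correctly typed instead of void *. Under the new form a call site looks like (names hypothetical, reusing the widget_t/link layout sketched for ql.h):

    /* Meld two rings of widget_t linked through `link`. */
    qr_meld(ring_a, ring_b, widget_t, link);
    /* Splitting is the functionally identical operation. */
    qr_split(ring_a, ring_b, widget_t, link);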
diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h
deleted file mode 100644
index 16f677f73..000000000
--- a/deps/jemalloc/include/jemalloc/internal/quarantine.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct quarantine_obj_s quarantine_obj_t;
-typedef struct quarantine_s quarantine_t;
-
-/* Default per thread quarantine size if valgrind is enabled. */
-#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24)
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-struct quarantine_obj_s {
- void *ptr;
- size_t usize;
-};
-
-struct quarantine_s {
- size_t curbytes;
- size_t curobjs;
- size_t first;
-#define LG_MAXOBJS_INIT 10
- size_t lg_maxobjs;
- quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-quarantine_t *quarantine_init(size_t lg_maxobjs);
-void quarantine(void *ptr);
-void quarantine_cleanup(void *arg);
-bool quarantine_boot(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), quarantine, quarantine_t *)
-
-void quarantine_alloc_hook(void);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_))
-malloc_tsd_externs(quarantine, quarantine_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, quarantine, quarantine_t *, NULL,
- quarantine_cleanup)
-
-JEMALLOC_ALWAYS_INLINE void
-quarantine_alloc_hook(void)
-{
- quarantine_t *quarantine;
-
- assert(config_fill && opt_quarantine);
-
- quarantine = *quarantine_tsd_get();
- if (quarantine == NULL)
- quarantine_init(LG_MAXOBJS_INIT);
-}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-
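The deleted quarantine was a dynamically sized ring buffer of deferred frees (note the objs[1] flexible trailing array above, sized as 2^lg_maxobjs slots). Its slot indexing can be sketched as (hypothetical reconstruction, single-threaded):

    /* Ring of 2^lg_maxobjs slots; `first` indexes the oldest entry. */
    size_t slot = (q->first + q->curobjs) &
        ((ZU(1) << q->lg_maxobjs) - 1);

The feature existed to support Valgrind-assisted use-after-free detection and is removed along with the rest of that support in this upgrade.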
diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h
index 423802eb2..47fa5ca99 100644
--- a/deps/jemalloc/include/jemalloc/internal/rb.h
+++ b/deps/jemalloc/include/jemalloc/internal/rb.h
@@ -20,17 +20,21 @@
*/
#ifndef RB_H_
-#define RB_H_
+#define RB_H_
+
+#ifndef __PGI
+#define RB_COMPACT
+#endif
#ifdef RB_COMPACT
/* Node structure. */
-#define rb_node(a_type) \
+#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right_red; \
}
#else
-#define rb_node(a_type) \
+#define rb_node(a_type) \
struct { \
a_type *rbn_left; \
a_type *rbn_right; \
@@ -39,111 +43,116 @@ struct { \
#endif
/* Root structure. */
-#define rb_tree(a_type) \
+#define rb_tree(a_type) \
struct { \
a_type *rbt_root; \
- a_type rbt_nil; \
}
/* Left accessors. */
-#define rbtn_left_get(a_type, a_field, a_node) \
+#define rbtn_left_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_left)
-#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
+#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
(a_node)->a_field.rbn_left = a_left; \
} while (0)
#ifdef RB_COMPACT
/* Right accessors. */
-#define rbtn_right_get(a_type, a_field, a_node) \
+#define rbtn_right_get(a_type, a_field, a_node) \
((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \
& ((ssize_t)-2)))
-#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \
| (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \
} while (0)
/* Color accessors. */
-#define rbtn_red_get(a_type, a_field, a_node) \
+#define rbtn_red_get(a_type, a_field, a_node) \
((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \
& ((size_t)1)))
-#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \
| ((ssize_t)a_red)); \
} while (0)
-#define rbtn_red_set(a_type, a_field, a_node) do { \
+#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \
(a_node)->a_field.rbn_right_red) | ((size_t)1)); \
} while (0)
-#define rbtn_black_set(a_type, a_field, a_node) do { \
+#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \
(a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \
} while (0)
+
+/* Node initializer. */
+#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
+ /* Bookkeeping bit cannot be used by node pointer. */ \
+ assert(((uintptr_t)(a_node) & 0x1) == 0); \
+ rbtn_left_set(a_type, a_field, (a_node), NULL); \
+ rbtn_right_set(a_type, a_field, (a_node), NULL); \
+ rbtn_red_set(a_type, a_field, (a_node)); \
+} while (0)
#else
/* Right accessors. */
-#define rbtn_right_get(a_type, a_field, a_node) \
+#define rbtn_right_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_right)
-#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
(a_node)->a_field.rbn_right = a_right; \
} while (0)
/* Color accessors. */
-#define rbtn_red_get(a_type, a_field, a_node) \
+#define rbtn_red_get(a_type, a_field, a_node) \
((a_node)->a_field.rbn_red)
-#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
(a_node)->a_field.rbn_red = (a_red); \
} while (0)
-#define rbtn_red_set(a_type, a_field, a_node) do { \
+#define rbtn_red_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = true; \
} while (0)
-#define rbtn_black_set(a_type, a_field, a_node) do { \
+#define rbtn_black_set(a_type, a_field, a_node) do { \
(a_node)->a_field.rbn_red = false; \
} while (0)
-#endif
/* Node initializer. */
-#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
- rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
- rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
+#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
+ rbtn_left_set(a_type, a_field, (a_node), NULL); \
+ rbtn_right_set(a_type, a_field, (a_node), NULL); \
rbtn_red_set(a_type, a_field, (a_node)); \
} while (0)
+#endif
/* Tree initializer. */
-#define rb_new(a_type, a_field, a_rbt) do { \
- (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \
- rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \
- rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \
+#define rb_new(a_type, a_field, a_rbt) do { \
+ (a_rbt)->rbt_root = NULL; \
} while (0)
/* Internal utility macros. */
-#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
+#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
- if ((r_node) != &(a_rbt)->rbt_nil) { \
+ if ((r_node) != NULL) { \
for (; \
- rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
+ rbtn_left_get(a_type, a_field, (r_node)) != NULL; \
(r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \
} \
} \
} while (0)
-#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
+#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
(r_node) = (a_root); \
- if ((r_node) != &(a_rbt)->rbt_nil) { \
- for (; rbtn_right_get(a_type, a_field, (r_node)) != \
- &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \
- (r_node))) { \
+ if ((r_node) != NULL) { \
+ for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \
+ (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \
} \
} \
} while (0)
-#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
+#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_right_get(a_type, a_field, (a_node)); \
rbtn_right_set(a_type, a_field, (a_node), \
rbtn_left_get(a_type, a_field, (r_node))); \
rbtn_left_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
-#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
+#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
(r_node) = rbtn_left_get(a_type, a_field, (a_node)); \
rbtn_left_set(a_type, a_field, (a_node), \
rbtn_right_get(a_type, a_field, (r_node))); \
@@ -155,9 +164,11 @@ struct { \
* functions generated by an equivalently parameterized call to rb_gen().
*/
-#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
+#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree); \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree); \
a_attr a_type * \
@@ -167,11 +178,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node); \
a_attr a_type * \
-a_prefix##search(a_rbt_type *rbtree, a_type *key); \
+a_prefix##search(a_rbt_type *rbtree, const a_type *key); \
a_attr a_type * \
-a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \
+a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \
a_attr a_type * \
-a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \
+a_prefix##psearch(a_rbt_type *rbtree, const a_type *key); \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node); \
a_attr void \
@@ -181,7 +192,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
a_rbt_type *, a_type *, void *), void *arg); \
a_attr a_type * \
a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
- a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \
+a_attr void \
+a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \
+ void *arg);
/*
* The rb_gen() macro generates a type-specific red-black tree implementation,
@@ -198,7 +212,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* int (a_cmp *)(a_type *a_node, a_type *a_other);
* ^^^^^^
* or a_key
- * Interpretation of comparision function return values:
+ * Interpretation of comparison function return values:
* -1 : a_node < a_other
* 0 : a_node == a_other
* 1 : a_node > a_other
@@ -224,6 +238,13 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* Args:
* tree: Pointer to an uninitialized red-black tree object.
*
+ * static bool
+ * ex_empty(ex_t *tree);
+ * Description: Determine whether tree is empty.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * Ret: True if tree is empty, false otherwise.
+ *
* static ex_node_t *
* ex_first(ex_t *tree);
* static ex_node_t *
@@ -245,7 +266,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* last/first.
*
* static ex_node_t *
- * ex_search(ex_t *tree, ex_node_t *key);
+ * ex_search(ex_t *tree, const ex_node_t *key);
* Description: Search for node that matches key.
* Args:
* tree: Pointer to an initialized red-black tree object.
@@ -253,9 +274,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* Ret: Node in tree that matches key, or NULL if no match.
*
* static ex_node_t *
- * ex_nsearch(ex_t *tree, ex_node_t *key);
+ * ex_nsearch(ex_t *tree, const ex_node_t *key);
* static ex_node_t *
- * ex_psearch(ex_t *tree, ex_node_t *key);
+ * ex_psearch(ex_t *tree, const ex_node_t *key);
* Description: Search for node that matches key. If no match is found,
* return what would be key's successor/predecessor, were
* key in tree.
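A compact usage sketch of the generated API documented in these comments, following the ex_* naming they use (comparison function hypothetical; it must return -1/0/1 as specified above):

    typedef struct ex_node_s ex_node_t;
    struct ex_node_s {
        int key;
        rb_node(ex_node_t) link;  /* Embedded tree linkage. */
    };
    typedef rb_tree(ex_node_t) ex_t;

    static int
    ex_cmp(const ex_node_t *a, const ex_node_t *b) {
        return (a->key > b->key) - (a->key < b->key);
    }

    /* Generates static ex_new(), ex_insert(), ex_search(), etc.; pass an
     * attribute that silences unused-function warnings if needed. */
    rb_gen(static, ex_, ex_t, ex_node_t, link, ex_cmp)

With the nil-sentinel removal in this diff, an empty tree is simply rbt_root == NULL, which is what the new ex_empty() tests.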
@@ -303,40 +324,52 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
* arg : Opaque pointer passed to cb().
* Ret: NULL if iteration completed, or the non-NULL callback return value
* that caused termination of the iteration.
+ *
+ * static void
+ * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg);
+ * Description: Iterate over the tree with post-order traversal, remove
+ * each node, and run the callback if non-null. This is
+ * used for destroying a tree without paying the cost to
+ * rebalance it. The tree must not be otherwise altered
+ * during traversal.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * cb : Callback function, which, if non-null, is called for each node
+ * during iteration. There is no way to stop iteration once it
+ * has begun.
+ * arg : Opaque pointer passed to cb().
*/
-#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
+#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
} \
+a_attr bool \
+a_prefix##empty(a_rbt_type *rbtree) { \
+ return (rbtree->rbt_root == NULL); \
+} \
a_attr a_type * \
a_prefix##first(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
- if (ret == &rbtree->rbt_nil) { \
- ret = NULL; \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
a_prefix##last(a_rbt_type *rbtree) { \
a_type *ret; \
rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
- if (ret == &rbtree->rbt_nil) { \
- ret = NULL; \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
- if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ if (rbtn_right_get(a_type, a_field, node) != NULL) { \
rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
- assert(tnode != &rbtree->rbt_nil); \
- ret = &rbtree->rbt_nil; \
+ assert(tnode != NULL); \
+ ret = NULL; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
@@ -347,24 +380,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
} else { \
break; \
} \
- assert(tnode != &rbtree->rbt_nil); \
+ assert(tnode != NULL); \
} \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = (NULL); \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
a_type *ret; \
- if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ if (rbtn_left_get(a_type, a_field, node) != NULL) { \
rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \
a_field, node), ret); \
} else { \
a_type *tnode = rbtree->rbt_root; \
- assert(tnode != &rbtree->rbt_nil); \
- ret = &rbtree->rbt_nil; \
+ assert(tnode != NULL); \
+ ret = NULL; \
while (true) { \
int cmp = (a_cmp)(node, tnode); \
if (cmp < 0) { \
@@ -375,20 +405,17 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
} else { \
break; \
} \
- assert(tnode != &rbtree->rbt_nil); \
+ assert(tnode != NULL); \
} \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = (NULL); \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
-a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
+a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \
a_type *ret; \
int cmp; \
ret = rbtree->rbt_root; \
- while (ret != &rbtree->rbt_nil \
+ while (ret != NULL \
&& (cmp = (a_cmp)(key, ret)) != 0) { \
if (cmp < 0) { \
ret = rbtn_left_get(a_type, a_field, ret); \
@@ -396,17 +423,14 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
ret = rbtn_right_get(a_type, a_field, ret); \
} \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = (NULL); \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
-a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
+a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
- ret = &rbtree->rbt_nil; \
- while (tnode != &rbtree->rbt_nil) { \
+ ret = NULL; \
+ while (tnode != NULL) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
ret = tnode; \
@@ -418,17 +442,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
break; \
} \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = (NULL); \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
-a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
+a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \
a_type *ret; \
a_type *tnode = rbtree->rbt_root; \
- ret = &rbtree->rbt_nil; \
- while (tnode != &rbtree->rbt_nil) { \
+ ret = NULL; \
+ while (tnode != NULL) { \
int cmp = (a_cmp)(key, tnode); \
if (cmp < 0) { \
tnode = rbtn_left_get(a_type, a_field, tnode); \
@@ -440,10 +461,7 @@ a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
break; \
} \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = (NULL); \
- } \
- return (ret); \
+ return ret; \
} \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
@@ -454,7 +472,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
rbt_node_new(a_type, a_field, rbtree, node); \
/* Wind. */ \
path->node = rbtree->rbt_root; \
- for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ for (pathp = path; pathp->node != NULL; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
assert(cmp != 0); \
if (cmp < 0) { \
@@ -474,7 +492,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
rbtn_left_set(a_type, a_field, cnode, left); \
if (rbtn_red_get(a_type, a_field, left)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
- if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ if (leftleft != NULL && rbtn_red_get(a_type, a_field, \
+ leftleft)) { \
/* Fix up 4-node. */ \
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
@@ -489,7 +508,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, cnode, right); \
if (rbtn_red_get(a_type, a_field, right)) { \
a_type *left = rbtn_left_get(a_type, a_field, cnode); \
- if (rbtn_red_get(a_type, a_field, left)) { \
+ if (left != NULL && rbtn_red_get(a_type, a_field, \
+ left)) { \
/* Split 4-node. */ \
rbtn_black_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, right); \
@@ -522,7 +542,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
/* Wind. */ \
nodep = NULL; /* Silence compiler warning. */ \
path->node = rbtree->rbt_root; \
- for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ for (pathp = path; pathp->node != NULL; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
if (cmp < 0) { \
pathp[1].node = rbtn_left_get(a_type, a_field, \
@@ -534,8 +554,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
/* Find node's successor, in preparation for swap. */ \
pathp->cmp = 1; \
nodep = pathp; \
- for (pathp++; pathp->node != &rbtree->rbt_nil; \
- pathp++) { \
+ for (pathp++; pathp->node != NULL; pathp++) { \
pathp->cmp = -1; \
pathp[1].node = rbtn_left_get(a_type, a_field, \
pathp->node); \
@@ -577,10 +596,10 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
} \
} else { \
a_type *left = rbtn_left_get(a_type, a_field, node); \
- if (left != &rbtree->rbt_nil) { \
+ if (left != NULL) { \
/* node has no successor, but it has a left child. */\
/* Splice node out, without losing the left child. */\
- assert(rbtn_red_get(a_type, a_field, node) == false); \
+ assert(!rbtn_red_get(a_type, a_field, node)); \
assert(rbtn_red_get(a_type, a_field, left)); \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
@@ -597,34 +616,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
return; \
} else if (pathp == path) { \
/* The tree only contained one node. */ \
- rbtree->rbt_root = &rbtree->rbt_nil; \
+ rbtree->rbt_root = NULL; \
return; \
} \
} \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
/* Prune red node, which requires no fixup. */ \
assert(pathp[-1].cmp < 0); \
- rbtn_left_set(a_type, a_field, pathp[-1].node, \
- &rbtree->rbt_nil); \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \
return; \
} \
/* The node to be pruned is black, so unwind until balance is */\
/* restored. */\
- pathp->node = &rbtree->rbt_nil; \
+ pathp->node = NULL; \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
assert(pathp->cmp != 0); \
if (pathp->cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp->node, \
pathp[1].node); \
- assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
- == false); \
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *right = rbtn_right_get(a_type, a_field, \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
a_type *tnode; \
- if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ if (rightleft != NULL && rbtn_red_get(a_type, a_field, \
+ rightleft)) { \
/* In the following diagrams, ||, //, and \\ */\
/* indicate the path to the removed node. */\
/* */\
@@ -667,7 +684,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
pathp->node); \
a_type *rightleft = rbtn_left_get(a_type, a_field, \
right); \
- if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ if (rightleft != NULL && rbtn_red_get(a_type, a_field, \
+ rightleft)) { \
/* || */\
/* pathp(b) */\
/* // \ */\
@@ -681,7 +699,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
/* Balance restored, but rotation modified */\
- /* subree root, which may actually be the tree */\
+ /* subtree root, which may actually be the tree */\
/* root. */\
if (pathp == path) { \
/* Set root. */ \
@@ -721,7 +739,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
left); \
a_type *leftrightleft = rbtn_left_get(a_type, a_field, \
leftright); \
- if (rbtn_red_get(a_type, a_field, leftrightleft)) { \
+ if (leftrightleft != NULL && rbtn_red_get(a_type, \
+ a_field, leftrightleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
@@ -747,7 +766,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
/* (b) */\
/* / */\
/* (b) */\
- assert(leftright != &rbtree->rbt_nil); \
+ assert(leftright != NULL); \
rbtn_red_set(a_type, a_field, leftright); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
@@ -770,7 +789,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
return; \
} else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
- if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ if (leftleft != NULL && rbtn_red_get(a_type, a_field, \
+ leftleft)) { \
/* || */\
/* pathp(r) */\
/* / \\ */\
@@ -808,7 +828,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
} \
} else { \
a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
- if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ if (leftleft != NULL && rbtn_red_get(a_type, a_field, \
+ leftleft)) { \
/* || */\
/* pathp(b) */\
/* / \\ */\
@@ -849,22 +870,22 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
} \
/* Set root. */ \
rbtree->rbt_root = path->node; \
- assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
+ assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \
} \
a_attr a_type * \
a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
- if (node == &rbtree->rbt_nil) { \
- return (&rbtree->rbt_nil); \
+ if (node == NULL) { \
+ return NULL; \
} else { \
a_type *ret; \
if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \
- a_field, node), cb, arg)) != &rbtree->rbt_nil \
- || (ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \
+ arg)) != NULL) { \
+ return ret; \
} \
- return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
- a_field, node), cb, arg)); \
+ return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg); \
} \
} \
a_attr a_type * \
@@ -874,22 +895,22 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \
if (cmp < 0) { \
a_type *ret; \
if ((ret = a_prefix##iter_start(rbtree, start, \
- rbtn_left_get(a_type, a_field, node), cb, arg)) != \
- &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \
+ (ret = cb(rbtree, node, arg)) != NULL) { \
+ return ret; \
} \
- return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
- a_field, node), cb, arg)); \
+ return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg); \
} else if (cmp > 0) { \
- return (a_prefix##iter_start(rbtree, start, \
- rbtn_right_get(a_type, a_field, node), cb, arg)); \
+ return a_prefix##iter_start(rbtree, start, \
+ rbtn_right_get(a_type, a_field, node), cb, arg); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ return ret; \
} \
- return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
- a_field, node), cb, arg)); \
+ return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg); \
} \
} \
a_attr a_type * \
@@ -902,25 +923,22 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
} else { \
ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = NULL; \
- } \
- return (ret); \
+ return ret; \
} \
a_attr a_type * \
a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
- if (node == &rbtree->rbt_nil) { \
- return (&rbtree->rbt_nil); \
+ if (node == NULL) { \
+ return NULL; \
} else { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_recurse(rbtree, \
- rbtn_right_get(a_type, a_field, node), cb, arg)) != \
- &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \
+ (ret = cb(rbtree, node, arg)) != NULL) { \
+ return ret; \
} \
- return (a_prefix##reverse_iter_recurse(rbtree, \
- rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ return a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg); \
} \
} \
a_attr a_type * \
@@ -931,22 +949,22 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \
if (cmp > 0) { \
a_type *ret; \
if ((ret = a_prefix##reverse_iter_start(rbtree, start, \
- rbtn_right_get(a_type, a_field, node), cb, arg)) != \
- &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \
+ (ret = cb(rbtree, node, arg)) != NULL) { \
+ return ret; \
} \
- return (a_prefix##reverse_iter_recurse(rbtree, \
- rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ return a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg); \
} else if (cmp < 0) { \
- return (a_prefix##reverse_iter_start(rbtree, start, \
- rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ return a_prefix##reverse_iter_start(rbtree, start, \
+ rbtn_left_get(a_type, a_field, node), cb, arg); \
} else { \
a_type *ret; \
if ((ret = cb(rbtree, node, arg)) != NULL) { \
- return (ret); \
+ return ret; \
} \
- return (a_prefix##reverse_iter_recurse(rbtree, \
- rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ return a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg); \
} \
} \
a_attr a_type * \
@@ -960,10 +978,29 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \
cb, arg); \
} \
- if (ret == &rbtree->rbt_nil) { \
- ret = NULL; \
+ return ret; \
+} \
+a_attr void \
+a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \
+ a_type *, void *), void *arg) { \
+ if (node == NULL) { \
+ return; \
} \
- return (ret); \
+ a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \
+ node), cb, arg); \
+ rbtn_left_set(a_type, a_field, (node), NULL); \
+ a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \
+ node), cb, arg); \
+ rbtn_right_set(a_type, a_field, (node), NULL); \
+ if (cb) { \
+ cb(node, arg); \
+ } \
+} \
+a_attr void \
+a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \
+ void *arg) { \
+ a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \
+ rbtree->rbt_root = NULL; \
}
#endif /* RB_H_ */
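
Editor's note: the rb.h hunks above replace the per-tree rbt_nil sentinel with plain NULL child pointers, which is why every color read on a possibly-empty child gains a != NULL guard (the leftleft, rightleft, and leftrightleft checks). A minimal sketch of the resulting invariant, using hypothetical names rather than jemalloc's macros:

/*
 * Editor's sketch (hypothetical names, not jemalloc code): with NULL
 * leaves, a child's color bit may only be read after a NULL check,
 * and NULL must be treated as implicitly black.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct node_s {
    struct node_s *left;
    struct node_s *right;
    bool red;
} node_t;

/* A sentinel node always had a readable color bit; a NULL leaf does
 * not, so it is reported as black without being dereferenced. */
static bool
node_is_red(const node_t *node) {
    return node != NULL && node->red;
}

Dropping the sentinel removes an embedded node from every tree and turns the leaf test into a plain NULL comparison, at the cost of these explicit guards.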
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree.h b/deps/jemalloc/include/jemalloc/internal/rtree.h
index bc74769f5..b59d33a80 100644
--- a/deps/jemalloc/include/jemalloc/internal/rtree.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree.h
@@ -1,172 +1,492 @@
+#ifndef JEMALLOC_INTERNAL_RTREE_H
+#define JEMALLOC_INTERNAL_RTREE_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree_tsd.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/tsd.h"
+
/*
* This radix tree implementation is tailored to the singular purpose of
- * tracking which chunks are currently owned by jemalloc. This functionality
- * is mandatory for OS X, where jemalloc must be able to respond to object
- * ownership queries.
+ * associating metadata with extents that are currently owned by jemalloc.
*
*******************************************************************************
*/
-#ifdef JEMALLOC_H_TYPES
+
+/* Number of high insignificant bits. */
+#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR)
+/* Number of low insignificant bits. */
+#define RTREE_NLIB LG_PAGE
+/* Number of significant bits. */
+#define RTREE_NSB (LG_VADDR - RTREE_NLIB)
+/* Number of levels in radix tree. */
+#if RTREE_NSB <= 10
+# define RTREE_HEIGHT 1
+#elif RTREE_NSB <= 36
+# define RTREE_HEIGHT 2
+#elif RTREE_NSB <= 52
+# define RTREE_HEIGHT 3
+#else
+# error Unsupported number of significant virtual address bits
+#endif
+/* Use compact leaf representation if virtual address encoding allows. */
+#if RTREE_NHIB >= LG_CEIL_NSIZES
+# define RTREE_LEAF_COMPACT
+#endif
+
+/* Needed for initialization only. */
+#define RTREE_LEAFKEY_INVALID ((uintptr_t)1)
+
+typedef struct rtree_node_elm_s rtree_node_elm_t;
+struct rtree_node_elm_s {
+ atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */
+};
+
+struct rtree_leaf_elm_s {
+#ifdef RTREE_LEAF_COMPACT
+ /*
+ * Single pointer-width field containing all three leaf element fields.
+ * For example, on a 64-bit x64 system with 48 significant virtual
+ * memory address bits, the index, extent, and slab fields are packed as
+ * such:
+ *
+ * x: index
+ * e: extent
+ * b: slab
+ *
+ * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b
+ */
+ atomic_p_t le_bits;
+#else
+ atomic_p_t le_extent; /* (extent_t *) */
+ atomic_u_t le_szind; /* (szind_t) */
+ atomic_b_t le_slab; /* (bool) */
+#endif
+};
+
+typedef struct rtree_level_s rtree_level_t;
+struct rtree_level_s {
+ /* Number of key bits distinguished by this level. */
+ unsigned bits;
+ /*
+ * Cumulative number of key bits distinguished by traversing to
+ * corresponding tree level.
+ */
+ unsigned cumbits;
+};
typedef struct rtree_s rtree_t;
+struct rtree_s {
+ malloc_mutex_t init_lock;
+ /* Number of elements based on rtree_levels[0].bits. */
+#if RTREE_HEIGHT > 1
+ rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)];
+#else
+ rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)];
+#endif
+};
/*
- * Size of each radix tree node (must be a power of 2). This impacts tree
- * depth.
+ * Split the bits into one to three partitions depending on the number of
+ * significant bits. If the number of bits does not divide evenly into the
+ * number of levels, place one remainder bit per level starting at the leaf
+ * level.
*/
-#define RTREE_NODESIZE (1U << 16)
+static const rtree_level_t rtree_levels[] = {
+#if RTREE_HEIGHT == 1
+ {RTREE_NSB, RTREE_NHIB + RTREE_NSB}
+#elif RTREE_HEIGHT == 2
+ {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2},
+ {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB}
+#elif RTREE_HEIGHT == 3
+ {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3},
+ {RTREE_NSB/3 + RTREE_NSB%3/2,
+ RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2},
+ {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB}
+#else
+# error Unsupported rtree height
+#endif
+};
-typedef void *(rtree_alloc_t)(size_t);
-typedef void (rtree_dalloc_t)(void *);
+bool rtree_new(rtree_t *rtree, bool zeroed);
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
+typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t);
+extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc;
-struct rtree_s {
- rtree_alloc_t *alloc;
- rtree_dalloc_t *dalloc;
- malloc_mutex_t mutex;
- void **root;
- unsigned height;
- unsigned level2bits[1]; /* Dynamically sized. */
-};
+typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t);
+extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc;
+
+typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *);
+extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc;
+
+typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *);
+extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc;
+#ifdef JEMALLOC_JET
+void rtree_delete(tsdn_t *tsdn, rtree_t *rtree);
+#endif
+rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing);
+
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_leafkey(uintptr_t key) {
+ unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
+ unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
+ rtree_levels[RTREE_HEIGHT-1].bits);
+ unsigned maskbits = ptrbits - cumbits;
+ uintptr_t mask = ~((ZU(1) << maskbits) - 1);
+ return (key & mask);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+rtree_cache_direct_map(uintptr_t key) {
+ unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
+ unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits -
+ rtree_levels[RTREE_HEIGHT-1].bits);
+ unsigned maskbits = ptrbits - cumbits;
+ return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1));
+}
+
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_subkey(uintptr_t key, unsigned level) {
+ unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3);
+ unsigned cumbits = rtree_levels[level].cumbits;
+ unsigned shiftbits = ptrbits - cumbits;
+ unsigned maskbits = rtree_levels[level].bits;
+ uintptr_t mask = (ZU(1) << maskbits) - 1;
+ return ((key >> shiftbits) & mask);
+}
+
+/*
+ * Atomic getters.
+ *
+ * dependent: Reading a value on behalf of a pointer to a valid allocation
+ * is guaranteed to be a clean read even without synchronization,
+ * because the rtree update became visible in memory before the
+ * pointer came into existence.
+ * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be
+ * dependent on a previous rtree write, which means a stale read
+ * could result if synchronization were omitted here.
+ */
+# ifdef RTREE_LEAF_COMPACT
+JEMALLOC_ALWAYS_INLINE uintptr_t
+rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
+ bool dependent) {
+ return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
+ ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
+}
+
+JEMALLOC_ALWAYS_INLINE extent_t *
+rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
+# ifdef __aarch64__
+ /*
+ * aarch64 doesn't sign extend the highest virtual address bit to set
+ * the higher ones. Instead, the high bits gets zeroed.
+ */
+ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
+ /* Mask off the slab bit. */
+ uintptr_t low_bit_mask = ~(uintptr_t)1;
+ uintptr_t mask = high_bit_mask & low_bit_mask;
+ return (extent_t *)(bits & mask);
+# else
+ /* Restore sign-extended high bits, mask slab bit. */
+ return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
+ RTREE_NHIB) & ~((uintptr_t)0x1));
+# endif
+}
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
+JEMALLOC_ALWAYS_INLINE szind_t
+rtree_leaf_elm_bits_szind_get(uintptr_t bits) {
+ return (szind_t)(bits >> LG_VADDR);
+}
-rtree_t *rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc);
-void rtree_delete(rtree_t *rtree);
-void rtree_prefork(rtree_t *rtree);
-void rtree_postfork_parent(rtree_t *rtree);
-void rtree_postfork_child(rtree_t *rtree);
+JEMALLOC_ALWAYS_INLINE bool
+rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
+ return (bool)(bits & (uintptr_t)0x1);
+}
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
+# endif
-#ifndef JEMALLOC_ENABLE_INLINE
-#ifdef JEMALLOC_DEBUG
-uint8_t rtree_get_locked(rtree_t *rtree, uintptr_t key);
+JEMALLOC_ALWAYS_INLINE extent_t *
+rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, bool dependent) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
+ return rtree_leaf_elm_bits_extent_get(bits);
+#else
+ extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent
+ ? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
+ return extent;
#endif
-uint8_t rtree_get(rtree_t *rtree, uintptr_t key);
-bool rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val);
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, bool dependent) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
+ return rtree_leaf_elm_bits_szind_get(bits);
+#else
+ return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED
+ : ATOMIC_ACQUIRE);
#endif
+}
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
-#define RTREE_GET_GENERATE(f) \
-/* The least significant bits of the key are ignored. */ \
-JEMALLOC_INLINE uint8_t \
-f(rtree_t *rtree, uintptr_t key) \
-{ \
- uint8_t ret; \
- uintptr_t subkey; \
- unsigned i, lshift, height, bits; \
- void **node, **child; \
- \
- RTREE_LOCK(&rtree->mutex); \
- for (i = lshift = 0, height = rtree->height, node = rtree->root;\
- i < height - 1; \
- i++, lshift += bits, node = child) { \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
- 3)) - bits); \
- child = (void**)node[subkey]; \
- if (child == NULL) { \
- RTREE_UNLOCK(&rtree->mutex); \
- return (0); \
- } \
- } \
- \
- /* \
- * node is a leaf, so it contains values rather than node \
- * pointers. \
- */ \
- bits = rtree->level2bits[i]; \
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
- bits); \
- { \
- uint8_t *leaf = (uint8_t *)node; \
- ret = leaf[subkey]; \
- } \
- RTREE_UNLOCK(&rtree->mutex); \
- \
- RTREE_GET_VALIDATE \
- return (ret); \
-}
-
-#ifdef JEMALLOC_DEBUG
-# define RTREE_LOCK(l) malloc_mutex_lock(l)
-# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
-# define RTREE_GET_VALIDATE
-RTREE_GET_GENERATE(rtree_get_locked)
-# undef RTREE_LOCK
-# undef RTREE_UNLOCK
-# undef RTREE_GET_VALIDATE
+JEMALLOC_ALWAYS_INLINE bool
+rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, bool dependent) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
+ return rtree_leaf_elm_bits_slab_get(bits);
+#else
+ return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED :
+ ATOMIC_ACQUIRE);
#endif
+}
-#define RTREE_LOCK(l)
-#define RTREE_UNLOCK(l)
-#ifdef JEMALLOC_DEBUG
- /*
- * Suppose that it were possible for a jemalloc-allocated chunk to be
- * munmap()ped, followed by a different allocator in another thread re-using
- * overlapping virtual memory, all without invalidating the cached rtree
- * value. The result would be a false positive (the rtree would claim that
- * jemalloc owns memory that it had actually discarded). This scenario
- * seems impossible, but the following assertion is a prudent sanity check.
- */
-# define RTREE_GET_VALIDATE \
- assert(rtree_get_locked(rtree, key) == ret);
+static inline void
+rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, extent_t *extent) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
+ uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
+ LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1))
+ | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
+ atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
#else
-# define RTREE_GET_VALIDATE
+ atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE);
#endif
-RTREE_GET_GENERATE(rtree_get)
-#undef RTREE_LOCK
-#undef RTREE_UNLOCK
-#undef RTREE_GET_VALIDATE
-
-JEMALLOC_INLINE bool
-rtree_set(rtree_t *rtree, uintptr_t key, uint8_t val)
-{
- uintptr_t subkey;
- unsigned i, lshift, height, bits;
- void **node, **child;
-
- malloc_mutex_lock(&rtree->mutex);
- for (i = lshift = 0, height = rtree->height, node = rtree->root;
- i < height - 1;
- i++, lshift += bits, node = child) {
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
- bits);
- child = (void**)node[subkey];
- if (child == NULL) {
- size_t size = ((i + 1 < height - 1) ? sizeof(void *)
- : (sizeof(uint8_t))) << rtree->level2bits[i+1];
- child = (void**)rtree->alloc(size);
- if (child == NULL) {
- malloc_mutex_unlock(&rtree->mutex);
- return (true);
- }
- memset(child, 0, size);
- node[subkey] = child;
- }
+}
+
+static inline void
+rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, szind_t szind) {
+ assert(szind <= NSIZES);
+
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
+ true);
+ uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
+ ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
+ (((uintptr_t)0x1 << LG_VADDR) - 1)) |
+ ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits));
+ atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
+#else
+ atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE);
+#endif
+}
+
+static inline void
+rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree,
+ rtree_leaf_elm_t *elm, bool slab) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
+ true);
+ uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
+ LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) &
+ (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab);
+ atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
+#else
+ atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE);
+#endif
+}
+
+static inline void
+rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
+ extent_t *extent, szind_t szind, bool slab) {
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
+ ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
+ ((uintptr_t)slab);
+ atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE);
+#else
+ rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
+ rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
+ /*
+ * Write extent last, since the element is atomically considered valid
+ * as soon as the extent field is non-NULL.
+ */
+ rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent);
+#endif
+}
+
+static inline void
+rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_leaf_elm_t *elm, szind_t szind, bool slab) {
+ assert(!slab || szind < NBINS);
+
+ /*
+ * The caller implicitly assures that it is the only writer to the szind
+ * and slab fields, and that the extent field cannot currently change.
+ */
+ rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab);
+ rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
+rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent, bool init_missing) {
+ assert(key != 0);
+ assert(!dependent || !init_missing);
+
+ size_t slot = rtree_cache_direct_map(key);
+ uintptr_t leafkey = rtree_leafkey(key);
+ assert(leafkey != RTREE_LEAFKEY_INVALID);
+
+ /* Fast path: L1 direct mapped cache. */
+ if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
+ rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
+ assert(leaf != NULL);
+ uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1);
+ return &leaf[subkey];
}
+ /*
+ * Search the L2 LRU cache. On hit, swap the matching element into the
+ * slot in L1 cache, and move the position in L2 up by 1.
+ */
+#define RTREE_CACHE_CHECK_L2(i) do { \
+ if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \
+ rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \
+ assert(leaf != NULL); \
+ if (i > 0) { \
+ /* Bubble up by one. */ \
+ rtree_ctx->l2_cache[i].leafkey = \
+ rtree_ctx->l2_cache[i - 1].leafkey; \
+ rtree_ctx->l2_cache[i].leaf = \
+ rtree_ctx->l2_cache[i - 1].leaf; \
+ rtree_ctx->l2_cache[i - 1].leafkey = \
+ rtree_ctx->cache[slot].leafkey; \
+ rtree_ctx->l2_cache[i - 1].leaf = \
+ rtree_ctx->cache[slot].leaf; \
+ } else { \
+ rtree_ctx->l2_cache[0].leafkey = \
+ rtree_ctx->cache[slot].leafkey; \
+ rtree_ctx->l2_cache[0].leaf = \
+ rtree_ctx->cache[slot].leaf; \
+ } \
+ rtree_ctx->cache[slot].leafkey = leafkey; \
+ rtree_ctx->cache[slot].leaf = leaf; \
+ uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \
+ return &leaf[subkey]; \
+ } \
+} while (0)
+ /* Check the first cache entry. */
+ RTREE_CACHE_CHECK_L2(0);
+ /* Search the remaining cache elements. */
+ for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) {
+ RTREE_CACHE_CHECK_L2(i);
+ }
+#undef RTREE_CACHE_CHECK_L2
+
+ return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key,
+ dependent, init_missing);
+}
+
+static inline bool
+rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
+ extent_t *extent, szind_t szind, bool slab) {
+ /* Use rtree_clear() to set the extent to NULL. */
+ assert(extent != NULL);
- /* node is a leaf, so it contains values rather than node pointers. */
- bits = rtree->level2bits[i];
- subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
- {
- uint8_t *leaf = (uint8_t *)node;
- leaf[subkey] = val;
+ rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
+ key, false, true);
+ if (elm == NULL) {
+ return true;
}
- malloc_mutex_unlock(&rtree->mutex);
- return (false);
+ assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL);
+ rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab);
+
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t *
+rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key,
+ bool dependent) {
+ rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx,
+ key, dependent, false);
+ if (!dependent && elm == NULL) {
+ return NULL;
+ }
+ assert(elm != NULL);
+ return elm;
+}
+
+JEMALLOC_ALWAYS_INLINE extent_t *
+rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent) {
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
+ dependent);
+ if (!dependent && elm == NULL) {
+ return NULL;
+ }
+ return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent) {
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
+ dependent);
+ if (!dependent && elm == NULL) {
+ return NSIZES;
+ }
+ return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
+}
+
+/*
+ * rtree_slab_read() is intentionally omitted because slab is always read in
+ * conjunction with szind, which makes rtree_szind_slab_read() a better choice.
+ */
+
+JEMALLOC_ALWAYS_INLINE bool
+rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) {
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
+ dependent);
+ if (!dependent && elm == NULL) {
+ return true;
+ }
+ *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent);
+ *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) {
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
+ dependent);
+ if (!dependent && elm == NULL) {
+ return true;
+ }
+#ifdef RTREE_LEAF_COMPACT
+ uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
+ *r_szind = rtree_leaf_elm_bits_szind_get(bits);
+ *r_slab = rtree_leaf_elm_bits_slab_get(bits);
+#else
+ *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
+ *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent);
#endif
+ return false;
+}
+
+static inline void
+rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, szind_t szind, bool slab) {
+ assert(!slab || szind < NBINS);
+
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
+ rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab);
+}
+
+static inline void
+rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key) {
+ rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
+ assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
+ NULL);
+ rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
+}
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_RTREE_H */
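
Editor's note: the RTREE_LEAF_COMPACT branch above packs szind, extent pointer, and slab flag into one pointer-width word so a single atomic load yields all three fields consistently. A standalone sketch of that packing, assuming LG_VADDR == 48 on a 64-bit platform with sign-extended virtual addresses (hypothetical helper names, not jemalloc's API):

#include <stdbool.h>
#include <stdint.h>

#define LG_VADDR 48 /* assumed: significant virtual address bits */

/* szind in the high bits, extent address in the middle, slab in bit 0. */
static uintptr_t
leaf_pack(uintptr_t extent_addr, unsigned szind, bool slab) {
    return ((uintptr_t)szind << LG_VADDR)
        | (extent_addr & (((uintptr_t)1 << LG_VADDR) - 1))
        | (uintptr_t)slab;
}

static unsigned
leaf_szind(uintptr_t bits) {
    return (unsigned)(bits >> LG_VADDR);
}

static bool
leaf_slab(uintptr_t bits) {
    return (bool)(bits & 1);
}

static uintptr_t
leaf_extent(uintptr_t bits) {
    /* Restore the sign-extended high bits, then mask off the slab bit,
     * mirroring rtree_leaf_elm_bits_extent_get() above. */
    return (uintptr_t)((intptr_t)(bits << (64 - LG_VADDR))
        >> (64 - LG_VADDR)) & ~(uintptr_t)1;
}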
diff --git a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
new file mode 100644
index 000000000..93a75173a
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
@@ -0,0 +1,50 @@
+#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H
+#define JEMALLOC_INTERNAL_RTREE_CTX_H
+
+/*
+ * Number of leafkey/leaf pairs to cache in the L1 and L2 levels, respectively. Each
+ * entry supports an entire leaf, so the cache hit rate is typically high even
+ * with a small number of entries. In rare cases extent activity will straddle
+ * the boundary between two leaf nodes. Furthermore, an arena may use a
+ * combination of dss and mmap. Note that as memory usage grows past the amount
+ * that this cache can directly cover, the cache will become less effective if
+ * locality of reference is low, but the consequence is merely cache misses
+ * while traversing the tree nodes.
+ *
+ * The L1 direct-mapped cache offers consistent, low cost on cache hits.
+ * However, collisions can reduce the hit rate. This is mitigated by
+ * combining it with an L2 LRU cache, which requires a linear search and
+ * reordering on access but suffers no collisions. Note that the cache will
+ * itself suffer cache misses if made overly large, plus the cost of the
+ * linear search in the LRU cache.
+ */
+#define RTREE_CTX_LG_NCACHE 4
+#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE)
+#define RTREE_CTX_NCACHE_L2 8
+
+/*
+ * Zero initializer required for tsd initialization only. Proper initialization
+ * done via rtree_ctx_data_init().
+ */
+#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}}
+
+
+typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
+
+typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t;
+struct rtree_ctx_cache_elm_s {
+ uintptr_t leafkey;
+ rtree_leaf_elm_t *leaf;
+};
+
+typedef struct rtree_ctx_s rtree_ctx_t;
+struct rtree_ctx_s {
+ /* Direct mapped cache. */
+ rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE];
+ /* L2 LRU cache. */
+ rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2];
+};
+
+void rtree_ctx_data_init(rtree_ctx_t *ctx);
+
+#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */
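
Editor's note: the two-level cache described above keys each entry by the leafkey shared by every address mapped by one leaf. A sketch of the L1 direct-mapped probe, with MASKBITS standing in for the page-offset-plus-leaf bits (assumed values, not jemalloc's generated constants):

#include <stddef.h>
#include <stdint.h>

#define NCACHE 16   /* mirrors RTREE_CTX_NCACHE */
#define MASKBITS 30 /* assumed: LG_PAGE + leaf-level key bits */

typedef struct {
    uintptr_t leafkey; /* key bits above MASKBITS, low bits zeroed */
    void *leaf;
} cache_elm_t;

static void *
cache_lookup(cache_elm_t cache[NCACHE], uintptr_t key) {
    uintptr_t leafkey = key & ~(((uintptr_t)1 << MASKBITS) - 1);
    size_t slot = (size_t)((key >> MASKBITS) & (NCACHE - 1));
    if (cache[slot].leafkey == leafkey) {
        return cache[slot].leaf; /* L1 hit */
    }
    return NULL; /* miss: consult the L2 LRU, then the tree itself */
}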
diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
index 29c80c1fb..998994d09 100755
--- a/deps/jemalloc/include/jemalloc/internal/size_classes.sh
+++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
@@ -1,17 +1,26 @@
#!/bin/sh
+#
+# Usage: size_classes.sh <lg_qarr> <lg_tmin> <lg_parr> <lg_g>
# The following limits are chosen such that they cover all supported platforms.
-# Range of quanta.
-lg_qmin=3
-lg_qmax=4
+# Pointer sizes.
+lg_zarr="2 3"
+
+# Quanta.
+lg_qarr=$1
# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
-lg_tmin=3
+lg_tmin=$2
+
+# Maximum lookup size.
+lg_kmax=12
-# Range of page sizes.
-lg_pmin=12
-lg_pmax=16
+# Page sizes.
+lg_parr=`echo $3 | tr ',' ' '`
+
+# Size class group size (number of size classes for each size doubling).
+lg_g=$4
pow2() {
e=$1
@@ -22,68 +31,317 @@ pow2() {
done
}
+lg() {
+ x=$1
+ lg_result=0
+ while [ ${x} -gt 1 ] ; do
+ lg_result=$((${lg_result} + 1))
+ x=$((${x} / 2))
+ done
+}
+
+lg_ceil() {
+ y=$1
+ lg ${y}; lg_floor=${lg_result}
+ pow2 ${lg_floor}; pow2_floor=${pow2_result}
+ if [ ${pow2_floor} -lt ${y} ] ; then
+ lg_ceil_result=$((${lg_floor} + 1))
+ else
+ lg_ceil_result=${lg_floor}
+ fi
+}
+
+reg_size_compute() {
+ lg_grp=$1
+ lg_delta=$2
+ ndelta=$3
+
+ pow2 ${lg_grp}; grp=${pow2_result}
+ pow2 ${lg_delta}; delta=${pow2_result}
+ reg_size=$((${grp} + ${delta}*${ndelta}))
+}
+
+slab_size() {
+ lg_p=$1
+ lg_grp=$2
+ lg_delta=$3
+ ndelta=$4
+
+ pow2 ${lg_p}; p=${pow2_result}
+ reg_size_compute ${lg_grp} ${lg_delta} ${ndelta}
+
+ # Compute smallest slab size that is an integer multiple of reg_size.
+ try_slab_size=${p}
+ try_nregs=$((${try_slab_size} / ${reg_size}))
+ perfect=0
+ while [ ${perfect} -eq 0 ] ; do
+ perfect_slab_size=${try_slab_size}
+ perfect_nregs=${try_nregs}
+
+ try_slab_size=$((${try_slab_size} + ${p}))
+ try_nregs=$((${try_slab_size} / ${reg_size}))
+ if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then
+ perfect=1
+ fi
+ done
+
+ slab_size_pgs=$((${perfect_slab_size} / ${p}))
+}
+
+size_class() {
+ index=$1
+ lg_grp=$2
+ lg_delta=$3
+ ndelta=$4
+ lg_p=$5
+ lg_kmax=$6
+
+ if [ ${lg_delta} -ge ${lg_p} ] ; then
+ psz="yes"
+ else
+ pow2 ${lg_p}; p=${pow2_result}
+ pow2 ${lg_grp}; grp=${pow2_result}
+ pow2 ${lg_delta}; delta=${pow2_result}
+ sz=$((${grp} + ${delta} * ${ndelta}))
+ npgs=$((${sz} / ${p}))
+ if [ ${sz} -eq $((${npgs} * ${p})) ] ; then
+ psz="yes"
+ else
+ psz="no"
+ fi
+ fi
+
+ lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta}
+ if [ ${pow2_result} -lt ${ndelta} ] ; then
+ rem="yes"
+ else
+ rem="no"
+ fi
+
+ lg_size=${lg_grp}
+ if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then
+ lg_size=$((${lg_grp} + 1))
+ else
+ lg_size=${lg_grp}
+ rem="yes"
+ fi
+
+ if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then
+ bin="yes"
+ slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs}
+ else
+ bin="no"
+ pgs=0
+ fi
+ if [ ${lg_size} -lt ${lg_kmax} \
+ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then
+ lg_delta_lookup=${lg_delta}
+ else
+ lg_delta_lookup="no"
+ fi
+ printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup}
+ # Defined upon return:
+ # - psz ("yes" or "no")
+ # - bin ("yes" or "no")
+ # - pgs
+ # - lg_delta_lookup (${lg_delta} or "no")
+}
+
+sep_line() {
+ echo " \\"
+}
+
+size_classes() {
+ lg_z=$1
+ lg_q=$2
+ lg_t=$3
+ lg_p=$4
+ lg_g=$5
+
+ pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result}
+ pow2 ${lg_g}; g=${pow2_result}
+
+ echo "#define SIZE_CLASSES \\"
+ echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\"
+
+ ntbins=0
+ nlbins=0
+ lg_tiny_maxclass='"NA"'
+ nbins=0
+ npsizes=0
+
+ # Tiny size classes.
+ ndelta=0
+ index=0
+ lg_grp=${lg_t}
+ lg_delta=${lg_grp}
+ while [ ${lg_grp} -lt ${lg_q} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ fi
+ if [ ${psz} = "yes" ] ; then
+ npsizes=$((${npsizes} + 1))
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ fi
+ ntbins=$((${ntbins} + 1))
+ lg_tiny_maxclass=${lg_grp} # Final written value is correct.
+ index=$((${index} + 1))
+ lg_delta=${lg_grp}
+ lg_grp=$((${lg_grp} + 1))
+ done
+
+ # First non-tiny group.
+ if [ ${ntbins} -gt 0 ] ; then
+ sep_line
+ # The first size class has an unusual encoding, because the size has to be
+ # split between grp and delta*ndelta.
+ lg_grp=$((${lg_grp} - 1))
+ ndelta=1
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ if [ ${psz} = "yes" ] ; then
+ npsizes=$((${npsizes} + 1))
+ fi
+ fi
+ while [ ${ndelta} -lt ${g} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ if [ ${psz} = "yes" ] ; then
+ npsizes=$((${npsizes} + 1))
+ fi
+ done
+
+ # All remaining groups.
+ lg_grp=$((${lg_grp} + ${lg_g}))
+ while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do
+ sep_line
+ ndelta=1
+ if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then
+ ndelta_limit=$((${g} - 1))
+ else
+ ndelta_limit=${g}
+ fi
+ while [ ${ndelta} -le ${ndelta_limit} ] ; do
+ size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax}
+ if [ ${lg_delta_lookup} != "no" ] ; then
+ nlbins=$((${index} + 1))
+ # Final written value is correct:
+ lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ fi
+ if [ ${psz} = "yes" ] ; then
+ npsizes=$((${npsizes} + 1))
+ fi
+ if [ ${bin} != "no" ] ; then
+ nbins=$((${index} + 1))
+ # Final written value is correct:
+ small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ if [ ${lg_g} -gt 0 ] ; then
+ lg_large_minclass=$((${lg_grp} + 1))
+ else
+ lg_large_minclass=$((${lg_grp} + 2))
+ fi
+ fi
+ # Final written value is correct:
+ large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))"
+ index=$((${index} + 1))
+ ndelta=$((${ndelta} + 1))
+ done
+ lg_grp=$((${lg_grp} + 1))
+ lg_delta=$((${lg_delta} + 1))
+ done
+ echo
+ nsizes=${index}
+ lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result}
+
+ # Defined upon completion:
+ # - ntbins
+ # - nlbins
+ # - nbins
+ # - nsizes
+ # - lg_ceil_nsizes
+ # - npsizes
+ # - lg_tiny_maxclass
+ # - lookup_maxclass
+ # - small_maxclass
+ # - lg_large_minclass
+ # - large_maxclass
+}
+
cat <<EOF
+#ifndef JEMALLOC_INTERNAL_SIZE_CLASSES_H
+#define JEMALLOC_INTERNAL_SIZE_CLASSES_H
+
/* This file was automatically generated by size_classes.sh. */
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+
+/*
+ * This header file defines:
+ *
+ * LG_SIZE_CLASS_GROUP: Lg of size class count for each size doubling.
+ * LG_TINY_MIN: Lg of minimum size class to support.
+ * SIZE_CLASSES: Complete table of SC(index, lg_grp, lg_delta, ndelta, psz,
+ * bin, pgs, lg_delta_lookup) tuples.
+ * index: Size class index.
+ * lg_grp: Lg group base size (no deltas added).
+ * lg_delta: Lg delta to previous size class.
+ * ndelta: Delta multiplier. size == 1<<lg_grp + ndelta<<lg_delta
+ * psz: 'yes' if a multiple of the page size, 'no' otherwise.
+ * bin: 'yes' if a small bin size class, 'no' otherwise.
+ * pgs: Slab page count if a small bin size class, 0 otherwise.
+ * lg_delta_lookup: Same as lg_delta if a lookup table size class, 'no'
+ * otherwise.
+ * NTBINS: Number of tiny bins.
+ * NLBINS: Number of bins supported by the lookup table.
+ * NBINS: Number of small size class bins.
+ * NSIZES: Number of size classes.
+ * LG_CEIL_NSIZES: Number of bits required to store NSIZES.
+ * NPSIZES: Number of size classes that are a multiple of (1U << LG_PAGE).
+ * LG_TINY_MAXCLASS: Lg of maximum tiny size class.
+ * LOOKUP_MAXCLASS: Maximum size class included in lookup table.
+ * SMALL_MAXCLASS: Maximum small size class.
+ * LG_LARGE_MINCLASS: Lg of minimum large size class.
+ * LARGE_MAXCLASS: Maximum (large) size class.
+ */
+
+#define LG_SIZE_CLASS_GROUP ${lg_g}
+#define LG_TINY_MIN ${lg_tmin}
EOF
-lg_q=${lg_qmin}
-while [ ${lg_q} -le ${lg_qmax} ] ; do
- lg_t=${lg_tmin}
- while [ ${lg_t} -le ${lg_q} ] ; do
- lg_p=${lg_pmin}
- while [ ${lg_p} -le ${lg_pmax} ] ; do
- echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
- echo "#define SIZE_CLASSES_DEFINED"
- pow2 ${lg_q}; q=${pow2_result}
- pow2 ${lg_t}; t=${pow2_result}
- pow2 ${lg_p}; p=${pow2_result}
- bin=0
- psz=0
- sz=${t}
- delta=$((${sz} - ${psz}))
- echo "/* SIZE_CLASS(bin, delta, sz) */"
- echo "#define SIZE_CLASSES \\"
-
- # Tiny size classes.
- while [ ${sz} -lt ${q} ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${sz}))
- delta=$((${sz} - ${psz}))
- done
- # Quantum-multiple size classes. For each doubling of sz, as many as 4
- # size classes exist. Their spacing is the greater of:
- # - q
- # - sz/4, where sz is a power of 2
- while [ ${sz} -lt ${p} ] ; do
- if [ ${sz} -ge $((${q} * 4)) ] ; then
- i=$((${sz} / 4))
- else
- i=${q}
- fi
- next_2pow=$((${sz} * 2))
- while [ ${sz} -lt $next_2pow ] ; do
- echo " SIZE_CLASS(${bin}, ${delta}, ${sz}) \\"
- bin=$((${bin} + 1))
- psz=${sz}
- sz=$((${sz} + ${i}))
- delta=$((${sz} - ${psz}))
- done
+for lg_z in ${lg_zarr} ; do
+ for lg_q in ${lg_qarr} ; do
+ lg_t=${lg_tmin}
+ while [ ${lg_t} -le ${lg_q} ] ; do
+ # Iterate through page sizes and compute how many bins there are.
+ for lg_p in ${lg_parr} ; do
+ echo "#if (LG_SIZEOF_PTR == ${lg_z} && LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
+ size_classes ${lg_z} ${lg_q} ${lg_t} ${lg_p} ${lg_g}
+ echo "#define SIZE_CLASSES_DEFINED"
+ echo "#define NTBINS ${ntbins}"
+ echo "#define NLBINS ${nlbins}"
+ echo "#define NBINS ${nbins}"
+ echo "#define NSIZES ${nsizes}"
+ echo "#define LG_CEIL_NSIZES ${lg_ceil_nsizes}"
+ echo "#define NPSIZES ${npsizes}"
+ echo "#define LG_TINY_MAXCLASS ${lg_tiny_maxclass}"
+ echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}"
+ echo "#define SMALL_MAXCLASS ${small_maxclass}"
+ echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}"
+ echo "#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)"
+ echo "#define LARGE_MAXCLASS ${large_maxclass}"
+ echo "#endif"
+ echo
done
- echo
- echo "#define NBINS ${bin}"
- echo "#define SMALL_MAXCLASS ${psz}"
- echo "#endif"
- echo
- lg_p=$((${lg_p} + 1))
+ lg_t=$((${lg_t} + 1))
done
- lg_t=$((${lg_t} + 1))
done
- lg_q=$((${lg_q} + 1))
done
cat <<EOF
@@ -92,31 +350,12 @@ cat <<EOF
#endif
#undef SIZE_CLASSES_DEFINED
/*
- * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
- * cannot support more than 256 small size classes. Further constrain NBINS to
- * 255 to support prof_promote, since all small size classes, plus a "not
- * small" size class must be stored in 8 bits of arena_chunk_map_t's bits
- * field.
+ * The size2index_tab lookup table uses uint8_t to encode each bin index, so we
+ * cannot support more than 256 small size classes.
*/
-#if (NBINS > 255)
+#if (NBINS > 256)
# error "Too many small size classes"
#endif
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_SIZE_CLASSES_H */
EOF
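
Editor's note: each generated SC(index, lg_grp, lg_delta, ndelta, ...) row encodes its byte size as size == (1 << lg_grp) + (ndelta << lg_delta), per the header comment the script emits. A small illustration in C (example values, not generated output):

#include <stddef.h>
#include <stdio.h>

static size_t
sc_size(unsigned lg_grp, unsigned lg_delta, unsigned ndelta) {
    return ((size_t)1 << lg_grp) + ((size_t)ndelta << lg_delta);
}

int
main(void) {
    /* e.g. a group based at 4096 with lg_delta 10 yields 5120..8192,
     * i.e. four size classes per size doubling when lg_g == 2. */
    for (unsigned ndelta = 1; ndelta <= 4; ndelta++) {
        printf("%zu\n", sc_size(12, 10, ndelta));
    }
    return 0;
}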
diff --git a/deps/jemalloc/include/jemalloc/internal/smoothstep.h b/deps/jemalloc/include/jemalloc/internal/smoothstep.h
new file mode 100644
index 000000000..2e14430f5
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/smoothstep.h
@@ -0,0 +1,232 @@
+#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H
+#define JEMALLOC_INTERNAL_SMOOTHSTEP_H
+
+/*
+ * This file was generated by the following command:
+ * sh smoothstep.sh smoother 200 24 3 15
+ */
+/******************************************************************************/
+
+/*
+ * This header defines a precomputed table based on the smoothstep family of
+ * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0
+ * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so
+ * that floating point math can be avoided.
+ *
+ * smoothstep(x) = -2x^3 + 3x^2
+ *
+ * smootherstep(x) = 6x^5 - 15x^4 + 10x^3
+ *
+ * smootheststep(x) = -20x^7 + 70x^6 - 84x^5 + 35x^4
+ */
+
+#define SMOOTHSTEP_VARIANT "smoother"
+#define SMOOTHSTEP_NSTEPS 200
+#define SMOOTHSTEP_BFP 24
+#define SMOOTHSTEP \
+ /* STEP(step, h, x, y) */ \
+ STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \
+ STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \
+ STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \
+ STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \
+ STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \
+ STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \
+ STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \
+ STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \
+ STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \
+ STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \
+ STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \
+ STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \
+ STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \
+ STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \
+ STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \
+ STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \
+ STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \
+ STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \
+ STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \
+ STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \
+ STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \
+ STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \
+ STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \
+ STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \
+ STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \
+ STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \
+ STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \
+ STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \
+ STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \
+ STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \
+ STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \
+ STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \
+ STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \
+ STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \
+ STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \
+ STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \
+ STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \
+ STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \
+ STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \
+ STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \
+ STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \
+ STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \
+ STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \
+ STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \
+ STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \
+ STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \
+ STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \
+ STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \
+ STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \
+ STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \
+ STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \
+ STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \
+ STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \
+ STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \
+ STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \
+ STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \
+ STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \
+ STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \
+ STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \
+ STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \
+ STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \
+ STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \
+ STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \
+ STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \
+ STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \
+ STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \
+ STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \
+ STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \
+ STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \
+ STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \
+ STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \
+ STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \
+ STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \
+ STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \
+ STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \
+ STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \
+ STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \
+ STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \
+ STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \
+ STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \
+ STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \
+ STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \
+ STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \
+ STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \
+ STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \
+ STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \
+ STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \
+ STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \
+ STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \
+ STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \
+ STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \
+ STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \
+ STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \
+ STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \
+ STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \
+ STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \
+ STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \
+ STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \
+ STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \
+ STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \
+ STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \
+ STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \
+ STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \
+ STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \
+ STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \
+ STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \
+ STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \
+ STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \
+ STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \
+ STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \
+ STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \
+ STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \
+ STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \
+ STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \
+ STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \
+ STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \
+ STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \
+ STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \
+ STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \
+ STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \
+ STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \
+ STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \
+ STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \
+ STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \
+ STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \
+ STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \
+ STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \
+ STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \
+ STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \
+ STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \
+ STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \
+ STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \
+ STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \
+ STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \
+ STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \
+ STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \
+ STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \
+ STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \
+ STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \
+ STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \
+ STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \
+ STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \
+ STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \
+ STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \
+ STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \
+ STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \
+ STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \
+ STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \
+ STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \
+ STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \
+ STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \
+ STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \
+ STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \
+ STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \
+ STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \
+ STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \
+ STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \
+ STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \
+ STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \
+ STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \
+ STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \
+ STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \
+ STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \
+ STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \
+ STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \
+ STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \
+ STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \
+ STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \
+ STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \
+ STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \
+ STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \
+ STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \
+ STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \
+ STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \
+ STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \
+ STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \
+ STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \
+ STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \
+ STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \
+ STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \
+ STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \
+ STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \
+ STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \
+ STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \
+ STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \
+ STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \
+ STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \
+ STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \
+ STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \
+ STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \
+ STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \
+ STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \
+ STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \
+ STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \
+ STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \
+ STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \
+ STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \
+ STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \
+ STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \
+ STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \
+
+#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */
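/*
 * Illustrative sketch (editorial, not part of the patch): one way a consumer
 * can expand the SMOOTHSTEP X-macro into a fixed-point table and decode an
 * entry back to a double.  The table above ends at h = 0x1000000 = 2^24 for
 * y = 1.0, i.e. SMOOTHSTEP_BFP is 24 here; jemalloc's decay code expands the
 * macro into a static array in much the same way.  h_steps and main are
 * local to this sketch.
 */
#include <stdint.h>
#include <stdio.h>
#include "jemalloc/internal/smoothstep.h"

static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y) h,
	SMOOTHSTEP
#undef STEP
};

int
main(void) {
	/* Entry 140 encodes y(0.700): 0xd64063 / 2^24 ~= 0.836920. */
	double y = (double)h_steps[139] / (double)(1ULL << SMOOTHSTEP_BFP);
	printf("smootherstep(0.700) ~= %.6f\n", y);
	return 0;
}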
diff --git a/deps/jemalloc/include/jemalloc/internal/smoothstep.sh b/deps/jemalloc/include/jemalloc/internal/smoothstep.sh
new file mode 100755
index 000000000..65de97bf4
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/smoothstep.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+#
+# Generate a discrete lookup table for a sigmoid function in the smoothstep
+# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table
+# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode
+# the entries using a binary fixed point representation.
+#
+# Usage: smoothstep.sh <variant> <nsteps> <bfp> <xprec> <yprec>
+#
+# <variant> is in {smooth, smoother, smoothest}.
+# <nsteps> must be greater than zero.
+# <bfp> must be in [0..62]; reasonable values are roughly [10..30].
+# <xprec> is x decimal precision.
+# <yprec> is y decimal precision.
+
+#set -x
+
+cmd="sh smoothstep.sh $*"
+variant=$1
+nsteps=$2
+bfp=$3
+xprec=$4
+yprec=$5
+
+case "${variant}" in
+ smooth)
+ ;;
+ smoother)
+ ;;
+ smoothest)
+ ;;
+ *)
+ echo "Unsupported variant"
+ exit 1
+ ;;
+esac
+
+smooth() {
+ step=$1
+ y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+ h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoother() {
+ step=$1
+ y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+ h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+smoothest() {
+ step=$1
+ y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+ h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' `
+}
+
+cat <<EOF
+#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H
+#define JEMALLOC_INTERNAL_SMOOTHSTEP_H
+
+/*
+ * This file was generated by the following command:
+ * $cmd
+ */
+/******************************************************************************/
+
+/*
+ * This header defines a precomputed table based on the smoothstep family of
+ * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0
+ * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so
+ * that floating point math can be avoided.
+ *
+ * 3 2
+ * smoothstep(x) = -2x + 3x
+ *
+ * 5 4 3
+ * smootherstep(x) = 6x - 15x + 10x
+ *
+ * 7 6 5 4
+ * smootheststep(x) = -20x + 70x - 84x + 35x
+ */
+
+#define SMOOTHSTEP_VARIANT "${variant}"
+#define SMOOTHSTEP_NSTEPS ${nsteps}
+#define SMOOTHSTEP_BFP ${bfp}
+#define SMOOTHSTEP \\
+ /* STEP(step, h, x, y) */ \\
+EOF
+
+s=1
+while [ $s -le $nsteps ] ; do
+ $variant ${s}
+ x=`echo ${xprec} k ${s} ${nsteps} / p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'`
+ printf ' STEP(%4d, UINT64_C(0x%016x), %s, %s) \\\n' ${s} ${h} ${x} ${y}
+
+ s=$((s+1))
+done
+echo
+
+cat <<EOF
+#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */
+EOF
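/*
 * Editorial sketch: the dc pipeline above, redone in C for readability.  For
 * the "smoother" variant, y(x) = 6x^5 - 15x^4 + 10x^3 and each emitted h is
 * the integer part of y * 2^bfp (the tr/awk stage truncates at the decimal
 * point).  nsteps = 200 and bfp = 24 are assumed, matching the generated
 * header above.
 */
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int
main(void) {
	const unsigned nsteps = 200, bfp = 24;
	unsigned step;

	for (step = 140; step <= 141; step++) {
		double x = (double)step / nsteps;
		double y = 6 * pow(x, 5) - 15 * pow(x, 4) + 10 * pow(x, 3);
		uint64_t h = (uint64_t)floor(y * ldexp(1.0, bfp));
		/* step 140 should print h = 0xd64063, as in the table. */
		printf("STEP(%4u, UINT64_C(0x%016" PRIx64 "), %.3f, %.15f)\n",
		    step, h, x, y);
	}
	return 0;
}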
diff --git a/deps/jemalloc/include/jemalloc/internal/spin.h b/deps/jemalloc/include/jemalloc/internal/spin.h
new file mode 100644
index 000000000..22804c687
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/spin.h
@@ -0,0 +1,40 @@
+#ifndef JEMALLOC_INTERNAL_SPIN_H
+#define JEMALLOC_INTERNAL_SPIN_H
+
+#define SPIN_INITIALIZER {0U}
+
+typedef struct {
+ unsigned iteration;
+} spin_t;
+
+static inline void
+spin_cpu_spinwait() {
+# if HAVE_CPU_SPINWAIT
+ CPU_SPINWAIT;
+# else
+ volatile int x = 0;
+ x = x;
+# endif
+}
+
+static inline void
+spin_adaptive(spin_t *spin) {
+ volatile uint32_t i;
+
+ if (spin->iteration < 5) {
+ for (i = 0; i < (1U << spin->iteration); i++) {
+ spin_cpu_spinwait();
+ }
+ spin->iteration++;
+ } else {
+#ifdef _WIN32
+ SwitchToThread();
+#else
+ sched_yield();
+#endif
+ }
+}
+
+#undef SPIN_INLINE
+
+#endif /* JEMALLOC_INTERNAL_SPIN_H */
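/*
 * Usage sketch (editorial): adaptive spinning while waiting on a flag.  The
 * first five calls to spin_adaptive() busy-wait for 1, 2, 4, 8 and 16 pause
 * iterations respectively; every call after that yields the CPU instead of
 * burning it.  The `ready` flag is hypothetical, not jemalloc API.
 */
#include <stdatomic.h>
#include "jemalloc/internal/spin.h"

static atomic_bool ready;

static void
wait_until_ready(void) {
	spin_t spinner = SPIN_INITIALIZER;

	while (!atomic_load_explicit(&ready, memory_order_acquire)) {
		spin_adaptive(&spinner);
	}
}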
diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h
index 27f68e368..852e34269 100644
--- a/deps/jemalloc/include/jemalloc/internal/stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats.h
@@ -1,173 +1,30 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct tcache_bin_stats_s tcache_bin_stats_t;
-typedef struct malloc_bin_stats_s malloc_bin_stats_t;
-typedef struct malloc_large_stats_s malloc_large_stats_t;
-typedef struct arena_stats_s arena_stats_t;
-typedef struct chunk_stats_s chunk_stats_t;
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-struct tcache_bin_stats_s {
- /*
- * Number of allocation requests that corresponded to the size of this
- * bin.
- */
- uint64_t nrequests;
-};
-
-struct malloc_bin_stats_s {
- /*
- * Current number of bytes allocated, including objects currently
- * cached by tcache.
- */
- size_t allocated;
-
- /*
- * Total number of allocation/deallocation requests served directly by
- * the bin. Note that tcache may allocate an object, then recycle it
- * many times, resulting many increments to nrequests, but only one
- * each to nmalloc and ndalloc.
- */
- uint64_t nmalloc;
- uint64_t ndalloc;
-
- /*
- * Number of allocation requests that correspond to the size of this
- * bin. This includes requests served by tcache, though tcache only
- * periodically merges into this counter.
- */
- uint64_t nrequests;
-
- /* Number of tcache fills from this bin. */
- uint64_t nfills;
-
- /* Number of tcache flushes to this bin. */
- uint64_t nflushes;
-
- /* Total number of runs created for this bin's size class. */
- uint64_t nruns;
-
- /*
- * Total number of runs reused by extracting them from the runs tree for
- * this bin's size class.
- */
- uint64_t reruns;
-
- /* Current number of runs in this bin. */
- size_t curruns;
+#ifndef JEMALLOC_INTERNAL_STATS_H
+#define JEMALLOC_INTERNAL_STATS_H
+
+/* OPTION(opt, var_name, default, set_value_to) */
+#define STATS_PRINT_OPTIONS \
+ OPTION('J', json, false, true) \
+ OPTION('g', general, true, false) \
+ OPTION('m', merged, config_stats, false) \
+ OPTION('d', destroyed, config_stats, false) \
+ OPTION('a', unmerged, config_stats, false) \
+ OPTION('b', bins, true, false) \
+ OPTION('l', large, true, false) \
+ OPTION('x', mutex, true, false)
+
+enum {
+#define OPTION(o, v, d, s) stats_print_option_num_##v,
+ STATS_PRINT_OPTIONS
+#undef OPTION
+ stats_print_tot_num_options
};
-struct malloc_large_stats_s {
- /*
- * Total number of allocation/deallocation requests served directly by
- * the arena. Note that tcache may allocate an object, then recycle it
- * many times, resulting many increments to nrequests, but only one
- * each to nmalloc and ndalloc.
- */
- uint64_t nmalloc;
- uint64_t ndalloc;
-
- /*
- * Number of allocation requests that correspond to this size class.
- * This includes requests served by tcache, though tcache only
- * periodically merges into this counter.
- */
- uint64_t nrequests;
-
- /* Current number of runs of this size class. */
- size_t curruns;
-};
-
-struct arena_stats_s {
- /* Number of bytes currently mapped. */
- size_t mapped;
-
- /*
- * Total number of purge sweeps, total number of madvise calls made,
- * and total pages purged in order to keep dirty unused memory under
- * control.
- */
- uint64_t npurge;
- uint64_t nmadvise;
- uint64_t purged;
-
- /* Per-size-category statistics. */
- size_t allocated_large;
- uint64_t nmalloc_large;
- uint64_t ndalloc_large;
- uint64_t nrequests_large;
+/* Options for stats_print. */
+extern bool opt_stats_print;
+extern char opt_stats_print_opts[stats_print_tot_num_options+1];
- /*
- * One element for each possible size class, including sizes that
- * overlap with bin size classes. This is necessary because ipalloc()
- * sometimes has to use such large objects in order to assure proper
- * alignment.
- */
- malloc_large_stats_t *lstats;
-};
-
-struct chunk_stats_s {
- /* Number of chunks that were allocated. */
- uint64_t nchunks;
-
- /* High-water mark for number of chunks allocated. */
- size_t highchunks;
-
- /*
- * Current number of chunks allocated. This value isn't maintained for
- * any other purpose, so keep track of it in order to be able to set
- * highchunks.
- */
- size_t curchunks;
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern bool opt_stats_print;
-
-extern size_t stats_cactive;
-
-void stats_print(void (*write)(void *, const char *), void *cbopaque,
+/* Implements je_malloc_stats_print. */
+void stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts);
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-size_t stats_cactive_get(void);
-void stats_cactive_add(size_t size);
-void stats_cactive_sub(size_t size);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
-JEMALLOC_INLINE size_t
-stats_cactive_get(void)
-{
-
- return (atomic_read_z(&stats_cactive));
-}
-
-JEMALLOC_INLINE void
-stats_cactive_add(size_t size)
-{
-
- atomic_add_z(&stats_cactive, size);
-}
-
-JEMALLOC_INLINE void
-stats_cactive_sub(size_t size)
-{
-
- atomic_sub_z(&stats_cactive, size);
-}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_STATS_H */
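/*
 * Editorial sketch of how the OPTION X-macro above is meant to be consumed:
 * parse a malloc_stats_print() opts string (e.g. "Jgbl") into per-option
 * booleans, starting from each option's default and applying set_value_to
 * when its character appears.  This mirrors the pattern, not jemalloc's
 * exact parser; parse_stats_opts is a hypothetical helper.
 */
#include <stdbool.h>

static void
parse_stats_opts(const char *opts, bool out[stats_print_tot_num_options]) {
	const char *p;

	/* Start from each option's default... */
#define OPTION(o, v, d, s) out[stats_print_option_num_##v] = d;
	STATS_PRINT_OPTIONS
#undef OPTION
	/* ...then flip any option whose character appears in opts. */
	for (p = opts; p != NULL && *p != '\0'; p++) {
#define OPTION(o, v, d, s) if (*p == o) out[stats_print_option_num_##v] = s;
		STATS_PRINT_OPTIONS
#undef OPTION
	}
}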
diff --git a/deps/jemalloc/include/jemalloc/internal/sz.h b/deps/jemalloc/include/jemalloc/internal/sz.h
new file mode 100644
index 000000000..979462898
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/sz.h
@@ -0,0 +1,317 @@
+#ifndef JEMALLOC_INTERNAL_SIZE_H
+#define JEMALLOC_INTERNAL_SIZE_H
+
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/util.h"
+
+/*
+ * sz module: Size computations.
+ *
+ * Some abbreviations used here:
+ * p: Page
+ * ind: Index
+ * s, sz: Size
+ * u: Usable size
+ * a: Aligned
+ *
+ * These are not always used completely consistently, but should be enough to
+ * interpret function names. E.g. sz_psz2ind converts page size to page size
+ * index; sz_sa2u converts a (size, alignment) allocation request to the usable
+ * size that would result from such an allocation.
+ */
+
+/*
+ * sz_pind2sz_tab encodes the same information as could be computed by
+ * sz_pind2sz_compute().
+ */
+extern size_t const sz_pind2sz_tab[NPSIZES+1];
+/*
+ * sz_index2size_tab encodes the same information as could be computed (at
+ * unacceptable cost in some code paths) by sz_index2size_compute().
+ */
+extern size_t const sz_index2size_tab[NSIZES];
+/*
+ * sz_size2index_tab is a compact lookup table that rounds request sizes up to
+ * size classes. In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via sz_size2index().
+ */
+extern uint8_t const sz_size2index_tab[];
+
+static const size_t sz_large_pad =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ PAGE
+#else
+ 0
+#endif
+ ;
+
+JEMALLOC_ALWAYS_INLINE pszind_t
+sz_psz2ind(size_t psz) {
+ if (unlikely(psz > LARGE_MAXCLASS)) {
+ return NPSIZES;
+ }
+ {
+ pszind_t x = lg_floor((psz<<1)-1);
+ pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
+ (LG_SIZE_CLASS_GROUP + LG_PAGE);
+ pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+ pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+
+ size_t delta_inverse_mask = ZU(-1) << lg_delta;
+ pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+ ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+ pszind_t ind = grp + mod;
+ return ind;
+ }
+}
+
+static inline size_t
+sz_pind2sz_compute(pszind_t pind) {
+ if (unlikely(pind == NPSIZES)) {
+ return LARGE_MAXCLASS + PAGE;
+ }
+ {
+ size_t grp = pind >> LG_SIZE_CLASS_GROUP;
+ size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+ size_t grp_size_mask = ~((!!grp)-1);
+ size_t grp_size = ((ZU(1) << (LG_PAGE +
+ (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+ size_t shift = (grp == 0) ? 1 : grp;
+ size_t lg_delta = shift + (LG_PAGE-1);
+ size_t mod_size = (mod+1) << lg_delta;
+
+ size_t sz = grp_size + mod_size;
+ return sz;
+ }
+}
+
+static inline size_t
+sz_pind2sz_lookup(pszind_t pind) {
+ size_t ret = (size_t)sz_pind2sz_tab[pind];
+ assert(ret == sz_pind2sz_compute(pind));
+ return ret;
+}
+
+static inline size_t
+sz_pind2sz(pszind_t pind) {
+ assert(pind < NPSIZES+1);
+ return sz_pind2sz_lookup(pind);
+}
+
+static inline size_t
+sz_psz2u(size_t psz) {
+ if (unlikely(psz > LARGE_MAXCLASS)) {
+ return LARGE_MAXCLASS + PAGE;
+ }
+ {
+ size_t x = lg_floor((psz<<1)-1);
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+ size_t delta = ZU(1) << lg_delta;
+ size_t delta_mask = delta - 1;
+ size_t usize = (psz + delta_mask) & ~delta_mask;
+ return usize;
+ }
+}
+
+static inline szind_t
+sz_size2index_compute(size_t size) {
+ if (unlikely(size > LARGE_MAXCLASS)) {
+ return NSIZES;
+ }
+#if (NTBINS != 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
+ return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
+ }
+#endif
+ {
+ szind_t x = lg_floor((size<<1)-1);
+ szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+ x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
+ szind_t grp = shift << LG_SIZE_CLASS_GROUP;
+
+ szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+
+ size_t delta_inverse_mask = ZU(-1) << lg_delta;
+ szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+ ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+ szind_t index = NTBINS + grp + mod;
+ return index;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index_lookup(size_t size) {
+ assert(size <= LOOKUP_MAXCLASS);
+ {
+ szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]);
+ assert(ret == sz_size2index_compute(size));
+ return ret;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index(size_t size) {
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS)) {
+ return sz_size2index_lookup(size);
+ }
+ return sz_size2index_compute(size);
+}
+
+static inline size_t
+sz_index2size_compute(szind_t index) {
+#if (NTBINS > 0)
+ if (index < NTBINS) {
+ return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
+ }
+#endif
+ {
+ size_t reduced_index = index - NTBINS;
+ size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP;
+ size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
+ 1);
+
+ size_t grp_size_mask = ~((!!grp)-1);
+ size_t grp_size = ((ZU(1) << (LG_QUANTUM +
+ (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
+
+ size_t shift = (grp == 0) ? 1 : grp;
+ size_t lg_delta = shift + (LG_QUANTUM-1);
+ size_t mod_size = (mod+1) << lg_delta;
+
+ size_t usize = grp_size + mod_size;
+ return usize;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size_lookup(szind_t index) {
+ size_t ret = (size_t)sz_index2size_tab[index];
+ assert(ret == sz_index2size_compute(index));
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size(szind_t index) {
+ assert(index < NSIZES);
+ return sz_index2size_lookup(index);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u_compute(size_t size) {
+ if (unlikely(size > LARGE_MAXCLASS)) {
+ return 0;
+ }
+#if (NTBINS > 0)
+ if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+ size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+ size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
+ return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
+ (ZU(1) << lg_ceil));
+ }
+#endif
+ {
+ size_t x = lg_floor((size<<1)-1);
+ size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+ ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+ size_t delta = ZU(1) << lg_delta;
+ size_t delta_mask = delta - 1;
+ size_t usize = (size + delta_mask) & ~delta_mask;
+ return usize;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u_lookup(size_t size) {
+ size_t ret = sz_index2size_lookup(sz_size2index_lookup(size));
+
+ assert(ret == sz_s2u_compute(size));
+ return ret;
+}
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u(size_t size) {
+ assert(size > 0);
+ if (likely(size <= LOOKUP_MAXCLASS)) {
+ return sz_s2u_lookup(size);
+ }
+ return sz_s2u_compute(size);
+}
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size and alignment.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_sa2u(size_t size, size_t alignment) {
+ size_t usize;
+
+ assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
+
+ /* Try for a small size class. */
+ if (size <= SMALL_MAXCLASS && alignment < PAGE) {
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each
+ * small size class, every object is aligned at the smallest
+ * power of two that is non-zero in the base two representation
+ * of the size. For example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ */
+ usize = sz_s2u(ALIGNMENT_CEILING(size, alignment));
+ if (usize < LARGE_MINCLASS) {
+ return usize;
+ }
+ }
+
+ /* Large size class. Beware of overflow. */
+
+ if (unlikely(alignment > LARGE_MAXCLASS)) {
+ return 0;
+ }
+
+ /* Make sure result is a large size class. */
+ if (size <= LARGE_MINCLASS) {
+ usize = LARGE_MINCLASS;
+ } else {
+ usize = sz_s2u(size);
+ if (usize < size) {
+ /* size_t overflow. */
+ return 0;
+ }
+ }
+
+ /*
+ * Calculate the multi-page mapping that large_palloc() would need in
+ * order to guarantee the alignment.
+ */
+ if (usize + sz_large_pad + PAGE_CEILING(alignment) - PAGE < usize) {
+ /* size_t overflow. */
+ return 0;
+ }
+ return usize;
+}
+
+#endif /* JEMALLOC_INTERNAL_SIZE_H */
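/*
 * Worked examples (editorial), assuming the default size classes on a 64-bit
 * build with 16-byte quantum and 4 KiB pages: each doubling contains four
 * classes spaced 2^lg_delta apart, so requests round up as follows.
 */
#include <assert.h>

static void
sz_examples(void) {
	assert(sz_s2u(100) == 112);	/* group [64..128): delta = 16 */
	assert(sz_s2u(200) == 224);	/* group [128..256): delta = 32 */
	/*
	 * A 100-byte request at 64-byte alignment: the size is first rounded
	 * up to a multiple of the alignment (128), and the 128 size class is
	 * itself at least 64-byte aligned, so the usable size is 128.
	 */
	assert(sz_sa2u(100, 64) == 128);
}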
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h
deleted file mode 100644
index c3d4b58d4..000000000
--- a/deps/jemalloc/include/jemalloc/internal/tcache.h
+++ /dev/null
@@ -1,443 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-typedef struct tcache_bin_info_s tcache_bin_info_t;
-typedef struct tcache_bin_s tcache_bin_t;
-typedef struct tcache_s tcache_t;
-
-/*
- * tcache pointers close to NULL are used to encode state information that is
- * used for two purposes: preventing thread caching on a per thread basis and
- * cleaning up during thread shutdown.
- */
-#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1)
-#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2)
-#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3)
-#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
-
-/*
- * Absolute maximum number of cache slots for each small bin in the thread
- * cache. This is an additional constraint beyond that imposed as: twice the
- * number of regions per run for this size class.
- *
- * This constant must be an even number.
- */
-#define TCACHE_NSLOTS_SMALL_MAX 200
-
-/* Number of cache slots for large size classes. */
-#define TCACHE_NSLOTS_LARGE 20
-
-/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
-#define LG_TCACHE_MAXCLASS_DEFAULT 15
-
-/*
- * TCACHE_GC_SWEEP is the approximate number of allocation events between
- * full GC sweeps. Integer rounding may cause the actual number to be
- * slightly higher, since GC is performed incrementally.
- */
-#define TCACHE_GC_SWEEP 8192
-
-/* Number of tcache allocation/deallocation events between incremental GCs. */
-#define TCACHE_GC_INCR \
- ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-typedef enum {
- tcache_enabled_false = 0, /* Enable cast to/from bool. */
- tcache_enabled_true = 1,
- tcache_enabled_default = 2
-} tcache_enabled_t;
-
-/*
- * Read-only information associated with each element of tcache_t's tbins array
- * is stored separately, mainly to reduce memory usage.
- */
-struct tcache_bin_info_s {
- unsigned ncached_max; /* Upper limit on ncached. */
-};
-
-struct tcache_bin_s {
- tcache_bin_stats_t tstats;
- int low_water; /* Min # cached since last GC. */
- unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */
- unsigned ncached; /* # of cached objects. */
- void **avail; /* Stack of available objects. */
-};
-
-struct tcache_s {
- ql_elm(tcache_t) link; /* Used for aggregating stats. */
- uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */
- arena_t *arena; /* This thread's arena. */
- unsigned ev_cnt; /* Event count since incremental GC. */
- unsigned next_gc_bin; /* Next bin to GC. */
- tcache_bin_t tbins[1]; /* Dynamically sized. */
- /*
- * The pointer stacks associated with tbins follow as a contiguous
- * array. During tcache initialization, the avail pointer in each
- * element of tbins is initialized to point to the proper offset within
- * this array.
- */
-};
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern bool opt_tcache;
-extern ssize_t opt_lg_tcache_max;
-
-extern tcache_bin_info_t *tcache_bin_info;
-
-/*
- * Number of tcache bins. There are NBINS small-object bins, plus 0 or more
- * large-object bins.
- */
-extern size_t nhbins;
-
-/* Maximum cached size class. */
-extern size_t tcache_maxclass;
-
-size_t tcache_salloc(const void *ptr);
-void tcache_event_hard(tcache_t *tcache);
-void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
- size_t binind);
-void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
-void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache);
-void tcache_arena_associate(tcache_t *tcache, arena_t *arena);
-void tcache_arena_dissociate(tcache_t *tcache);
-tcache_t *tcache_create(arena_t *arena);
-void tcache_destroy(tcache_t *tcache);
-void tcache_thread_cleanup(void *arg);
-void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
-bool tcache_boot0(void);
-bool tcache_boot1(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
-malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
-
-void tcache_event(tcache_t *tcache);
-void tcache_flush(void);
-bool tcache_enabled_get(void);
-tcache_t *tcache_get(bool create);
-void tcache_enabled_set(bool enabled);
-void *tcache_alloc_easy(tcache_bin_t *tbin);
-void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
-void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
-void tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
-void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
-/* Map of thread-specific caches. */
-malloc_tsd_externs(tcache, tcache_t *)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache, tcache_t *, NULL,
- tcache_thread_cleanup)
-/* Per thread flag that allows thread caches to be disabled. */
-malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
-malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, tcache_enabled, tcache_enabled_t,
- tcache_enabled_default, malloc_tsd_no_cleanup)
-
-JEMALLOC_INLINE void
-tcache_flush(void)
-{
- tcache_t *tcache;
-
- cassert(config_tcache);
-
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
- return;
- tcache_destroy(tcache);
- tcache = NULL;
- tcache_tsd_set(&tcache);
-}
-
-JEMALLOC_INLINE bool
-tcache_enabled_get(void)
-{
- tcache_enabled_t tcache_enabled;
-
- cassert(config_tcache);
-
- tcache_enabled = *tcache_enabled_tsd_get();
- if (tcache_enabled == tcache_enabled_default) {
- tcache_enabled = (tcache_enabled_t)opt_tcache;
- tcache_enabled_tsd_set(&tcache_enabled);
- }
-
- return ((bool)tcache_enabled);
-}
-
-JEMALLOC_INLINE void
-tcache_enabled_set(bool enabled)
-{
- tcache_enabled_t tcache_enabled;
- tcache_t *tcache;
-
- cassert(config_tcache);
-
- tcache_enabled = (tcache_enabled_t)enabled;
- tcache_enabled_tsd_set(&tcache_enabled);
- tcache = *tcache_tsd_get();
- if (enabled) {
- if (tcache == TCACHE_STATE_DISABLED) {
- tcache = NULL;
- tcache_tsd_set(&tcache);
- }
- } else /* disabled */ {
- if (tcache > TCACHE_STATE_MAX) {
- tcache_destroy(tcache);
- tcache = NULL;
- }
- if (tcache == NULL) {
- tcache = TCACHE_STATE_DISABLED;
- tcache_tsd_set(&tcache);
- }
- }
-}
-
-JEMALLOC_ALWAYS_INLINE tcache_t *
-tcache_get(bool create)
-{
- tcache_t *tcache;
-
- if (config_tcache == false)
- return (NULL);
- if (config_lazy_lock && isthreaded == false)
- return (NULL);
-
- tcache = *tcache_tsd_get();
- if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
- if (tcache == TCACHE_STATE_DISABLED)
- return (NULL);
- if (tcache == NULL) {
- if (create == false) {
- /*
- * Creating a tcache here would cause
- * allocation as a side effect of free().
- * Ordinarily that would be okay since
- * tcache_create() failure is a soft failure
- * that doesn't propagate. However, if TLS
- * data are freed via free() as in glibc,
- * subtle corruption could result from setting
- * a TLS variable after its backing memory is
- * freed.
- */
- return (NULL);
- }
- if (tcache_enabled_get() == false) {
- tcache_enabled_set(false); /* Memoize. */
- return (NULL);
- }
- return (tcache_create(choose_arena(NULL)));
- }
- if (tcache == TCACHE_STATE_PURGATORY) {
- /*
- * Make a note that an allocator function was called
- * after tcache_thread_cleanup() was called.
- */
- tcache = TCACHE_STATE_REINCARNATED;
- tcache_tsd_set(&tcache);
- return (NULL);
- }
- if (tcache == TCACHE_STATE_REINCARNATED)
- return (NULL);
- not_reached();
- }
-
- return (tcache);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-tcache_event(tcache_t *tcache)
-{
-
- if (TCACHE_GC_INCR == 0)
- return;
-
- tcache->ev_cnt++;
- assert(tcache->ev_cnt <= TCACHE_GC_INCR);
- if (tcache->ev_cnt == TCACHE_GC_INCR)
- tcache_event_hard(tcache);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_easy(tcache_bin_t *tbin)
-{
- void *ret;
-
- if (tbin->ncached == 0) {
- tbin->low_water = -1;
- return (NULL);
- }
- tbin->ncached--;
- if ((int)tbin->ncached < tbin->low_water)
- tbin->low_water = tbin->ncached;
- ret = tbin->avail[tbin->ncached];
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
-{
- void *ret;
- size_t binind;
- tcache_bin_t *tbin;
-
- binind = SMALL_SIZE2BIN(size);
- assert(binind < NBINS);
- tbin = &tcache->tbins[binind];
- size = arena_bin_info[binind].reg_size;
- ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
- ret = tcache_alloc_small_hard(tcache, tbin, binind);
- if (ret == NULL)
- return (NULL);
- }
- assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
-
- if (zero == false) {
- if (config_fill) {
- if (opt_junk) {
- arena_alloc_junk_small(ret,
- &arena_bin_info[binind], false);
- } else if (opt_zero)
- memset(ret, 0, size);
- }
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- } else {
- if (config_fill && opt_junk) {
- arena_alloc_junk_small(ret, &arena_bin_info[binind],
- true);
- }
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
- }
-
- if (config_stats)
- tbin->tstats.nrequests++;
- if (config_prof)
- tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
- tcache_event(tcache);
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void *
-tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
-{
- void *ret;
- size_t binind;
- tcache_bin_t *tbin;
-
- size = PAGE_CEILING(size);
- assert(size <= tcache_maxclass);
- binind = NBINS + (size >> LG_PAGE) - 1;
- assert(binind < nhbins);
- tbin = &tcache->tbins[binind];
- ret = tcache_alloc_easy(tbin);
- if (ret == NULL) {
- /*
- * Only allocate one large object at a time, because it's quite
- * expensive to create one and not use it.
- */
- ret = arena_malloc_large(tcache->arena, size, zero);
- if (ret == NULL)
- return (NULL);
- } else {
- if (config_prof && prof_promote && size == PAGE) {
- arena_chunk_t *chunk =
- (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
- size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
- LG_PAGE);
- arena_mapbits_large_binind_set(chunk, pageind,
- BININD_INVALID);
- }
- if (zero == false) {
- if (config_fill) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
- }
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- } else {
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
- }
-
- if (config_stats)
- tbin->tstats.nrequests++;
- if (config_prof)
- tcache->prof_accumbytes += size;
- }
-
- tcache_event(tcache);
- return (ret);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
-{
- tcache_bin_t *tbin;
- tcache_bin_info_t *tbin_info;
-
- assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
-
- if (config_fill && opt_junk)
- arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
-
- tbin = &tcache->tbins[binind];
- tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
- }
- assert(tbin->ncached < tbin_info->ncached_max);
- tbin->avail[tbin->ncached] = ptr;
- tbin->ncached++;
-
- tcache_event(tcache);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
-{
- size_t binind;
- tcache_bin_t *tbin;
- tcache_bin_info_t *tbin_info;
-
- assert((size & PAGE_MASK) == 0);
- assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
- assert(tcache_salloc(ptr) <= tcache_maxclass);
-
- binind = NBINS + (size >> LG_PAGE) - 1;
-
- if (config_fill && opt_junk)
- memset(ptr, 0x5a, size);
-
- tbin = &tcache->tbins[binind];
- tbin_info = &tcache_bin_info[binind];
- if (tbin->ncached == tbin_info->ncached_max) {
- tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
- 1), tcache);
- }
- assert(tbin->ncached < tbin_info->ncached_max);
- tbin->avail[tbin->ncached] = ptr;
- tbin->ncached++;
-
- tcache_event(tcache);
-}
-#endif
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
new file mode 100644
index 000000000..790367bd4
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
+#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
+
+#include "jemalloc/internal/size_classes.h"
+
+extern bool opt_tcache;
+extern ssize_t opt_lg_tcache_max;
+
+extern cache_bin_info_t *tcache_bin_info;
+
+/*
+ * Number of tcache bins. There are NBINS small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern unsigned nhbins;
+
+/* Maximum cached size class. */
+extern size_t tcache_maxclass;
+
+/*
+ * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
+ * usable via the MALLOCX_TCACHE() flag. The automatic per-thread tcaches are
+ * completely disjoint from this data structure. tcaches starts off as a sparse
+ * array, so it has no physical memory footprint until individual pages are
+ * touched. This allows the entire array to be allocated the first time an
+ * explicit tcache is created without a disproportionate impact on memory usage.
+ */
+extern tcaches_t *tcaches;
+
+size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
+void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
+void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+ cache_bin_t *tbin, szind_t binind, bool *tcache_success);
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+ szind_t binind, unsigned rem);
+void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
+ unsigned rem, tcache_t *tcache);
+void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
+ arena_t *arena);
+tcache_t *tcache_create_explicit(tsd_t *tsd);
+void tcache_cleanup(tsd_t *tsd);
+void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+bool tcaches_create(tsd_t *tsd, unsigned *r_ind);
+void tcaches_flush(tsd_t *tsd, unsigned ind);
+void tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool tcache_boot(tsdn_t *tsdn);
+void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+void tcache_prefork(tsdn_t *tsdn);
+void tcache_postfork_parent(tsdn_t *tsdn);
+void tcache_postfork_child(tsdn_t *tsdn);
+void tcache_flush(tsd_t *tsd);
+bool tsd_tcache_data_init(tsd_t *tsd);
+bool tsd_tcache_enabled_data_init(tsd_t *tsd);
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */
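/*
 * Editorial sketch of the explicit-tcache lifecycle described above, driven
 * through jemalloc's public API rather than the internal functions declared
 * in this header (see the tcache.* entries in the jemalloc manual's MALLCTL
 * NAMESPACE section).
 */
#include <jemalloc/jemalloc.h>

static void
explicit_tcache_demo(void) {
	unsigned ind;
	size_t sz = sizeof(ind);
	void *p;

	if (mallctl("tcache.create", &ind, &sz, NULL, 0) != 0) {
		return;
	}
	/* The sparse tcaches slot is populated lazily on first use. */
	p = mallocx(64, MALLOCX_TCACHE(ind));
	if (p != NULL) {
		dallocx(p, MALLOCX_TCACHE(ind));
	}
	mallctl("tcache.flush", NULL, NULL, &ind, sizeof(ind));
	mallctl("tcache.destroy", NULL, NULL, &ind, sizeof(ind));
}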
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
new file mode 100644
index 000000000..0f6ab8cb5
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
@@ -0,0 +1,223 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H
+#define JEMALLOC_INTERNAL_TCACHE_INLINES_H
+
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/ticker.h"
+#include "jemalloc/internal/util.h"
+
+static inline bool
+tcache_enabled_get(tsd_t *tsd) {
+ return tsd_tcache_enabled_get(tsd);
+}
+
+static inline void
+tcache_enabled_set(tsd_t *tsd, bool enabled) {
+ bool was_enabled = tsd_tcache_enabled_get(tsd);
+
+ if (!was_enabled && enabled) {
+ tsd_tcache_data_init(tsd);
+ } else if (was_enabled && !enabled) {
+ tcache_cleanup(tsd);
+ }
+ /* Commit the state last. Above calls check current state. */
+ tsd_tcache_enabled_set(tsd, enabled);
+ tsd_slow_update(tsd);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_event(tsd_t *tsd, tcache_t *tcache) {
+ if (TCACHE_GC_INCR == 0) {
+ return;
+ }
+
+ if (unlikely(ticker_tick(&tcache->gc_ticker))) {
+ tcache_event_hard(tsd, tcache);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
+ UNUSED size_t size, szind_t binind, bool zero, bool slow_path) {
+ void *ret;
+ cache_bin_t *bin;
+ bool tcache_success;
+ size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+
+ assert(binind < NBINS);
+ bin = tcache_small_bin_get(tcache, binind);
+ ret = cache_bin_alloc_easy(bin, &tcache_success);
+ assert(tcache_success == (ret != NULL));
+ if (unlikely(!tcache_success)) {
+ bool tcache_hard_success;
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL)) {
+ return NULL;
+ }
+
+ ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
+ bin, binind, &tcache_hard_success);
+ if (tcache_hard_success == false) {
+ return NULL;
+ }
+ }
+
+ assert(ret);
+ /*
+ * Only compute usize if required. The checks in the following if
+ * statement are all static.
+ */
+ if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+ usize = sz_index2size(binind);
+ assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize);
+ }
+
+ if (likely(!zero)) {
+ if (slow_path && config_fill) {
+ if (unlikely(opt_junk_alloc)) {
+ arena_alloc_junk_small(ret, &bin_infos[binind],
+ false);
+ } else if (unlikely(opt_zero)) {
+ memset(ret, 0, usize);
+ }
+ }
+ } else {
+ if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
+ arena_alloc_junk_small(ret, &bin_infos[binind], true);
+ }
+ memset(ret, 0, usize);
+ }
+
+ if (config_stats) {
+ bin->tstats.nrequests++;
+ }
+ if (config_prof) {
+ tcache->prof_accumbytes += usize;
+ }
+ tcache_event(tsd, tcache);
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+ szind_t binind, bool zero, bool slow_path) {
+ void *ret;
+ cache_bin_t *bin;
+ bool tcache_success;
+
+ assert(binind >= NBINS && binind < nhbins);
+ bin = tcache_large_bin_get(tcache, binind);
+ ret = cache_bin_alloc_easy(bin, &tcache_success);
+ assert(tcache_success == (ret != NULL));
+ if (unlikely(!tcache_success)) {
+ /*
+ * Only allocate one large object at a time, because it's quite
+ * expensive to create one and not use it.
+ */
+ arena = arena_choose(tsd, arena);
+ if (unlikely(arena == NULL)) {
+ return NULL;
+ }
+
+ ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero);
+ if (ret == NULL) {
+ return NULL;
+ }
+ } else {
+ size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+
+ /* Only compute usize on demand */
+ if (config_prof || (slow_path && config_fill) ||
+ unlikely(zero)) {
+ usize = sz_index2size(binind);
+ assert(usize <= tcache_maxclass);
+ }
+
+ if (likely(!zero)) {
+ if (slow_path && config_fill) {
+ if (unlikely(opt_junk_alloc)) {
+ memset(ret, JEMALLOC_ALLOC_JUNK,
+ usize);
+ } else if (unlikely(opt_zero)) {
+ memset(ret, 0, usize);
+ }
+ }
+ } else {
+ memset(ret, 0, usize);
+ }
+
+ if (config_stats) {
+ bin->tstats.nrequests++;
+ }
+ if (config_prof) {
+ tcache->prof_accumbytes += usize;
+ }
+ }
+
+ tcache_event(tsd, tcache);
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+ bool slow_path) {
+ cache_bin_t *bin;
+ cache_bin_info_t *bin_info;
+
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
+
+ if (slow_path && config_fill && unlikely(opt_junk_free)) {
+ arena_dalloc_junk_small(ptr, &bin_infos[binind]);
+ }
+
+ bin = tcache_small_bin_get(tcache, binind);
+ bin_info = &tcache_bin_info[binind];
+ if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ tcache_bin_flush_small(tsd, tcache, bin, binind,
+ (bin_info->ncached_max >> 1));
+ }
+ assert(bin->ncached < bin_info->ncached_max);
+ bin->ncached++;
+ *(bin->avail - bin->ncached) = ptr;
+
+ tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+ bool slow_path) {
+ cache_bin_t *bin;
+ cache_bin_info_t *bin_info;
+
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+ assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
+
+ if (slow_path && config_fill && unlikely(opt_junk_free)) {
+ large_dalloc_junk(ptr, sz_index2size(binind));
+ }
+
+ bin = tcache_large_bin_get(tcache, binind);
+ bin_info = &tcache_bin_info[binind];
+ if (unlikely(bin->ncached == bin_info->ncached_max)) {
+ tcache_bin_flush_large(tsd, bin, binind,
+ (bin_info->ncached_max >> 1), tcache);
+ }
+ assert(bin->ncached < bin_info->ncached_max);
+ bin->ncached++;
+ *(bin->avail - bin->ncached) = ptr;
+
+ tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcaches_get(tsd_t *tsd, unsigned ind) {
+ tcaches_t *elm = &tcaches[ind];
+ if (unlikely(elm->tcache == NULL)) {
+ elm->tcache = tcache_create_explicit(tsd);
+ }
+ return elm->tcache;
+}
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
new file mode 100644
index 000000000..07b738705
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
@@ -0,0 +1,61 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
+#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
+
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/cache_bin.h"
+#include "jemalloc/internal/ticker.h"
+
+struct tcache_s {
+ /*
+ * To minimize our cache-footprint, we put the frequently accessed data
+ * together at the start of this struct.
+ */
+
+ /* Cleared after arena_prof_accum(). */
+ uint64_t prof_accumbytes;
+ /* Drives incremental GC. */
+ ticker_t gc_ticker;
+ /*
+ * The pointer stacks associated with bins follow as a contiguous array.
+ * During tcache initialization, the avail pointer in each element of
+ * tbins is initialized to point to the proper offset within this array.
+ */
+ cache_bin_t bins_small[NBINS];
+
+ /*
+ * This data is less hot; we can be a little less careful with our
+ * footprint here.
+ */
+ /* Lets us track all the tcaches in an arena. */
+ ql_elm(tcache_t) link;
+ /*
+ * The descriptor lets the arena find our cache bins without seeing the
+ * tcache definition. This enables arenas to aggregate stats across
+ * tcaches without having a tcache dependency.
+ */
+ cache_bin_array_descriptor_t cache_bin_array_descriptor;
+
+ /* The arena this tcache is associated with. */
+ arena_t *arena;
+ /* Next bin to GC. */
+ szind_t next_gc_bin;
+ /* For small bins, fill (ncached_max >> lg_fill_div). */
+ uint8_t lg_fill_div[NBINS];
+ /*
+ * We put the cache bins for large size classes at the end of the
+ * struct, since some of them might not get used. This might end up
+ * letting us avoid touching an extra page if we don't have to.
+ */
+ cache_bin_t bins_large[NSIZES-NBINS];
+};
+
+/* Linkage for list of available (previously used) explicit tcache IDs. */
+struct tcaches_s {
+ union {
+ tcache_t *tcache;
+ tcaches_t *next;
+ };
+};
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache_types.h b/deps/jemalloc/include/jemalloc/internal/tcache_types.h
new file mode 100644
index 000000000..e49bc9d79
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_types.h
@@ -0,0 +1,56 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
+#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
+
+#include "jemalloc/internal/size_classes.h"
+
+typedef struct tcache_s tcache_t;
+typedef struct tcaches_s tcaches_t;
+
+/*
+ * tcache pointers close to NULL are used to encode state information that is
+ * used for two purposes: preventing thread caching on a per thread basis and
+ * cleaning up during thread shutdown.
+ */
+#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1)
+#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2)
+#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3)
+#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY
+
+/*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define TCACHE_NSLOTS_SMALL_MIN 20
+
+/*
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache. This is an additional constraint beyond the implicit one of twice
+ * the number of regions per slab for this size class.
+ *
+ * This constant must be an even number.
+ */
+#define TCACHE_NSLOTS_SMALL_MAX 200
+
+/* Number of cache slots for large size classes. */
+#define TCACHE_NSLOTS_LARGE 20
+
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
+#define LG_TCACHE_MAXCLASS_DEFAULT 15
+
+/*
+ * TCACHE_GC_SWEEP is the approximate number of allocation events between
+ * full GC sweeps. Integer rounding may cause the actual number to be
+ * slightly higher, since GC is performed incrementally.
+ */
+#define TCACHE_GC_SWEEP 8192
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+#define TCACHE_GC_INCR \
+ ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
+
+/* Used in TSD static initializer only. Real init in tcache_data_init(). */
+#define TCACHE_ZERO_INITIALIZER {0}
+
+/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
+#define TCACHE_ENABLED_ZERO_INITIALIZER false
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
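/*
 * Worked example (editorial): with NBINS == 36 (an assumed size-class
 * configuration), TCACHE_GC_INCR == 8192/36 + 1 == 228, so one bin is
 * garbage-collected roughly every 228 allocation/deallocation events, and a
 * full sweep over all bins takes slightly more than TCACHE_GC_SWEEP events,
 * as the comment above notes.
 */
#include <assert.h>

static_assert((8192 / 36) + ((8192 / 36 == 0) ? 0 : 1) == 228,
    "incremental GC period for NBINS == 36");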
diff --git a/deps/jemalloc/include/jemalloc/internal/ticker.h b/deps/jemalloc/include/jemalloc/internal/ticker.h
new file mode 100644
index 000000000..4b3604708
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/ticker.h
@@ -0,0 +1,78 @@
+#ifndef JEMALLOC_INTERNAL_TICKER_H
+#define JEMALLOC_INTERNAL_TICKER_H
+
+#include "jemalloc/internal/util.h"
+
+/**
+ * A ticker makes it easy to count down events until some limit. You
+ * ticker_init the ticker to trigger every nticks events. You then notify it
+ * that an event has occurred with calls to ticker_tick (or that nticks events
+ * have occurred with a call to ticker_ticks), which will return true (and
+ * reset the counter) if the countdown hits zero.
+ */
+
+typedef struct {
+ int32_t tick;
+ int32_t nticks;
+} ticker_t;
+
+static inline void
+ticker_init(ticker_t *ticker, int32_t nticks) {
+ ticker->tick = nticks;
+ ticker->nticks = nticks;
+}
+
+static inline void
+ticker_copy(ticker_t *ticker, const ticker_t *other) {
+ *ticker = *other;
+}
+
+static inline int32_t
+ticker_read(const ticker_t *ticker) {
+ return ticker->tick;
+}
+
+/*
+ * Not intended to be a public API. Unfortunately, on x86, neither gcc nor
+ * clang seems smart enough to turn
+ * ticker->tick -= nticks;
+ * if (unlikely(ticker->tick < 0)) {
+ * fixup ticker
+ * return true;
+ * }
+ * return false;
+ * into
+ * subq %nticks_reg, (%ticker_reg)
+ * js fixup ticker
+ *
+ * unless we force "fixup ticker" out of line. In that case, gcc gets it right,
+ * but clang now does worse than before. So, on x86 with gcc, we force it out
+ * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be
+ * worth the hassle, but this is on the fast path of both malloc and free (via
+ * tcache_event).
+ */
+#if defined(__GNUC__) && !defined(__clang__) \
+ && (defined(__x86_64__) || defined(__i386__))
+JEMALLOC_NOINLINE
+#endif
+static bool
+ticker_fixup(ticker_t *ticker) {
+ ticker->tick = ticker->nticks;
+ return true;
+}
+
+static inline bool
+ticker_ticks(ticker_t *ticker, int32_t nticks) {
+ ticker->tick -= nticks;
+ if (unlikely(ticker->tick < 0)) {
+ return ticker_fixup(ticker);
+ }
+ return false;
+}
+
+static inline bool
+ticker_tick(ticker_t *ticker) {
+ return ticker_ticks(ticker, 1);
+}
+
+#endif /* JEMALLOC_INTERNAL_TICKER_H */
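/*
 * Usage sketch (editorial): run periodic work every 100 events, the same
 * pattern tcache_event() applies to gc_ticker.  Note that the ticker fires
 * on the 101st and 202nd calls here: it triggers once the count goes
 * negative, then resets itself to nticks.
 */
#include "jemalloc/internal/ticker.h"

static unsigned
ticker_demo(void) {
	ticker_t t;
	unsigned fired = 0;
	int i;

	ticker_init(&t, 100);
	for (i = 0; i < 250; i++) {
		if (ticker_tick(&t)) {
			fired++;	/* periodic work goes here */
		}
	}
	return fired;	/* 2 */
}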
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
index 9fb4a23ec..0b9841aa7 100644
--- a/deps/jemalloc/include/jemalloc/internal/tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -1,434 +1,326 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_TSD_H
+#define JEMALLOC_INTERNAL_TSD_H
-/* Maximum number of malloc_tsd users with cleanup functions. */
-#define MALLOC_TSD_CLEANUPS_MAX 8
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/prof_types.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/rtree_tsd.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/tcache_structs.h"
+#include "jemalloc/internal/util.h"
+#include "jemalloc/internal/witness.h"
-typedef bool (*malloc_tsd_cleanup_t)(void);
-
-#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
- !defined(_WIN32))
-typedef struct tsd_init_block_s tsd_init_block_t;
-typedef struct tsd_init_head_s tsd_init_head_t;
+/*
+ * Thread-Specific-Data layout
+ * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
+ * s: state
+ * e: tcache_enabled
+ * m: thread_allocated (config_stats)
+ * f: thread_deallocated (config_stats)
+ * p: prof_tdata (config_prof)
+ * c: rtree_ctx (rtree cache accessed on deallocation)
+ * t: tcache
+ * --- data not accessed on tcache fast path: arena-related fields ---
+ * d: arenas_tdata_bypass
+ * r: reentrancy_level
+ * x: narenas_tdata
+ * i: iarena
+ * a: arena
+ * o: arenas_tdata
+ * Loading TSD data is on the critical path of basically all malloc operations.
+ * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
+ * Use a compact layout to reduce cache footprint.
+ * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
+ * |---------------------------- 1st cacheline ----------------------------|
+ * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
+ * |---------------------------- 2nd cacheline ----------------------------|
+ * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
+ * |---------------------------- 3rd cacheline ----------------------------|
+ * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
+ * +-------------------------------------------------------------------------+
+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
+ *
+ * The last 3 members (i, a and o) before tcache aren't really needed on the
+ * tcache fast path. However, we have a number of unused tcache bins and
+ * witnesses (never touched unless config_debug) at the end of tcache, so we
+ * place them there to avoid breaking the cachelines and possibly paging in an
+ * extra page.
+ */
+#ifdef JEMALLOC_JET
+typedef void (*test_callback_t)(int *);
+# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
+# define MALLOC_TEST_TSD \
+ O(test_data, int, int) \
+ O(test_callback, test_callback_t, int)
+# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
+#else
+# define MALLOC_TEST_TSD
+# define MALLOC_TEST_TSD_INITIALIZER
#endif
+/* O(name, type, nullable type) */
+#define MALLOC_TSD \
+ O(tcache_enabled, bool, bool) \
+ O(arenas_tdata_bypass, bool, bool) \
+ O(reentrancy_level, int8_t, int8_t) \
+ O(narenas_tdata, uint32_t, uint32_t) \
+ O(offset_state, uint64_t, uint64_t) \
+ O(thread_allocated, uint64_t, uint64_t) \
+ O(thread_deallocated, uint64_t, uint64_t) \
+ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
+ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \
+ O(iarena, arena_t *, arena_t *) \
+ O(arena, arena_t *, arena_t *) \
+ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\
+ O(tcache, tcache_t, tcache_t) \
+ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \
+ MALLOC_TEST_TSD
+
+#define TSD_INITIALIZER { \
+ tsd_state_uninitialized, \
+ TCACHE_ENABLED_ZERO_INITIALIZER, \
+ false, \
+ 0, \
+ 0, \
+ 0, \
+ 0, \
+ 0, \
+ NULL, \
+ RTREE_CTX_ZERO_INITIALIZER, \
+ NULL, \
+ NULL, \
+ NULL, \
+ TCACHE_ZERO_INITIALIZER, \
+ WITNESS_TSD_INITIALIZER \
+ MALLOC_TEST_TSD_INITIALIZER \
+}
+
+enum {
+ tsd_state_nominal = 0, /* Common case --> jnz. */
+ tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
+ /* the above 2 nominal states should be lower values. */
+ tsd_state_nominal_max = 1, /* used for comparison only. */
+ tsd_state_minimal_initialized = 2,
+ tsd_state_purgatory = 3,
+ tsd_state_reincarnated = 4,
+ tsd_state_uninitialized = 5
+};
+
+/* Manually limit tsd_state_t to a single byte. */
+typedef uint8_t tsd_state_t;
+
+/* The actual tsd. */
+struct tsd_s {
+ /*
+ * The contents should be treated as totally opaque outside the tsd
+ * module. Access any thread-local state through the getters and
+ * setters below.
+ */
+ tsd_state_t state;
+#define O(n, t, nt) \
+ t use_a_getter_or_setter_instead_##n;
+MALLOC_TSD
+#undef O
+};
+
/*
- * TLS/TSD-agnostic macro-based implementation of thread-specific data. There
- * are four macros that support (at least) three use cases: file-private,
- * library-private, and library-private inlined. Following is an example
- * library-private tsd variable:
- *
- * In example.h:
- * typedef struct {
- * int x;
- * int y;
- * } example_t;
- * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
- * malloc_tsd_protos(, example, example_t *)
- * malloc_tsd_externs(example, example_t *)
- * In example.c:
- * malloc_tsd_data(, example, example_t *, EX_INITIALIZER)
- * malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER,
- * example_tsd_cleanup)
- *
- * The result is a set of generated functions, e.g.:
- *
- * bool example_tsd_boot(void) {...}
- * example_t **example_tsd_get() {...}
- * void example_tsd_set(example_t **val) {...}
- *
- * Note that all of the functions deal in terms of (a_type *) rather than
- * (a_type) so that it is possible to support non-pointer types (unlike
- * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is
- * cast to (void *). This means that the cleanup function needs to cast *and*
- * dereference the function argument, e.g.:
- *
- * void
- * example_tsd_cleanup(void *arg)
- * {
- * example_t *example = *(example_t **)arg;
- *
- * [...]
- * if ([want the cleanup function to be called again]) {
- * example_tsd_set(&example);
- * }
- * }
- *
- * If example_tsd_set() is called within example_tsd_cleanup(), it will be
- * called again. This is similar to how pthreads TSD destruction works, except
- * that pthreads only calls the cleanup function again if the value was set to
- * non-NULL.
+ * Wrapper around tsd_t that makes it possible to avoid implicit conversion
+ * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
+ * explicitly converted to tsd_t, which is non-nullable.
*/
+struct tsdn_s {
+ tsd_t tsd;
+};
+#define TSDN_NULL ((tsdn_t *)0)
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsd_tsdn(tsd_t *tsd) {
+ return (tsdn_t *)tsd;
+}
-/* malloc_tsd_protos(). */
-#define malloc_tsd_protos(a_attr, a_name, a_type) \
-a_attr bool \
-a_name##_tsd_boot(void); \
-a_attr a_type * \
-a_name##_tsd_get(void); \
-a_attr void \
-a_name##_tsd_set(a_type *val);
+JEMALLOC_ALWAYS_INLINE bool
+tsdn_null(const tsdn_t *tsdn) {
+ return tsdn == NULL;
+}
-/* malloc_tsd_externs(). */
-#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
-#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern __thread bool a_name##_initialized; \
-extern bool a_name##_booted;
-#elif (defined(JEMALLOC_TLS))
-#define malloc_tsd_externs(a_name, a_type) \
-extern __thread a_type a_name##_tls; \
-extern pthread_key_t a_name##_tsd; \
-extern bool a_name##_booted;
-#elif (defined(_WIN32))
-#define malloc_tsd_externs(a_name, a_type) \
-extern DWORD a_name##_tsd; \
-extern bool a_name##_booted;
-#else
-#define malloc_tsd_externs(a_name, a_type) \
-extern pthread_key_t a_name##_tsd; \
-extern tsd_init_head_t a_name##_tsd_init_head; \
-extern bool a_name##_booted;
-#endif
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsdn_tsd(tsdn_t *tsdn) {
+ assert(!tsdn_null(tsdn));
+
+ return &tsdn->tsd;
+}
+
+void *malloc_tsd_malloc(size_t size);
+void malloc_tsd_dalloc(void *wrapper);
+void malloc_tsd_cleanup_register(bool (*f)(void));
+tsd_t *malloc_tsd_boot0(void);
+void malloc_tsd_boot1(void);
+void tsd_cleanup(void *arg);
+tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
+void tsd_slow_update(tsd_t *tsd);
-/* malloc_tsd_data(). */
+/*
+ * We put the platform-specific data declarations and inlines into their own
+ * header files to avoid cluttering this file. They define tsd_boot0,
+ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set.
+ */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
-#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
-a_attr __thread bool JEMALLOC_TLS_MODEL \
- a_name##_initialized = false; \
-a_attr bool a_name##_booted = false;
+#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
#elif (defined(JEMALLOC_TLS))
-#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr __thread a_type JEMALLOC_TLS_MODEL \
- a_name##_tls = a_initializer; \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+#include "jemalloc/internal/tsd_tls.h"
#elif (defined(_WIN32))
-#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr DWORD a_name##_tsd; \
-a_attr bool a_name##_booted = false;
+#include "jemalloc/internal/tsd_win.h"
#else
-#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \
-a_attr pthread_key_t a_name##_tsd; \
-a_attr tsd_init_head_t a_name##_tsd_init_head = { \
- ql_head_initializer(blocks), \
- MALLOC_MUTEX_INITIALIZER \
-}; \
-a_attr bool a_name##_booted = false;
+#include "jemalloc/internal/tsd_generic.h"
#endif
-/* malloc_tsd_funcs(). */
-#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
-#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
- a_cleanup) \
-/* Initialization/cleanup. */ \
-a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
-{ \
- \
- if (a_name##_initialized) { \
- a_name##_initialized = false; \
- a_cleanup(&a_name##_tls); \
- } \
- return (a_name##_initialized); \
-} \
-a_attr bool \
-a_name##_tsd_boot(void) \
-{ \
- \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
- } \
- a_name##_booted = true; \
- return (false); \
-} \
-/* Get/set. */ \
-a_attr a_type * \
-a_name##_tsd_get(void) \
-{ \
- \
- assert(a_name##_booted); \
- return (&a_name##_tls); \
-} \
-a_attr void \
-a_name##_tsd_set(a_type *val) \
-{ \
- \
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
- if (a_cleanup != malloc_tsd_no_cleanup) \
- a_name##_initialized = true; \
+/*
+ * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
+ * foo. This omits some safety checks, and so can be used during tsd
+ * initialization and cleanup.
+ */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get_unsafe(tsd_t *tsd) { \
+ return &tsd->use_a_getter_or_setter_instead_##n; \
}
-#elif (defined(JEMALLOC_TLS))
-#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
- a_cleanup) \
-/* Initialization/cleanup. */ \
-a_attr bool \
-a_name##_tsd_boot(void) \
-{ \
- \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0) \
- return (true); \
- } \
- a_name##_booted = true; \
- return (false); \
-} \
-/* Get/set. */ \
-a_attr a_type * \
-a_name##_tsd_get(void) \
-{ \
- \
- assert(a_name##_booted); \
- return (&a_name##_tls); \
-} \
-a_attr void \
-a_name##_tsd_set(a_type *val) \
-{ \
- \
- assert(a_name##_booted); \
- a_name##_tls = (*val); \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)(&a_name##_tls))) { \
- malloc_write("<jemalloc>: Error" \
- " setting TSD for "#a_name"\n"); \
- if (opt_abort) \
- abort(); \
- } \
- } \
+MALLOC_TSD
+#undef O
+
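To make the x-macro concrete: assuming MALLOC_TSD contains an entry such as O(thread_allocated, uint64_t, uint64_t) (the entry name here is illustrative), the pattern above expands to:

JEMALLOC_ALWAYS_INLINE uint64_t *
tsd_thread_allocatedp_get_unsafe(tsd_t *tsd) {
	return &tsd->use_a_getter_or_setter_instead_thread_allocated;
}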
+/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t * \
+tsd_##n##p_get(tsd_t *tsd) { \
+ assert(tsd->state == tsd_state_nominal || \
+ tsd->state == tsd_state_nominal_slow || \
+ tsd->state == tsd_state_reincarnated || \
+ tsd->state == tsd_state_minimal_initialized); \
+ return tsd_##n##p_get_unsafe(tsd); \
}
-#elif (defined(_WIN32))
-#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
- a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
-/* Initialization/cleanup. */ \
-a_attr bool \
-a_name##_tsd_cleanup_wrapper(void) \
-{ \
- a_name##_tsd_wrapper_t *wrapper; \
- \
- wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd); \
- if (wrapper == NULL) \
- return (false); \
- if (a_cleanup != malloc_tsd_no_cleanup && \
- wrapper->initialized) { \
- a_type val = wrapper->val; \
- a_type tsd_static_data = a_initializer; \
- wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- a_cleanup(&val); \
- if (wrapper->initialized) { \
- /* Trigger another cleanup round. */ \
- return (true); \
- } \
- } \
- malloc_tsd_dalloc(wrapper); \
- return (false); \
-} \
-a_attr bool \
-a_name##_tsd_boot(void) \
-{ \
- \
- a_name##_tsd = TlsAlloc(); \
- if (a_name##_tsd == TLS_OUT_OF_INDEXES) \
- return (true); \
- if (a_cleanup != malloc_tsd_no_cleanup) { \
- malloc_tsd_cleanup_register( \
- &a_name##_tsd_cleanup_wrapper); \
- } \
- a_name##_booted = true; \
- return (false); \
-} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
-{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- TlsGetValue(a_name##_tsd); \
- \
- if (wrapper == NULL) { \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
- if (wrapper == NULL) { \
- malloc_write("<jemalloc>: Error allocating" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } else { \
- static a_type tsd_static_data = a_initializer; \
- wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } \
+MALLOC_TSD
+#undef O
+
+/*
+ * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
+ * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
+ */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE nt * \
+tsdn_##n##p_get(tsdn_t *tsdn) { \
+ if (tsdn_null(tsdn)) { \
+ return NULL; \
} \
- return (wrapper); \
-} \
-a_attr a_type * \
-a_name##_tsd_get(void) \
-{ \
- a_name##_tsd_wrapper_t *wrapper; \
- \
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
- return (&wrapper->val); \
-} \
-a_attr void \
-a_name##_tsd_set(a_type *val) \
-{ \
- a_name##_tsd_wrapper_t *wrapper; \
- \
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
- wrapper->val = *(val); \
- if (a_cleanup != malloc_tsd_no_cleanup) \
- wrapper->initialized = true; \
+ tsd_t *tsd = tsdn_tsd(tsdn); \
+ return (nt *)tsd_##n##p_get(tsd); \
}
-#else
-#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \
- a_cleanup) \
-/* Data structure. */ \
-typedef struct { \
- bool initialized; \
- a_type val; \
-} a_name##_tsd_wrapper_t; \
-/* Initialization/cleanup. */ \
-a_attr void \
-a_name##_tsd_cleanup_wrapper(void *arg) \
-{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\
- \
- if (a_cleanup != malloc_tsd_no_cleanup && \
- wrapper->initialized) { \
- wrapper->initialized = false; \
- a_cleanup(&wrapper->val); \
- if (wrapper->initialized) { \
- /* Trigger another cleanup round. */ \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error" \
- " setting TSD for "#a_name"\n"); \
- if (opt_abort) \
- abort(); \
- } \
- return; \
- } \
- } \
- malloc_tsd_dalloc(wrapper); \
-} \
-a_attr bool \
-a_name##_tsd_boot(void) \
-{ \
- \
- if (pthread_key_create(&a_name##_tsd, \
- a_name##_tsd_cleanup_wrapper) != 0) \
- return (true); \
- a_name##_booted = true; \
- return (false); \
-} \
-/* Get/set. */ \
-a_attr a_name##_tsd_wrapper_t * \
-a_name##_tsd_get_wrapper(void) \
-{ \
- a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *) \
- pthread_getspecific(a_name##_tsd); \
- \
- if (wrapper == NULL) { \
- tsd_init_block_t block; \
- wrapper = tsd_init_check_recursion( \
- &a_name##_tsd_init_head, &block); \
- if (wrapper) \
- return (wrapper); \
- wrapper = (a_name##_tsd_wrapper_t *) \
- malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t)); \
- block.data = wrapper; \
- if (wrapper == NULL) { \
- malloc_write("<jemalloc>: Error allocating" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } else { \
- static a_type tsd_static_data = a_initializer; \
- wrapper->initialized = false; \
- wrapper->val = tsd_static_data; \
- } \
- if (pthread_setspecific(a_name##_tsd, \
- (void *)wrapper)) { \
- malloc_write("<jemalloc>: Error setting" \
- " TSD for "#a_name"\n"); \
- abort(); \
- } \
- tsd_init_finish(&a_name##_tsd_init_head, &block); \
- } \
- return (wrapper); \
-} \
-a_attr a_type * \
-a_name##_tsd_get(void) \
-{ \
- a_name##_tsd_wrapper_t *wrapper; \
- \
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
- return (&wrapper->val); \
-} \
-a_attr void \
-a_name##_tsd_set(a_type *val) \
-{ \
- a_name##_tsd_wrapper_t *wrapper; \
- \
- assert(a_name##_booted); \
- wrapper = a_name##_tsd_get_wrapper(); \
- wrapper->val = *(val); \
- if (a_cleanup != malloc_tsd_no_cleanup) \
- wrapper->initialized = true; \
+MALLOC_TSD
+#undef O
+
+/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE t \
+tsd_##n##_get(tsd_t *tsd) { \
+ return *tsd_##n##p_get(tsd); \
}
-#endif
+MALLOC_TSD
+#undef O
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
+/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
+#define O(n, t, nt) \
+JEMALLOC_ALWAYS_INLINE void \
+tsd_##n##_set(tsd_t *tsd, t val) { \
+ assert(tsd->state != tsd_state_reincarnated && \
+ tsd->state != tsd_state_minimal_initialized); \
+ *tsd_##n##p_get(tsd) = val; \
+}
+MALLOC_TSD
+#undef O
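Taken together, the generated accessors read and write the field by value. A usage sketch under the same assumed O(thread_allocated, uint64_t, uint64_t) entry (example_account is hypothetical):

static void
example_account(tsd_t *tsd, uint64_t n) {
	uint64_t cur = tsd_thread_allocated_get(tsd);
	tsd_thread_allocated_set(tsd, cur + n);
}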
-#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
- !defined(_WIN32))
-struct tsd_init_block_s {
- ql_elm(tsd_init_block_t) link;
- pthread_t thread;
- void *data;
-};
-struct tsd_init_head_s {
- ql_head(tsd_init_block_t) blocks;
- malloc_mutex_t lock;
-};
-#endif
+JEMALLOC_ALWAYS_INLINE void
+tsd_assert_fast(tsd_t *tsd) {
+ assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
+ tsd_reentrancy_level_get(tsd) == 0);
+}
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-void *malloc_tsd_malloc(size_t size);
-void malloc_tsd_dalloc(void *wrapper);
-void malloc_tsd_no_cleanup(void *);
-void malloc_tsd_cleanup_register(bool (*f)(void));
-void malloc_tsd_boot(void);
-#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
- !defined(_WIN32))
-void *tsd_init_check_recursion(tsd_init_head_t *head,
- tsd_init_block_t *block);
-void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
-#endif
+JEMALLOC_ALWAYS_INLINE bool
+tsd_fast(tsd_t *tsd) {
+ bool fast = (tsd->state == tsd_state_nominal);
+ if (fast) {
+ tsd_assert_fast(tsd);
+ }
+
+ return fast;
+}
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch_impl(bool init, bool minimal) {
+ tsd_t *tsd = tsd_get(init);
+
+ if (!init && tsd_get_allocates() && tsd == NULL) {
+ return NULL;
+ }
+ assert(tsd != NULL);
+
+ if (unlikely(tsd->state != tsd_state_nominal)) {
+ return tsd_fetch_slow(tsd, minimal);
+ }
+ assert(tsd_fast(tsd));
+ tsd_assert_fast(tsd);
+
+ return tsd;
+}
+
+/* Get a minimal TSD that requires no cleanup. See comments in free(). */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch_min(void) {
+ return tsd_fetch_impl(true, true);
+}
+
+/* For internal background threads use only. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_internal_fetch(void) {
+ tsd_t *tsd = tsd_fetch_min();
+ /* Use reincarnated state to prevent full initialization. */
+ tsd->state = tsd_state_reincarnated;
+
+ return tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_fetch(void) {
+ return tsd_fetch_impl(true, false);
+}
+
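A sketch of the intended entry-point pattern (example_api_entry is hypothetical): fetch TSD once at the public-API boundary, then thread tsd, or tsd_tsdn(tsd), through internal calls rather than re-fetching.

static void
example_api_entry(void) {
	tsd_t *tsd = tsd_fetch();
	/* ... pass tsd, or tsd_tsdn(tsd), to internal routines ... */
	(void)tsd;
}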
+static inline bool
+tsd_nominal(tsd_t *tsd) {
+ return (tsd->state <= tsd_state_nominal_max);
+}
+
+JEMALLOC_ALWAYS_INLINE tsdn_t *
+tsdn_fetch(void) {
+ if (!tsd_booted_get()) {
+ return NULL;
+ }
+
+ return tsd_tsdn(tsd_fetch_impl(false, false));
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+tsd_rtree_ctx(tsd_t *tsd) {
+ return tsd_rtree_ctxp_get(tsd);
+}
+
+JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
+tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
+ /*
+ * If tsd cannot be accessed, initialize the fallback rtree_ctx and
+ * return a pointer to it.
+ */
+ if (unlikely(tsdn_null(tsdn))) {
+ rtree_ctx_data_init(fallback);
+ return fallback;
+ }
+ return tsd_rtree_ctx(tsdn_tsd(tsdn));
+}
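The fallback pattern is caller-supplied stack storage, so rtree lookups keep working even when tsdn is NULL. A hedged sketch (example_lookup is hypothetical):

static void
example_lookup(tsdn_t *tsdn) {
	rtree_ctx_t fallback;
	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &fallback);
	/* ... pass ctx to rtree lookup routines ... */
	(void)ctx;
}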
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_TSD_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
new file mode 100644
index 000000000..1e52ef767
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
@@ -0,0 +1,157 @@
+#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_GENERIC_H
+
+typedef struct tsd_init_block_s tsd_init_block_t;
+struct tsd_init_block_s {
+ ql_elm(tsd_init_block_t) link;
+ pthread_t thread;
+ void *data;
+};
+
+/* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */
+typedef struct tsd_init_head_s tsd_init_head_t;
+
+typedef struct {
+ bool initialized;
+ tsd_t val;
+} tsd_wrapper_t;
+
+void *tsd_init_check_recursion(tsd_init_head_t *head,
+ tsd_init_block_t *block);
+void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
+
+extern pthread_key_t tsd_tsd;
+extern tsd_init_head_t tsd_init_head;
+extern tsd_wrapper_t tsd_boot_wrapper;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE void
+tsd_cleanup_wrapper(void *arg) {
+ tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg;
+
+ if (wrapper->initialized) {
+ wrapper->initialized = false;
+ tsd_cleanup(&wrapper->val);
+ if (wrapper->initialized) {
+ /* Trigger another cleanup round. */
+ if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0)
+ {
+ malloc_write("<jemalloc>: Error setting TSD\n");
+ if (opt_abort) {
+ abort();
+ }
+ }
+ return;
+ }
+ }
+ malloc_tsd_dalloc(wrapper);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_wrapper_set(tsd_wrapper_t *wrapper) {
+ if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) {
+ malloc_write("<jemalloc>: Error setting TSD\n");
+ abort();
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_wrapper_t *
+tsd_wrapper_get(bool init) {
+ tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd);
+
+ if (init && unlikely(wrapper == NULL)) {
+ tsd_init_block_t block;
+ wrapper = (tsd_wrapper_t *)
+ tsd_init_check_recursion(&tsd_init_head, &block);
+ if (wrapper) {
+ return wrapper;
+ }
+ wrapper = (tsd_wrapper_t *)
+ malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+ block.data = (void *)wrapper;
+ if (wrapper == NULL) {
+ malloc_write("<jemalloc>: Error allocating TSD\n");
+ abort();
+ } else {
+ wrapper->initialized = false;
+ tsd_t initializer = TSD_INITIALIZER;
+ wrapper->val = initializer;
+ }
+ tsd_wrapper_set(wrapper);
+ tsd_init_finish(&tsd_init_head, &block);
+ }
+ return wrapper;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+ if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) {
+ return true;
+ }
+ tsd_wrapper_set(&tsd_boot_wrapper);
+ tsd_booted = true;
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+ tsd_wrapper_t *wrapper;
+ wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+ if (wrapper == NULL) {
+ malloc_write("<jemalloc>: Error allocating TSD\n");
+ abort();
+ }
+ tsd_boot_wrapper.initialized = false;
+ tsd_cleanup(&tsd_boot_wrapper.val);
+ wrapper->initialized = false;
+ tsd_t initializer = TSD_INITIALIZER;
+ wrapper->val = initializer;
+ tsd_wrapper_set(wrapper);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+ if (tsd_boot0()) {
+ return true;
+ }
+ tsd_boot1();
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+ return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+ return true;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+ tsd_wrapper_t *wrapper;
+
+ assert(tsd_booted);
+ wrapper = tsd_wrapper_get(init);
+ if (tsd_get_allocates() && !init && wrapper == NULL) {
+ return NULL;
+ }
+ return &wrapper->val;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+ tsd_wrapper_t *wrapper;
+
+ assert(tsd_booted);
+ wrapper = tsd_wrapper_get(true);
+ if (likely(&wrapper->val != val)) {
+ wrapper->val = *(val);
+ }
+ wrapper->initialized = true;
+}
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
new file mode 100644
index 000000000..beb467a67
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
@@ -0,0 +1,60 @@
+#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
+
+extern __thread tsd_t tsd_tls;
+extern __thread bool tsd_initialized;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_cleanup_wrapper(void) {
+ if (tsd_initialized) {
+ tsd_initialized = false;
+ tsd_cleanup(&tsd_tls);
+ }
+ return tsd_initialized;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+ malloc_tsd_cleanup_register(&tsd_cleanup_wrapper);
+ tsd_booted = true;
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+ /* Do nothing. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+ return tsd_boot0();
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+ return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+ return false;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+ assert(tsd_booted);
+ return &tsd_tls;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+ assert(tsd_booted);
+ if (likely(&tsd_tls != val)) {
+ tsd_tls = (*val);
+ }
+ tsd_initialized = true;
+}
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
new file mode 100644
index 000000000..0de64b7b8
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
@@ -0,0 +1,59 @@
+#ifdef JEMALLOC_INTERNAL_TSD_TLS_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_TLS_H
+
+extern __thread tsd_t tsd_tls;
+extern pthread_key_t tsd_tsd;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+ if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) {
+ return true;
+ }
+ tsd_booted = true;
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+ /* Do nothing. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+ return tsd_boot0();
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+ return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+ return false;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(UNUSED bool init) {
+ assert(tsd_booted);
+ return &tsd_tls;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+ assert(tsd_booted);
+ if (likely(&tsd_tls != val)) {
+ tsd_tls = (*val);
+ }
+ if (pthread_setspecific(tsd_tsd, (void *)(&tsd_tls)) != 0) {
+ malloc_write("<jemalloc>: Error setting tsd.\n");
+ if (opt_abort) {
+ abort();
+ }
+ }
+}
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_types.h b/deps/jemalloc/include/jemalloc/internal/tsd_types.h
new file mode 100644
index 000000000..6200af61f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_types.h
@@ -0,0 +1,10 @@
+#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H
+#define JEMALLOC_INTERNAL_TSD_TYPES_H
+
+#define MALLOC_TSD_CLEANUPS_MAX 2
+
+typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+typedef bool (*malloc_tsd_cleanup_t)(void);
+
+#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd_win.h b/deps/jemalloc/include/jemalloc/internal/tsd_win.h
new file mode 100644
index 000000000..cf30d18e3
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_win.h
@@ -0,0 +1,139 @@
+#ifdef JEMALLOC_INTERNAL_TSD_WIN_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_WIN_H
+
+typedef struct {
+ bool initialized;
+ tsd_t val;
+} tsd_wrapper_t;
+
+extern DWORD tsd_tsd;
+extern tsd_wrapper_t tsd_boot_wrapper;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_cleanup_wrapper(void) {
+ DWORD error = GetLastError();
+ tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd);
+ SetLastError(error);
+
+ if (wrapper == NULL) {
+ return false;
+ }
+
+ if (wrapper->initialized) {
+ wrapper->initialized = false;
+ tsd_cleanup(&wrapper->val);
+ if (wrapper->initialized) {
+ /* Trigger another cleanup round. */
+ return true;
+ }
+ }
+ malloc_tsd_dalloc(wrapper);
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_wrapper_set(tsd_wrapper_t *wrapper) {
+ if (!TlsSetValue(tsd_tsd, (void *)wrapper)) {
+ malloc_write("<jemalloc>: Error setting TSD\n");
+ abort();
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_wrapper_t *
+tsd_wrapper_get(bool init) {
+ DWORD error = GetLastError();
+ tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd);
+ SetLastError(error);
+
+ if (init && unlikely(wrapper == NULL)) {
+ wrapper = (tsd_wrapper_t *)
+ malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+ if (wrapper == NULL) {
+ malloc_write("<jemalloc>: Error allocating TSD\n");
+ abort();
+ } else {
+ wrapper->initialized = false;
+ /* MSVC is finicky about aggregate initialization. */
+ tsd_t tsd_initializer = TSD_INITIALIZER;
+ wrapper->val = tsd_initializer;
+ }
+ tsd_wrapper_set(wrapper);
+ }
+ return wrapper;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+ tsd_tsd = TlsAlloc();
+ if (tsd_tsd == TLS_OUT_OF_INDEXES) {
+ return true;
+ }
+ malloc_tsd_cleanup_register(&tsd_cleanup_wrapper);
+ tsd_wrapper_set(&tsd_boot_wrapper);
+ tsd_booted = true;
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+ tsd_wrapper_t *wrapper;
+ wrapper = (tsd_wrapper_t *)
+ malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+ if (wrapper == NULL) {
+ malloc_write("<jemalloc>: Error allocating TSD\n");
+ abort();
+ }
+ tsd_boot_wrapper.initialized = false;
+ tsd_cleanup(&tsd_boot_wrapper.val);
+ wrapper->initialized = false;
+ tsd_t initializer = TSD_INITIALIZER;
+ wrapper->val = initializer;
+ tsd_wrapper_set(wrapper);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+ if (tsd_boot0()) {
+ return true;
+ }
+ tsd_boot1();
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+ return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+ return true;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+ tsd_wrapper_t *wrapper;
+
+ assert(tsd_booted);
+ wrapper = tsd_wrapper_get(init);
+ if (tsd_get_allocates() && !init && wrapper == NULL) {
+ return NULL;
+ }
+ return &wrapper->val;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+ tsd_wrapper_t *wrapper;
+
+ assert(tsd_booted);
+ wrapper = tsd_wrapper_get(true);
+ if (likely(&wrapper->val != val)) {
+ wrapper->val = *(val);
+ }
+ wrapper->initialized = true;
+}
diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h
index 6b938f746..304cb545a 100644
--- a/deps/jemalloc/include/jemalloc/internal/util.h
+++ b/deps/jemalloc/include/jemalloc/internal/util.h
@@ -1,143 +1,50 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
+#ifndef JEMALLOC_INTERNAL_UTIL_H
+#define JEMALLOC_INTERNAL_UTIL_H
-/* Size of stack-allocated buffer passed to buferror(). */
-#define BUFERROR_BUF 64
+#define UTIL_INLINE static inline
-/*
- * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be
- * large enough for all possible uses within jemalloc.
- */
-#define MALLOC_PRINTF_BUFSIZE 4096
+/* Junk fill patterns. */
+#ifndef JEMALLOC_ALLOC_JUNK
+# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5)
+#endif
+#ifndef JEMALLOC_FREE_JUNK
+# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a)
+#endif
/*
* Wrap a cpp argument that contains commas such that it isn't broken up into
* multiple arguments.
*/
-#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__
+#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__
+
+/* cpp macro definition stringification. */
+#define STRINGIFY_HELPER(x) #x
+#define STRINGIFY(x) STRINGIFY_HELPER(x)
/*
* Silence compiler warnings due to uninitialized values. This is used
* wherever the compiler fails to recognize that the variable is never used
* uninitialized.
*/
-#ifdef JEMALLOC_CC_SILENCE
-# define JEMALLOC_CC_SILENCE_INIT(v) = v
-#else
-# define JEMALLOC_CC_SILENCE_INIT(v)
-#endif
-
-/*
- * Define a custom assert() in order to reduce the chances of deadlock during
- * assertion failure.
- */
-#ifndef assert
-#define assert(e) do { \
- if (config_debug && !(e)) { \
- malloc_printf( \
- "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
- __FILE__, __LINE__, #e); \
- abort(); \
- } \
-} while (0)
-#endif
-
-#ifndef not_reached
-#define not_reached() do { \
- if (config_debug) { \
- malloc_printf( \
- "<jemalloc>: %s:%d: Unreachable code reached\n", \
- __FILE__, __LINE__); \
- abort(); \
- } \
-} while (0)
-#endif
-
-#ifndef not_implemented
-#define not_implemented() do { \
- if (config_debug) { \
- malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \
- __FILE__, __LINE__); \
- abort(); \
- } \
-} while (0)
-#endif
+#define JEMALLOC_CC_SILENCE_INIT(v) = v
-#ifndef assert_not_implemented
-#define assert_not_implemented(e) do { \
- if (config_debug && !(e)) \
- not_implemented(); \
-} while (0)
-#endif
-
-/* Use to assert a particular configuration, e.g., cassert(config_debug). */
-#define cassert(c) do { \
- if ((c) == false) \
- not_reached(); \
-} while (0)
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-int buferror(int err, char *buf, size_t buflen);
-uintmax_t malloc_strtoumax(const char *restrict nptr,
- char **restrict endptr, int base);
-void malloc_write(const char *s);
-
-/*
- * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
- * point math.
- */
-int malloc_vsnprintf(char *str, size_t size, const char *format,
- va_list ap);
-int malloc_snprintf(char *str, size_t size, const char *format, ...)
- JEMALLOC_ATTR(format(printf, 3, 4));
-void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *format, va_list ap);
-void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
- const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
-void malloc_printf(const char *format, ...)
- JEMALLOC_ATTR(format(printf, 1, 2));
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#ifndef JEMALLOC_ENABLE_INLINE
-size_t pow2_ceil(size_t x);
-void set_errno(int errnum);
-int get_errno(void);
+#ifdef __GNUC__
+# define likely(x) __builtin_expect(!!(x), 1)
+# define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+# define likely(x) !!(x)
+# define unlikely(x) !!(x)
#endif
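Illustrative use of the hint macros (example_xmalloc is hypothetical and assumes <stdlib.h>): annotating the rare branch keeps the common path on the straight-line fall-through.

static void *
example_xmalloc(size_t size) {
	void *p = malloc(size);
	if (unlikely(p == NULL)) {
		abort();	/* Allocation failure is the cold path. */
	}
	return p;
}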
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
-/* Compute the smallest power of 2 that is >= x. */
-JEMALLOC_INLINE size_t
-pow2_ceil(size_t x)
-{
-
- x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
-#if (LG_SIZEOF_PTR == 3)
- x |= x >> 32;
+#if !defined(JEMALLOC_INTERNAL_UNREACHABLE)
+# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure
#endif
- x++;
- return (x);
-}
-/* Sets error code */
-JEMALLOC_INLINE void
-set_errno(int errnum)
-{
+#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE()
+/* Set error code. */
+UTIL_INLINE void
+set_errno(int errnum) {
#ifdef _WIN32
SetLastError(errnum);
#else
@@ -145,18 +52,16 @@ set_errno(int errnum)
#endif
}
-/* Get last error code */
-JEMALLOC_INLINE int
-get_errno(void)
-{
-
+/* Get last error code. */
+UTIL_INLINE int
+get_errno(void) {
#ifdef _WIN32
- return (GetLastError());
+ return GetLastError();
#else
- return (errno);
+ return errno;
#endif
}
-#endif
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
+#undef UTIL_INLINE
+
+#endif /* JEMALLOC_INTERNAL_UTIL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/witness.h b/deps/jemalloc/include/jemalloc/internal/witness.h
new file mode 100644
index 000000000..7ace8ae4a
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/witness.h
@@ -0,0 +1,346 @@
+#ifndef JEMALLOC_INTERNAL_WITNESS_H
+#define JEMALLOC_INTERNAL_WITNESS_H
+
+#include "jemalloc/internal/ql.h"
+
+/******************************************************************************/
+/* LOCK RANKS */
+/******************************************************************************/
+
+/*
+ * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness
+ * machinery.
+ */
+
+#define WITNESS_RANK_OMIT 0U
+
+#define WITNESS_RANK_MIN 1U
+
+#define WITNESS_RANK_INIT 1U
+#define WITNESS_RANK_CTL 1U
+#define WITNESS_RANK_TCACHES 2U
+#define WITNESS_RANK_ARENAS 3U
+
+#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U
+
+#define WITNESS_RANK_PROF_DUMP 5U
+#define WITNESS_RANK_PROF_BT2GCTX 6U
+#define WITNESS_RANK_PROF_TDATAS 7U
+#define WITNESS_RANK_PROF_TDATA 8U
+#define WITNESS_RANK_PROF_GCTX 9U
+
+#define WITNESS_RANK_BACKGROUND_THREAD 10U
+
+/*
+ * Used as an argument to witness_assert_depth_to_rank() in order to validate
+ * depth excluding non-core locks with lower ranks. Since the rank argument to
+ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this
+ * definition can have the same value as the minimally ranked core lock.
+ */
+#define WITNESS_RANK_CORE 11U
+
+#define WITNESS_RANK_DECAY 11U
+#define WITNESS_RANK_TCACHE_QL 12U
+#define WITNESS_RANK_EXTENT_GROW 13U
+#define WITNESS_RANK_EXTENTS 14U
+#define WITNESS_RANK_EXTENT_AVAIL 15U
+
+#define WITNESS_RANK_EXTENT_POOL 16U
+#define WITNESS_RANK_RTREE 17U
+#define WITNESS_RANK_BASE 18U
+#define WITNESS_RANK_ARENA_LARGE 19U
+
+#define WITNESS_RANK_LEAF 0xffffffffU
+#define WITNESS_RANK_BIN WITNESS_RANK_LEAF
+#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF
+#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF
+
+/******************************************************************************/
+/* PER-WITNESS DATA */
+/******************************************************************************/
+#if defined(JEMALLOC_DEBUG)
+# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}}
+#else
+# define WITNESS_INITIALIZER(name, rank)
+#endif
+
+typedef struct witness_s witness_t;
+typedef unsigned witness_rank_t;
+typedef ql_head(witness_t) witness_list_t;
+typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
+ void *);
+
+struct witness_s {
+ /* Name, used for printing lock order reversal messages. */
+ const char *name;
+
+ /*
+ * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses
+ * must be acquired in order of increasing rank.
+ */
+ witness_rank_t rank;
+
+ /*
+	 * If two witnesses are of equal rank and they have the same comp
+ * function pointer, it is called as a last attempt to differentiate
+ * between witnesses of equal rank.
+ */
+ witness_comp_t *comp;
+
+ /* Opaque data, passed to comp(). */
+ void *opaque;
+
+ /* Linkage for thread's currently owned locks. */
+ ql_elm(witness_t) link;
+};
+
+/******************************************************************************/
+/* PER-THREAD DATA */
+/******************************************************************************/
+typedef struct witness_tsd_s witness_tsd_t;
+struct witness_tsd_s {
+ witness_list_t witnesses;
+ bool forking;
+};
+
+#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false }
+#define WITNESS_TSDN_NULL ((witness_tsdn_t *)0)
+
+/******************************************************************************/
+/* (PER-THREAD) NULLABILITY HELPERS */
+/******************************************************************************/
+typedef struct witness_tsdn_s witness_tsdn_t;
+struct witness_tsdn_s {
+ witness_tsd_t witness_tsd;
+};
+
+JEMALLOC_ALWAYS_INLINE witness_tsdn_t *
+witness_tsd_tsdn(witness_tsd_t *witness_tsd) {
+ return (witness_tsdn_t *)witness_tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+witness_tsdn_null(witness_tsdn_t *witness_tsdn) {
+ return witness_tsdn == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE witness_tsd_t *
+witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) {
+ assert(!witness_tsdn_null(witness_tsdn));
+ return &witness_tsdn->witness_tsd;
+}
+
+/******************************************************************************/
+/* API */
+/******************************************************************************/
+void witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+ witness_comp_t *comp, void *opaque);
+
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *JET_MUTABLE witness_lock_error;
+
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *JET_MUTABLE witness_owner_error;
+
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error;
+
+typedef void (witness_depth_error_t)(const witness_list_t *,
+ witness_rank_t rank_inclusive, unsigned depth);
+extern witness_depth_error_t *JET_MUTABLE witness_depth_error;
+
+void witnesses_cleanup(witness_tsd_t *witness_tsd);
+void witness_prefork(witness_tsd_t *witness_tsd);
+void witness_postfork_parent(witness_tsd_t *witness_tsd);
+void witness_postfork_child(witness_tsd_t *witness_tsd);
+
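A sketch of how a lock type might register with the witness machinery; example_mutex_t and its functions are hypothetical, and the rank choice depends on where the lock sits in the acquisition order.

typedef struct {
	/* ... underlying platform lock ... */
	witness_t witness;
} example_mutex_t;

static void
example_mutex_init(example_mutex_t *m) {
	witness_init(&m->witness, "example_mutex", WITNESS_RANK_LEAF,
	    NULL, NULL);
}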
+/* Helper, not intended for direct use. */
+static inline bool
+witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) {
+ witness_list_t *witnesses;
+ witness_t *w;
+
+ cassert(config_debug);
+
+ witnesses = &witness_tsd->witnesses;
+ ql_foreach(w, witnesses, link) {
+ if (w == witness) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static inline void
+witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) {
+ witness_tsd_t *witness_tsd;
+
+ if (!config_debug) {
+ return;
+ }
+
+ if (witness_tsdn_null(witness_tsdn)) {
+ return;
+ }
+ witness_tsd = witness_tsdn_tsd(witness_tsdn);
+ if (witness->rank == WITNESS_RANK_OMIT) {
+ return;
+ }
+
+ if (witness_owner(witness_tsd, witness)) {
+ return;
+ }
+ witness_owner_error(witness);
+}
+
+static inline void
+witness_assert_not_owner(witness_tsdn_t *witness_tsdn,
+ const witness_t *witness) {
+ witness_tsd_t *witness_tsd;
+ witness_list_t *witnesses;
+ witness_t *w;
+
+ if (!config_debug) {
+ return;
+ }
+
+ if (witness_tsdn_null(witness_tsdn)) {
+ return;
+ }
+ witness_tsd = witness_tsdn_tsd(witness_tsdn);
+ if (witness->rank == WITNESS_RANK_OMIT) {
+ return;
+ }
+
+ witnesses = &witness_tsd->witnesses;
+ ql_foreach(w, witnesses, link) {
+ if (w == witness) {
+ witness_not_owner_error(witness);
+ }
+ }
+}
+
+static inline void
+witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn,
+ witness_rank_t rank_inclusive, unsigned depth) {
+ witness_tsd_t *witness_tsd;
+ unsigned d;
+ witness_list_t *witnesses;
+ witness_t *w;
+
+ if (!config_debug) {
+ return;
+ }
+
+ if (witness_tsdn_null(witness_tsdn)) {
+ return;
+ }
+ witness_tsd = witness_tsdn_tsd(witness_tsdn);
+
+ d = 0;
+ witnesses = &witness_tsd->witnesses;
+ w = ql_last(witnesses, link);
+ if (w != NULL) {
+ ql_reverse_foreach(w, witnesses, link) {
+ if (w->rank < rank_inclusive) {
+ break;
+ }
+ d++;
+ }
+ }
+ if (d != depth) {
+ witness_depth_error(witnesses, rank_inclusive, depth);
+ }
+}
+
+static inline void
+witness_assert_depth(witness_tsdn_t *witness_tsdn, unsigned depth) {
+ witness_assert_depth_to_rank(witness_tsdn, WITNESS_RANK_MIN, depth);
+}
+
+static inline void
+witness_assert_lockless(witness_tsdn_t *witness_tsdn) {
+ witness_assert_depth(witness_tsdn, 0);
+}
+
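Entry points can use the depth assertions as a no-locks-held precondition. A minimal sketch (example_entry is hypothetical):

static void
example_entry(witness_tsdn_t *witness_tsdn) {
	witness_assert_lockless(witness_tsdn);
	/* ... safe to take locks in rank order from here ... */
}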
+static inline void
+witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
+ witness_tsd_t *witness_tsd;
+ witness_list_t *witnesses;
+ witness_t *w;
+
+ if (!config_debug) {
+ return;
+ }
+
+ if (witness_tsdn_null(witness_tsdn)) {
+ return;
+ }
+ witness_tsd = witness_tsdn_tsd(witness_tsdn);
+ if (witness->rank == WITNESS_RANK_OMIT) {
+ return;
+ }
+
+ witness_assert_not_owner(witness_tsdn, witness);
+
+ witnesses = &witness_tsd->witnesses;
+ w = ql_last(witnesses, link);
+ if (w == NULL) {
+ /* No other locks; do nothing. */
+ } else if (witness_tsd->forking && w->rank <= witness->rank) {
+ /* Forking, and relaxed ranking satisfied. */
+ } else if (w->rank > witness->rank) {
+ /* Not forking, rank order reversal. */
+ witness_lock_error(witnesses, witness);
+ } else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
+ witness->comp || w->comp(w, w->opaque, witness, witness->opaque) >
+ 0)) {
+ /*
+ * Missing/incompatible comparison function, or comparison
+ * function indicates rank order reversal.
+ */
+ witness_lock_error(witnesses, witness);
+ }
+
+ ql_elm_new(witness, link);
+ ql_tail_insert(witnesses, witness, link);
+}
+
+static inline void
+witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
+ witness_tsd_t *witness_tsd;
+ witness_list_t *witnesses;
+
+ if (!config_debug) {
+ return;
+ }
+
+ if (witness_tsdn_null(witness_tsdn)) {
+ return;
+ }
+ witness_tsd = witness_tsdn_tsd(witness_tsdn);
+ if (witness->rank == WITNESS_RANK_OMIT) {
+ return;
+ }
+
+ /*
+	 * Check for ownership before removal, rather than relying on
+ * witness_assert_owner() to abort, so that unit tests can test this
+ * function's failure mode without causing undefined behavior.
+ */
+ if (witness_owner(witness_tsd, witness)) {
+ witnesses = &witness_tsd->witnesses;
+ ql_remove(witnesses, witness, link);
+ } else {
+ witness_assert_owner(witness_tsdn, witness);
+ }
+}
+
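Continuing the hypothetical example_mutex_t sketch from above, a plausible lock/unlock discipline brackets the real acquisition, roughly mirroring how jemalloc's own mutexes use this API:

static void
example_mutex_lock(witness_tsdn_t *witness_tsdn, example_mutex_t *m) {
	witness_assert_not_owner(witness_tsdn, &m->witness);
	/* ... acquire the underlying lock ... */
	witness_lock(witness_tsdn, &m->witness);
}

static void
example_mutex_unlock(witness_tsdn_t *witness_tsdn, example_mutex_t *m) {
	witness_unlock(witness_tsdn, &m->witness);
	/* ... release the underlying lock ... */
}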
+#endif /* JEMALLOC_INTERNAL_WITNESS_H */
diff --git a/deps/jemalloc/include/jemalloc/jemalloc.sh b/deps/jemalloc/include/jemalloc/jemalloc.sh
index e4738ebae..b19b1548b 100755
--- a/deps/jemalloc/include/jemalloc/jemalloc.sh
+++ b/deps/jemalloc/include/jemalloc/jemalloc.sh
@@ -4,7 +4,7 @@ objroot=$1
cat <<EOF
#ifndef JEMALLOC_H_
-#define JEMALLOC_H_
+#define JEMALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
@@ -12,17 +12,16 @@ extern "C" {
EOF
for hdr in jemalloc_defs.h jemalloc_rename.h jemalloc_macros.h \
- jemalloc_protos.h jemalloc_mangle.h ; do
+ jemalloc_protos.h jemalloc_typedefs.h jemalloc_mangle.h ; do
cat "${objroot}include/jemalloc/${hdr}" \
| grep -v 'Generated from .* by configure\.' \
- | sed -e 's/^#define /#define /g' \
| sed -e 's/ $//g'
echo
done
cat <<EOF
#ifdef __cplusplus
-};
+}
#endif
#endif /* JEMALLOC_H_ */
EOF
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
index eb38d7105..6d89435c2 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -1,8 +1,14 @@
/* Defined if __attribute__((...)) syntax is supported. */
#undef JEMALLOC_HAVE_ATTR
-/* Support the experimental API. */
-#undef JEMALLOC_EXPERIMENTAL
+/* Defined if alloc_size attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
+
+/* Defined if format(gnu_printf, ...) attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
+
+/* Defined if format(printf, ...) attribute is supported. */
+#undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF
/*
* Define overrides for non-standard allocator-related functions if they are
@@ -20,5 +26,20 @@
*/
#undef JEMALLOC_USABLE_SIZE_CONST
+/*
+ * If defined, specify throw() for the public function prototypes when compiling
+ * with C++. The only justification for this is to match the prototypes that
+ * glibc defines.
+ */
+#undef JEMALLOC_USE_CXX_THROW
+
+#ifdef _MSC_VER
+# ifdef _WIN64
+# define LG_SIZEOF_PTR_WIN 3
+# else
+# define LG_SIZEOF_PTR_WIN 2
+# endif
+#endif
+
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
index 13dbdd912..daf9e571b 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in
@@ -1,61 +1,126 @@
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
#include <limits.h>
#include <strings.h>
-#define JEMALLOC_VERSION "@jemalloc_version@"
-#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
-#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
-#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
-#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
-#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
+#define JEMALLOC_VERSION "@jemalloc_version@"
+#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
+#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
+#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
+#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
+#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
-# define MALLOCX_LG_ALIGN(la) (la)
-# if LG_SIZEOF_PTR == 2
-# define MALLOCX_ALIGN(a) (ffs(a)-1)
+#define MALLOCX_LG_ALIGN(la) ((int)(la))
+#if LG_SIZEOF_PTR == 2
+# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1))
+#else
+# define MALLOCX_ALIGN(a) \
+ ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \
+ ffs((int)(((size_t)(a))>>32))+31))
+#endif
+#define MALLOCX_ZERO ((int)0x40)
+/*
+ * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1
+ * encodes MALLOCX_TCACHE_NONE.
+ */
+#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8))
+#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1)
+/*
+ * Bias arena index bits so that 0 encodes "use an automatically chosen arena".
+ */
+#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20)
+
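For context, these flag macros compose bitwise in the public *allocx() calls. A hedged sketch (example_aligned_alloc is hypothetical; mallocx() and the flags are the documented API):

void *
example_aligned_alloc(size_t size) {
	/* 64-byte-aligned, zeroed, bypassing the thread cache. */
	return mallocx(size, MALLOCX_ALIGN(64) | MALLOCX_ZERO |
	    MALLOCX_TCACHE_NONE);
}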
+/*
+ * Use as arena index in "arena.<i>.{purge,decay,dss}" and
+ * "stats.arenas.<i>.*" mallctl interfaces to select all arenas. This
+ * definition is intentionally specified in raw decimal format to support
+ * cpp-based string concatenation, e.g.
+ *
+ * #define STRINGIFY_HELPER(x) #x
+ * #define STRINGIFY(x) STRINGIFY_HELPER(x)
+ *
+ * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL,
+ * 0);
+ */
+#define MALLCTL_ARENAS_ALL 4096
+/*
+ * Use as arena index in "stats.arenas.<i>.*" mallctl interfaces to select
+ * destroyed arenas.
+ */
+#define MALLCTL_ARENAS_DESTROYED 4097
+
+#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW)
+# define JEMALLOC_CXX_THROW throw()
+#else
+# define JEMALLOC_CXX_THROW
+#endif
+
+#if defined(_MSC_VER)
+# define JEMALLOC_ATTR(s)
+# define JEMALLOC_ALIGNED(s) __declspec(align(s))
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
+# ifndef JEMALLOC_EXPORT
+# ifdef DLLEXPORT
+# define JEMALLOC_EXPORT __declspec(dllexport)
+# else
+# define JEMALLOC_EXPORT __declspec(dllimport)
+# endif
+# endif
+# define JEMALLOC_FORMAT_PRINTF(s, i)
+# define JEMALLOC_NOINLINE __declspec(noinline)
+# ifdef __cplusplus
+# define JEMALLOC_NOTHROW __declspec(nothrow)
# else
-# define MALLOCX_ALIGN(a) \
- ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
+# define JEMALLOC_NOTHROW
# endif
-# define MALLOCX_ZERO ((int)0x40)
-/* Bias arena index bits so that 0 encodes "MALLOCX_ARENA() unspecified". */
-# define MALLOCX_ARENA(a) ((int)(((a)+1) << 8))
-
-#ifdef JEMALLOC_EXPERIMENTAL
-# define ALLOCM_LG_ALIGN(la) (la)
-# if LG_SIZEOF_PTR == 2
-# define ALLOCM_ALIGN(a) (ffs(a)-1)
+# define JEMALLOC_SECTION(s) __declspec(allocate(s))
+# define JEMALLOC_RESTRICT_RETURN __declspec(restrict)
+# if _MSC_VER >= 1900 && !defined(__EDG__)
+# define JEMALLOC_ALLOCATOR __declspec(allocator)
# else
-# define ALLOCM_ALIGN(a) \
- ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31)
+# define JEMALLOC_ALLOCATOR
# endif
-# define ALLOCM_ZERO ((int)0x40)
-# define ALLOCM_NO_MOVE ((int)0x80)
-/* Bias arena index bits so that 0 encodes "ALLOCM_ARENA() unspecified". */
-# define ALLOCM_ARENA(a) ((int)(((a)+1) << 8))
-# define ALLOCM_SUCCESS 0
-# define ALLOCM_ERR_OOM 1
-# define ALLOCM_ERR_NOT_MOVED 2
-#endif
-
-#ifdef JEMALLOC_HAVE_ATTR
+#elif defined(JEMALLOC_HAVE_ATTR)
# define JEMALLOC_ATTR(s) __attribute__((s))
-# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
-# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
-# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
-#elif _MSC_VER
-# define JEMALLOC_ATTR(s)
-# ifdef DLLEXPORT
-# define JEMALLOC_EXPORT __declspec(dllexport)
+# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE
+# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s))
+# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2))
# else
-# define JEMALLOC_EXPORT __declspec(dllimport)
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
# endif
-# define JEMALLOC_ALIGNED(s) __declspec(align(s))
-# define JEMALLOC_SECTION(s) __declspec(allocate(s))
-# define JEMALLOC_NOINLINE __declspec(noinline)
+# ifndef JEMALLOC_EXPORT
+# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
+# endif
+# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
+# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
+# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)
+# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i))
+# else
+# define JEMALLOC_FORMAT_PRINTF(s, i)
+# endif
+# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
+# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow)
+# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
+# define JEMALLOC_RESTRICT_RETURN
+# define JEMALLOC_ALLOCATOR
#else
# define JEMALLOC_ATTR(s)
-# define JEMALLOC_EXPORT
# define JEMALLOC_ALIGNED(s)
-# define JEMALLOC_SECTION(s)
+# define JEMALLOC_ALLOC_SIZE(s)
+# define JEMALLOC_ALLOC_SIZE2(s1, s2)
+# define JEMALLOC_EXPORT
+# define JEMALLOC_FORMAT_PRINTF(s, i)
# define JEMALLOC_NOINLINE
+# define JEMALLOC_NOTHROW
+# define JEMALLOC_SECTION(s)
+# define JEMALLOC_RESTRICT_RETURN
+# define JEMALLOC_ALLOCATOR
#endif
+
+/* This version of jemalloc, modified for Redis, has the je_get_defrag_hint()
+ * function. */
+#define JEMALLOC_FRAG_HINT
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh b/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh
index df328b78d..c675bb469 100755
--- a/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh
+++ b/deps/jemalloc/include/jemalloc/jemalloc_mangle.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -eu
public_symbols_txt=$1
symbol_prefix=$2
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in b/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
index 25446de3d..a78414b19 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_protos.h.in
@@ -7,52 +7,60 @@ extern JEMALLOC_EXPORT const char *@je_@malloc_conf;
extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque,
const char *s);
-JEMALLOC_EXPORT void *@je_@malloc(size_t size) JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT void *@je_@calloc(size_t num, size_t size)
- JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT int @je_@posix_memalign(void **memptr, size_t alignment,
- size_t size) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT void *@je_@aligned_alloc(size_t alignment, size_t size)
- JEMALLOC_ATTR(malloc);
-JEMALLOC_EXPORT void *@je_@realloc(void *ptr, size_t size);
-JEMALLOC_EXPORT void @je_@free(void *ptr);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@malloc(size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@calloc(size_t num, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@posix_memalign(void **memptr,
+ size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@aligned_alloc(size_t alignment,
+ size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc)
+ JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@realloc(void *ptr, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr)
+ JEMALLOC_CXX_THROW;
-JEMALLOC_EXPORT void *@je_@mallocx(size_t size, int flags);
-JEMALLOC_EXPORT void *@je_@rallocx(void *ptr, size_t size, int flags);
-JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra,
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags)
+ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@rallocx(void *ptr, size_t size,
+ int flags) JEMALLOC_ALLOC_SIZE(2);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@xallocx(void *ptr, size_t size,
+ size_t extra, int flags);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@sallocx(const void *ptr,
+ int flags) JEMALLOC_ATTR(pure);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@dallocx(void *ptr, int flags);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@sdallocx(void *ptr, size_t size,
int flags);
-JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags);
-JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags);
-JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@nallocx(size_t size, int flags)
+ JEMALLOC_ATTR(pure);
-JEMALLOC_EXPORT int @je_@mallctl(const char *name, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen);
-JEMALLOC_EXPORT int @je_@mallctlnametomib(const char *name, size_t *mibp,
- size_t *miblenp);
-JEMALLOC_EXPORT int @je_@mallctlbymib(const size_t *mib, size_t miblen,
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctl(const char *name,
void *oldp, size_t *oldlenp, void *newp, size_t newlen);
-JEMALLOC_EXPORT void @je_@malloc_stats_print(void (*write_cb)(void *,
- const char *), void *@je_@cbopaque, const char *opts);
-JEMALLOC_EXPORT size_t @je_@malloc_usable_size(
- JEMALLOC_USABLE_SIZE_CONST void *ptr);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctlnametomib(const char *name,
+ size_t *mibp, size_t *miblenp);
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@mallctlbymib(const size_t *mib,
+ size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@malloc_stats_print(
+ void (*write_cb)(void *, const char *), void *@je_@cbopaque,
+ const char *opts);
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size(
+ JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW;
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
-JEMALLOC_EXPORT void * @je_@memalign(size_t alignment, size_t size)
- JEMALLOC_ATTR(malloc);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size)
+ JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc);
#endif
#ifdef JEMALLOC_OVERRIDE_VALLOC
-JEMALLOC_EXPORT void * @je_@valloc(size_t size) JEMALLOC_ATTR(malloc);
-#endif
-
-#ifdef JEMALLOC_EXPERIMENTAL
-JEMALLOC_EXPORT int @je_@allocm(void **ptr, size_t *rsize, size_t size,
- int flags) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@rallocm(void **ptr, size_t *rsize, size_t size,
- size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@sallocm(const void *ptr, size_t *rsize, int flags)
- JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@dallocm(void *ptr, int flags)
- JEMALLOC_ATTR(nonnull(1));
-JEMALLOC_EXPORT int @je_@nallocm(size_t *rsize, size_t size, int flags);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+ void JEMALLOC_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW
+ JEMALLOC_ATTR(malloc);
#endif
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in
new file mode 100644
index 000000000..1a5887430
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/jemalloc_typedefs.h.in
@@ -0,0 +1,77 @@
+typedef struct extent_hooks_s extent_hooks_t;
+
+/*
+ * void *
+ * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
+ * size_t alignment, bool *zero, bool *commit, unsigned arena_ind);
+ */
+typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *,
+ bool *, unsigned);
+
+/*
+ * bool
+ * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * bool committed, unsigned arena_ind);
+ */
+typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool,
+ unsigned);
+
+/*
+ * void
+ * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * bool committed, unsigned arena_ind);
+ */
+typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool,
+ unsigned);
+
+/*
+ * bool
+ * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * size_t offset, size_t length, unsigned arena_ind);
+ */
+typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t,
+ unsigned);
+
+/*
+ * bool
+ * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * size_t offset, size_t length, unsigned arena_ind);
+ */
+typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t,
+ size_t, unsigned);
+
+/*
+ * bool
+ * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * size_t offset, size_t length, unsigned arena_ind);
+ */
+typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t,
+ unsigned);
+
+/*
+ * bool
+ * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ * size_t size_a, size_t size_b, bool committed, unsigned arena_ind);
+ */
+typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t,
+ bool, unsigned);
+
+/*
+ * bool
+ * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
+ * void *addr_b, size_t size_b, bool committed, unsigned arena_ind);
+ */
+typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t,
+ bool, unsigned);
+
+struct extent_hooks_s {
+ extent_alloc_t *alloc;
+ extent_dalloc_t *dalloc;
+ extent_destroy_t *destroy;
+ extent_commit_t *commit;
+ extent_decommit_t *decommit;
+ extent_purge_t *purge_lazy;
+ extent_purge_t *purge_forced;
+ extent_split_t *split;
+ extent_merge_t *merge;
+};
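A hedged installation sketch (the example_* names are hypothetical; "arena.<i>.extent_hooks" is the documented mallctl): a hook signals failure or opt-out through its return value, and which slots may be left NULL varies by hook, so consult the manual before omitting any.

static void *
example_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr,
    size_t size, size_t alignment, bool *zero, bool *commit,
    unsigned arena_ind) {
	/* ... obtain memory honoring new_addr/alignment/zero/commit ... */
	return NULL;	/* NULL reports failure to jemalloc. */
}

static extent_hooks_t example_hooks = {
	.alloc = example_extent_alloc
	/* Remaining slots elided; see the manual for per-hook rules. */
};

static void
example_install(void) {
	extent_hooks_t *hooks = &example_hooks;
	mallctl("arena.0.extent_hooks", NULL, NULL, (void *)&hooks,
	    sizeof(hooks));
}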
diff --git a/deps/jemalloc/include/msvc_compat/stdbool.h b/deps/jemalloc/include/msvc_compat/C99/stdbool.h
index da9ee8b80..d92160ebc 100644
--- a/deps/jemalloc/include/msvc_compat/stdbool.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdbool.h
@@ -5,7 +5,11 @@
/* MSVC doesn't define _Bool or bool in C, but does have BOOL */
/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */
+/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as
+ * a built-in type. */
+#ifndef __clang__
typedef BOOL _Bool;
+#endif
#define bool _Bool
#define true 1
diff --git a/deps/jemalloc/include/msvc_compat/stdint.h b/deps/jemalloc/include/msvc_compat/C99/stdint.h
index d02608a59..d02608a59 100644
--- a/deps/jemalloc/include/msvc_compat/stdint.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdint.h
diff --git a/deps/jemalloc/include/msvc_compat/inttypes.h b/deps/jemalloc/include/msvc_compat/inttypes.h
deleted file mode 100644
index a4e6b75cb..000000000
--- a/deps/jemalloc/include/msvc_compat/inttypes.h
+++ /dev/null
@@ -1,313 +0,0 @@
-// ISO C9x compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-// Copyright (c) 2006 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// 1. Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. The name of the author may be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_INTTYPES_H_ // [
-#define _MSC_INTTYPES_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#include "stdint.h"
-
-// 7.8 Format conversion of integer types
-
-typedef struct {
- intmax_t quot;
- intmax_t rem;
-} imaxdiv_t;
-
-// 7.8.1 Macros for format specifiers
-
-#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198
-
-#ifdef _WIN64
-# define __PRI64_PREFIX "l"
-# define __PRIPTR_PREFIX "l"
-#else
-# define __PRI64_PREFIX "ll"
-# define __PRIPTR_PREFIX
-#endif
-
-// The fprintf macros for signed integers are:
-#define PRId8 "d"
-#define PRIi8 "i"
-#define PRIdLEAST8 "d"
-#define PRIiLEAST8 "i"
-#define PRIdFAST8 "d"
-#define PRIiFAST8 "i"
-
-#define PRId16 "hd"
-#define PRIi16 "hi"
-#define PRIdLEAST16 "hd"
-#define PRIiLEAST16 "hi"
-#define PRIdFAST16 "hd"
-#define PRIiFAST16 "hi"
-
-#define PRId32 "d"
-#define PRIi32 "i"
-#define PRIdLEAST32 "d"
-#define PRIiLEAST32 "i"
-#define PRIdFAST32 "d"
-#define PRIiFAST32 "i"
-
-#define PRId64 __PRI64_PREFIX "d"
-#define PRIi64 __PRI64_PREFIX "i"
-#define PRIdLEAST64 __PRI64_PREFIX "d"
-#define PRIiLEAST64 __PRI64_PREFIX "i"
-#define PRIdFAST64 __PRI64_PREFIX "d"
-#define PRIiFAST64 __PRI64_PREFIX "i"
-
-#define PRIdMAX __PRI64_PREFIX "d"
-#define PRIiMAX __PRI64_PREFIX "i"
-
-#define PRIdPTR __PRIPTR_PREFIX "d"
-#define PRIiPTR __PRIPTR_PREFIX "i"
-
-// The fprintf macros for unsigned integers are:
-#define PRIo8 "o"
-#define PRIu8 "u"
-#define PRIx8 "x"
-#define PRIX8 "X"
-#define PRIoLEAST8 "o"
-#define PRIuLEAST8 "u"
-#define PRIxLEAST8 "x"
-#define PRIXLEAST8 "X"
-#define PRIoFAST8 "o"
-#define PRIuFAST8 "u"
-#define PRIxFAST8 "x"
-#define PRIXFAST8 "X"
-
-#define PRIo16 "ho"
-#define PRIu16 "hu"
-#define PRIx16 "hx"
-#define PRIX16 "hX"
-#define PRIoLEAST16 "ho"
-#define PRIuLEAST16 "hu"
-#define PRIxLEAST16 "hx"
-#define PRIXLEAST16 "hX"
-#define PRIoFAST16 "ho"
-#define PRIuFAST16 "hu"
-#define PRIxFAST16 "hx"
-#define PRIXFAST16 "hX"
-
-#define PRIo32 "o"
-#define PRIu32 "u"
-#define PRIx32 "x"
-#define PRIX32 "X"
-#define PRIoLEAST32 "o"
-#define PRIuLEAST32 "u"
-#define PRIxLEAST32 "x"
-#define PRIXLEAST32 "X"
-#define PRIoFAST32 "o"
-#define PRIuFAST32 "u"
-#define PRIxFAST32 "x"
-#define PRIXFAST32 "X"
-
-#define PRIo64 __PRI64_PREFIX "o"
-#define PRIu64 __PRI64_PREFIX "u"
-#define PRIx64 __PRI64_PREFIX "x"
-#define PRIX64 __PRI64_PREFIX "X"
-#define PRIoLEAST64 __PRI64_PREFIX "o"
-#define PRIuLEAST64 __PRI64_PREFIX "u"
-#define PRIxLEAST64 __PRI64_PREFIX "x"
-#define PRIXLEAST64 __PRI64_PREFIX "X"
-#define PRIoFAST64 __PRI64_PREFIX "o"
-#define PRIuFAST64 __PRI64_PREFIX "u"
-#define PRIxFAST64 __PRI64_PREFIX "x"
-#define PRIXFAST64 __PRI64_PREFIX "X"
-
-#define PRIoMAX __PRI64_PREFIX "o"
-#define PRIuMAX __PRI64_PREFIX "u"
-#define PRIxMAX __PRI64_PREFIX "x"
-#define PRIXMAX __PRI64_PREFIX "X"
-
-#define PRIoPTR __PRIPTR_PREFIX "o"
-#define PRIuPTR __PRIPTR_PREFIX "u"
-#define PRIxPTR __PRIPTR_PREFIX "x"
-#define PRIXPTR __PRIPTR_PREFIX "X"
-
-// The fscanf macros for signed integers are:
-#define SCNd8 "d"
-#define SCNi8 "i"
-#define SCNdLEAST8 "d"
-#define SCNiLEAST8 "i"
-#define SCNdFAST8 "d"
-#define SCNiFAST8 "i"
-
-#define SCNd16 "hd"
-#define SCNi16 "hi"
-#define SCNdLEAST16 "hd"
-#define SCNiLEAST16 "hi"
-#define SCNdFAST16 "hd"
-#define SCNiFAST16 "hi"
-
-#define SCNd32 "ld"
-#define SCNi32 "li"
-#define SCNdLEAST32 "ld"
-#define SCNiLEAST32 "li"
-#define SCNdFAST32 "ld"
-#define SCNiFAST32 "li"
-
-#define SCNd64 "I64d"
-#define SCNi64 "I64i"
-#define SCNdLEAST64 "I64d"
-#define SCNiLEAST64 "I64i"
-#define SCNdFAST64 "I64d"
-#define SCNiFAST64 "I64i"
-
-#define SCNdMAX "I64d"
-#define SCNiMAX "I64i"
-
-#ifdef _WIN64 // [
-# define SCNdPTR "I64d"
-# define SCNiPTR "I64i"
-#else // _WIN64 ][
-# define SCNdPTR "ld"
-# define SCNiPTR "li"
-#endif // _WIN64 ]
-
-// The fscanf macros for unsigned integers are:
-#define SCNo8 "o"
-#define SCNu8 "u"
-#define SCNx8 "x"
-#define SCNX8 "X"
-#define SCNoLEAST8 "o"
-#define SCNuLEAST8 "u"
-#define SCNxLEAST8 "x"
-#define SCNXLEAST8 "X"
-#define SCNoFAST8 "o"
-#define SCNuFAST8 "u"
-#define SCNxFAST8 "x"
-#define SCNXFAST8 "X"
-
-#define SCNo16 "ho"
-#define SCNu16 "hu"
-#define SCNx16 "hx"
-#define SCNX16 "hX"
-#define SCNoLEAST16 "ho"
-#define SCNuLEAST16 "hu"
-#define SCNxLEAST16 "hx"
-#define SCNXLEAST16 "hX"
-#define SCNoFAST16 "ho"
-#define SCNuFAST16 "hu"
-#define SCNxFAST16 "hx"
-#define SCNXFAST16 "hX"
-
-#define SCNo32 "lo"
-#define SCNu32 "lu"
-#define SCNx32 "lx"
-#define SCNX32 "lX"
-#define SCNoLEAST32 "lo"
-#define SCNuLEAST32 "lu"
-#define SCNxLEAST32 "lx"
-#define SCNXLEAST32 "lX"
-#define SCNoFAST32 "lo"
-#define SCNuFAST32 "lu"
-#define SCNxFAST32 "lx"
-#define SCNXFAST32 "lX"
-
-#define SCNo64 "I64o"
-#define SCNu64 "I64u"
-#define SCNx64 "I64x"
-#define SCNX64 "I64X"
-#define SCNoLEAST64 "I64o"
-#define SCNuLEAST64 "I64u"
-#define SCNxLEAST64 "I64x"
-#define SCNXLEAST64 "I64X"
-#define SCNoFAST64 "I64o"
-#define SCNuFAST64 "I64u"
-#define SCNxFAST64 "I64x"
-#define SCNXFAST64 "I64X"
-
-#define SCNoMAX "I64o"
-#define SCNuMAX "I64u"
-#define SCNxMAX "I64x"
-#define SCNXMAX "I64X"
-
-#ifdef _WIN64 // [
-# define SCNoPTR "I64o"
-# define SCNuPTR "I64u"
-# define SCNxPTR "I64x"
-# define SCNXPTR "I64X"
-#else // _WIN64 ][
-# define SCNoPTR "lo"
-# define SCNuPTR "lu"
-# define SCNxPTR "lx"
-# define SCNXPTR "lX"
-#endif // _WIN64 ]
-
-#endif // __STDC_FORMAT_MACROS ]
-
-// 7.8.2 Functions for greatest-width integer types
-
-// 7.8.2.1 The imaxabs function
-#define imaxabs _abs64
-
-// 7.8.2.2 The imaxdiv function
-
-// This is modified version of div() function from Microsoft's div.c found
-// in %MSVC.NET%\crt\src\div.c
-#ifdef STATIC_IMAXDIV // [
-static
-#else // STATIC_IMAXDIV ][
-_inline
-#endif // STATIC_IMAXDIV ]
-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
-{
- imaxdiv_t result;
-
- result.quot = numer / denom;
- result.rem = numer % denom;
-
- if (numer < 0 && result.rem > 0) {
- // did division wrong; must fix up
- ++result.quot;
- result.rem -= denom;
- }
-
- return result;
-}
-
-// 7.8.2.3 The strtoimax and strtoumax functions
-#define strtoimax _strtoi64
-#define strtoumax _strtoui64
-
-// 7.8.2.4 The wcstoimax and wcstoumax functions
-#define wcstoimax _wcstoi64
-#define wcstoumax _wcstoui64
-
-
-#endif // _MSC_INTTYPES_H_ ]
diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h
index c84975b6b..996f256ce 100644
--- a/deps/jemalloc/include/msvc_compat/strings.h
+++ b/deps/jemalloc/include/msvc_compat/strings.h
@@ -3,21 +3,56 @@
/* MSVC doesn't define ffs/ffsl. This dummy strings.h header is provided
* for both */
-#include <intrin.h>
-#pragma intrinsic(_BitScanForward)
-static __forceinline int ffsl(long x)
-{
+#ifdef _MSC_VER
+# include <intrin.h>
+# pragma intrinsic(_BitScanForward)
+static __forceinline int ffsl(long x) {
unsigned long i;
- if (_BitScanForward(&i, x))
- return (i + 1);
- return (0);
+ if (_BitScanForward(&i, x)) {
+ return i + 1;
+ }
+ return 0;
}
-static __forceinline int ffs(int x)
-{
+static __forceinline int ffs(int x) {
+ return ffsl(x);
+}
+
+# ifdef _M_X64
+# pragma intrinsic(_BitScanForward64)
+# endif
+
+static __forceinline int ffsll(unsigned __int64 x) {
+ unsigned long i;
+#ifdef _M_X64
+ if (_BitScanForward64(&i, x)) {
+ return i + 1;
+ }
+ return 0;
+#else
+// Fallback for 32-bit builds, where the 64-bit intrinsic is unavailable;
+// assumes a little-endian layout.
+ union {
+ unsigned __int64 ll;
+ unsigned long l[2];
+ } s;
- return (ffsl(x));
+ s.ll = x;
+
+ if (_BitScanForward(&i, s.l[0])) {
+ return i + 1;
+ } else if (_BitScanForward(&i, s.l[1])) {
+ return i + 33;
+ }
+ return 0;
+#endif
}
+#else
+# define ffsll(x) __builtin_ffsll(x)
+# define ffsl(x) __builtin_ffsl(x)
+# define ffs(x) __builtin_ffs(x)
#endif
+
+#endif /* strings_h */
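Both the intrinsic-based and __builtin_* paths above implement the same contract: the return value is the 1-based index of the least significant set bit, or 0 for a zero argument. A hypothetical standalone check (not part of the tree) illustrating that contract:

#include <assert.h>
#include "strings.h"   /* the msvc_compat shim above */

int main(void) {
	assert(ffs(0) == 0);                /* no bit set -> 0            */
	assert(ffs(1) == 1);                /* bit 0 -> index 1           */
	assert(ffsl(8L) == 4);              /* 0b1000: bit 3 -> index 4   */
	assert(ffsll(1ULL << 40) == 41);    /* exercises the 64-bit path  */
	return 0;
}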
diff --git a/deps/jemalloc/include/msvc_compat/windows_extra.h b/deps/jemalloc/include/msvc_compat/windows_extra.h
new file mode 100644
index 000000000..a6ebb9306
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/windows_extra.h
@@ -0,0 +1,6 @@
+#ifndef MSVC_COMPAT_WINDOWS_EXTRA_H
+#define MSVC_COMPAT_WINDOWS_EXTRA_H
+
+#include <errno.h>
+
+#endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */
diff --git a/deps/jemalloc/jemalloc.pc.in b/deps/jemalloc/jemalloc.pc.in
new file mode 100644
index 000000000..c428a86dc
--- /dev/null
+++ b/deps/jemalloc/jemalloc.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+install_suffix=@install_suffix@
+
+Name: jemalloc
+Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support.
+URL: http://jemalloc.net/
+Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@
+Cflags: -I${includedir}
+Libs: -L${libdir} -ljemalloc${install_suffix}
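With this template filled in at configure time, downstream builds can discover the flags via pkg-config instead of hard-coding paths, e.g. `cc app.c $(pkg-config --cflags --libs jemalloc)` (assuming a default, suffix-free install; a non-empty @install_suffix@ changes the -l flag in Libs above accordingly).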
diff --git a/deps/jemalloc/m4/ax_cxx_compile_stdcxx.m4 b/deps/jemalloc/m4/ax_cxx_compile_stdcxx.m4
new file mode 100644
index 000000000..2c18e49c5
--- /dev/null
+++ b/deps/jemalloc/m4/ax_cxx_compile_stdcxx.m4
@@ -0,0 +1,562 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional])
+#
+# DESCRIPTION
+#
+# Check for baseline language coverage in the compiler for the specified
+# version of the C++ standard. If necessary, add switches to CXX and
+# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard)
+# or '14' (for the C++14 standard).
+#
+# The second argument, if specified, indicates whether you insist on an
+# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g.
+# -std=c++11). If neither is specified, you get whatever works, with
+# preference for an extended mode.
+#
+# The third argument, if specified as 'mandatory' or left unspecified,
+# indicates that baseline support for the specified C++ standard is
+# required and that the macro should error out if no mode with that
+# support is found. If specified as 'optional', configuration proceeds
+# regardless, after defining HAVE_CXX${VERSION} if and only if a
+# supporting mode is found.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Benjamin Kosnik <bkoz@redhat.com>
+# Copyright (c) 2012 Zack Weinberg <zackw@panix.com>
+# Copyright (c) 2013 Roy Stogner <roystgnr@ices.utexas.edu>
+# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov <sokolov@google.com>
+# Copyright (c) 2015 Paul Norman <penorman@mac.com>
+# Copyright (c) 2015 Moritz Klammler <moritz@klammler.eu>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 4
+
+dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro
+dnl (serial version number 13).
+
+AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl
+ m4_if([$1], [11], [],
+ [$1], [14], [],
+ [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])],
+ [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl
+ m4_if([$2], [], [],
+ [$2], [ext], [],
+ [$2], [noext], [],
+ [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl
+ m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true],
+ [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true],
+ [$3], [optional], [ax_cxx_compile_cxx$1_required=false],
+ [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])])
+ AC_LANG_PUSH([C++])dnl
+ ac_success=no
+ AC_CACHE_CHECK(whether $CXX supports C++$1 features by default,
+ ax_cv_cxx_compile_cxx$1,
+ [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
+ [ax_cv_cxx_compile_cxx$1=yes],
+ [ax_cv_cxx_compile_cxx$1=no])])
+ if test x$ax_cv_cxx_compile_cxx$1 = xyes; then
+ ac_success=yes
+ fi
+
+ m4_if([$2], [noext], [], [dnl
+ if test x$ac_success = xno; then
+ for switch in -std=gnu++$1 -std=gnu++0x; do
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
+ AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
+ $cachevar,
+ [ac_save_CXX="$CXX"
+ CXX="$CXX $switch"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
+ [eval $cachevar=yes],
+ [eval $cachevar=no])
+ CXX="$ac_save_CXX"])
+ if eval test x\$$cachevar = xyes; then
+ CXX="$CXX $switch"
+ if test -n "$CXXCPP" ; then
+ CXXCPP="$CXXCPP $switch"
+ fi
+ ac_success=yes
+ break
+ fi
+ done
+ fi])
+
+ m4_if([$2], [ext], [], [dnl
+ if test x$ac_success = xno; then
+ dnl HP's aCC needs +std=c++11 according to:
+ dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf
+ dnl Cray's crayCC needs "-h std=c++11"
+ for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do
+ cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch])
+ AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch,
+ $cachevar,
+ [ac_save_CXX="$CXX"
+ CXX="$CXX $switch"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])],
+ [eval $cachevar=yes],
+ [eval $cachevar=no])
+ CXX="$ac_save_CXX"])
+ if eval test x\$$cachevar = xyes; then
+ CXX="$CXX $switch"
+ if test -n "$CXXCPP" ; then
+ CXXCPP="$CXXCPP $switch"
+ fi
+ ac_success=yes
+ break
+ fi
+ done
+ fi])
+ AC_LANG_POP([C++])
+ if test x$ax_cxx_compile_cxx$1_required = xtrue; then
+ if test x$ac_success = xno; then
+ AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.])
+ fi
+ fi
+ if test x$ac_success = xno; then
+ HAVE_CXX$1=0
+ AC_MSG_NOTICE([No compiler with C++$1 support was found])
+ else
+ HAVE_CXX$1=1
+ AC_DEFINE(HAVE_CXX$1,1,
+ [define if the compiler supports basic C++$1 syntax])
+ fi
+ AC_SUBST(HAVE_CXX$1)
+])
+
+
+dnl Test body for checking C++11 support
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11],
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
+)
+
+
+dnl Test body for checking C++14 support
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14],
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_11
+ _AX_CXX_COMPILE_STDCXX_testbody_new_in_14
+)
+
+
+dnl Tests for new features in C++11
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[
+
+// If the compiler admits that it is not ready for C++11, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201103L
+
+#error "This is not a C++11 compiler"
+
+#else
+
+namespace cxx11
+{
+
+ namespace test_static_assert
+ {
+
+ template <typename T>
+ struct check
+ {
+ static_assert(sizeof(int) <= sizeof(T), "not big enough");
+ };
+
+ }
+
+ namespace test_final_override
+ {
+
+ struct Base
+ {
+ virtual void f() {}
+ };
+
+ struct Derived : public Base
+ {
+ virtual void f() override {}
+ };
+
+ }
+
+ namespace test_double_right_angle_brackets
+ {
+
+ template < typename T >
+ struct check {};
+
+ typedef check<void> single_type;
+ typedef check<check<void>> double_type;
+ typedef check<check<check<void>>> triple_type;
+ typedef check<check<check<check<void>>>> quadruple_type;
+
+ }
+
+ namespace test_decltype
+ {
+
+ int
+ f()
+ {
+ int a = 1;
+ decltype(a) b = 2;
+ return a + b;
+ }
+
+ }
+
+ namespace test_type_deduction
+ {
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static const bool value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static const bool value = true;
+ };
+
+ template < typename T1, typename T2 >
+ auto
+ add(T1 a1, T2 a2) -> decltype(a1 + a2)
+ {
+ return a1 + a2;
+ }
+
+ int
+ test(const int c, volatile int v)
+ {
+ static_assert(is_same<int, decltype(0)>::value == true, "");
+ static_assert(is_same<int, decltype(c)>::value == false, "");
+ static_assert(is_same<int, decltype(v)>::value == false, "");
+ auto ac = c;
+ auto av = v;
+ auto sumi = ac + av + 'x';
+ auto sumf = ac + av + 1.0;
+ static_assert(is_same<int, decltype(ac)>::value == true, "");
+ static_assert(is_same<int, decltype(av)>::value == true, "");
+ static_assert(is_same<int, decltype(sumi)>::value == true, "");
+ static_assert(is_same<int, decltype(sumf)>::value == false, "");
+ static_assert(is_same<int, decltype(add(c, v))>::value == true, "");
+ return (sumf > 0.0) ? sumi : add(c, v);
+ }
+
+ }
+
+ namespace test_noexcept
+ {
+
+ int f() { return 0; }
+ int g() noexcept { return 0; }
+
+ static_assert(noexcept(f()) == false, "");
+ static_assert(noexcept(g()) == true, "");
+
+ }
+
+ namespace test_constexpr
+ {
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c_r(const CharT *const s, const unsigned long acc) noexcept
+ {
+ return *s ? strlen_c_r(s + 1, acc + 1) : acc;
+ }
+
+ template < typename CharT >
+ unsigned long constexpr
+ strlen_c(const CharT *const s) noexcept
+ {
+ return strlen_c_r(s, 0UL);
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("1") == 1UL, "");
+ static_assert(strlen_c("example") == 7UL, "");
+ static_assert(strlen_c("another\0example") == 7UL, "");
+
+ }
+
+ namespace test_rvalue_references
+ {
+
+ template < int N >
+ struct answer
+ {
+ static constexpr int value = N;
+ };
+
+ answer<1> f(int&) { return answer<1>(); }
+ answer<2> f(const int&) { return answer<2>(); }
+ answer<3> f(int&&) { return answer<3>(); }
+
+ void
+ test()
+ {
+ int i = 0;
+ const int c = 0;
+ static_assert(decltype(f(i))::value == 1, "");
+ static_assert(decltype(f(c))::value == 2, "");
+ static_assert(decltype(f(0))::value == 3, "");
+ }
+
+ }
+
+ namespace test_uniform_initialization
+ {
+
+ struct test
+ {
+ static const int zero {};
+ static const int one {1};
+ };
+
+ static_assert(test::zero == 0, "");
+ static_assert(test::one == 1, "");
+
+ }
+
+ namespace test_lambdas
+ {
+
+ void
+ test1()
+ {
+ auto lambda1 = [](){};
+ auto lambda2 = lambda1;
+ lambda1();
+ lambda2();
+ }
+
+ int
+ test2()
+ {
+ auto a = [](int i, int j){ return i + j; }(1, 2);
+ auto b = []() -> int { return '0'; }();
+ auto c = [=](){ return a + b; }();
+ auto d = [&](){ return c; }();
+ auto e = [a, &b](int x) mutable {
+ const auto identity = [](int y){ return y; };
+ for (auto i = 0; i < a; ++i)
+ a += b--;
+ return x + identity(a + b);
+ }(0);
+ return a + b + c + d + e;
+ }
+
+ int
+ test3()
+ {
+ const auto nullary = [](){ return 0; };
+ const auto unary = [](int x){ return x; };
+ using nullary_t = decltype(nullary);
+ using unary_t = decltype(unary);
+ const auto higher1st = [](nullary_t f){ return f(); };
+ const auto higher2nd = [unary](nullary_t f1){
+ return [unary, f1](unary_t f2){ return f2(unary(f1())); };
+ };
+ return higher1st(nullary) + higher2nd(nullary)(unary);
+ }
+
+ }
+
+ namespace test_variadic_templates
+ {
+
+ template <int...>
+ struct sum;
+
+ template <int N0, int... N1toN>
+ struct sum<N0, N1toN...>
+ {
+ static constexpr auto value = N0 + sum<N1toN...>::value;
+ };
+
+ template <>
+ struct sum<>
+ {
+ static constexpr auto value = 0;
+ };
+
+ static_assert(sum<>::value == 0, "");
+ static_assert(sum<1>::value == 1, "");
+ static_assert(sum<23>::value == 23, "");
+ static_assert(sum<1, 2>::value == 3, "");
+ static_assert(sum<5, 5, 11>::value == 21, "");
+ static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, "");
+
+ }
+
+ // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae
+ // Clang 3.1 fails with headers of libstdc++ 4.8.3 when using std::function
+ // because of this.
+ namespace test_template_alias_sfinae
+ {
+
+ struct foo {};
+
+ template<typename T>
+ using member = typename T::member_type;
+
+ template<typename T>
+ void func(...) {}
+
+ template<typename T>
+ void func(member<T>*) {}
+
+ void test();
+
+ void test() { func<foo>(0); }
+
+ }
+
+} // namespace cxx11
+
+#endif // __cplusplus >= 201103L
+
+]])
+
+
+dnl Tests for new features in C++14
+
+m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[
+
+// If the compiler admits that it is not ready for C++14, why torture it?
+// Hopefully, this will speed up the test.
+
+#ifndef __cplusplus
+
+#error "This is not a C++ compiler"
+
+#elif __cplusplus < 201402L
+
+#error "This is not a C++14 compiler"
+
+#else
+
+namespace cxx14
+{
+
+ namespace test_polymorphic_lambdas
+ {
+
+ int
+ test()
+ {
+ const auto lambda = [](auto&&... args){
+ const auto istiny = [](auto x){
+ return (sizeof(x) == 1UL) ? 1 : 0;
+ };
+ const int aretiny[] = { istiny(args)... };
+ return aretiny[0];
+ };
+ return lambda(1, 1L, 1.0f, '1');
+ }
+
+ }
+
+ namespace test_binary_literals
+ {
+
+ constexpr auto ivii = 0b0000000000101010;
+ static_assert(ivii == 42, "wrong value");
+
+ }
+
+ namespace test_generalized_constexpr
+ {
+
+ template < typename CharT >
+ constexpr unsigned long
+ strlen_c(const CharT *const s) noexcept
+ {
+ auto length = 0UL;
+ for (auto p = s; *p; ++p)
+ ++length;
+ return length;
+ }
+
+ static_assert(strlen_c("") == 0UL, "");
+ static_assert(strlen_c("x") == 1UL, "");
+ static_assert(strlen_c("test") == 4UL, "");
+ static_assert(strlen_c("another\0test") == 7UL, "");
+
+ }
+
+ namespace test_lambda_init_capture
+ {
+
+ int
+ test()
+ {
+ auto x = 0;
+ const auto lambda1 = [a = x](int b){ return a + b; };
+ const auto lambda2 = [a = lambda1(x)](){ return a; };
+ return lambda2();
+ }
+
+ }
+
+ namespace test_digit_separators
+ {
+
+ constexpr auto ten_million = 10'000'000;
+ static_assert(ten_million == 10000000, "");
+
+ }
+
+ namespace test_return_type_deduction
+ {
+
+ auto f(int& x) { return x; }
+ decltype(auto) g(int& x) { return x; }
+
+ template < typename T1, typename T2 >
+ struct is_same
+ {
+ static constexpr auto value = false;
+ };
+
+ template < typename T >
+ struct is_same<T, T>
+ {
+ static constexpr auto value = true;
+ };
+
+ int
+ test()
+ {
+ auto x = 0;
+ static_assert(is_same<int, decltype(f(x))>::value, "");
+ static_assert(is_same<int&, decltype(g(x))>::value, "");
+ return x;
+ }
+
+ }
+
+} // namespace cxx14
+
+#endif // __cplusplus >= 201402L
+
+]])
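Per the SYNOPSIS above, a configure.ac would invoke the macro as, for example, `AX_CXX_COMPILE_STDCXX([14], [noext], [optional])`: with those arguments it appends a working strict-conformance switch (e.g. -std=c++14) to CXX if one is found, defines and substitutes HAVE_CXX14 accordingly, and never aborts configuration.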
diff --git a/deps/jemalloc/msvc/ReadMe.txt b/deps/jemalloc/msvc/ReadMe.txt
new file mode 100644
index 000000000..633a7d49f
--- /dev/null
+++ b/deps/jemalloc/msvc/ReadMe.txt
@@ -0,0 +1,23 @@
+
+How to build jemalloc for Windows
+=================================
+
+1. Install Cygwin with at least the following packages:
+ * autoconf
+ * autogen
+ * gawk
+ * grep
+ * sed
+
+2. Install Visual Studio 2015 or 2017 with Visual C++
+
+3. Add Cygwin\bin to the PATH environment variable
+
+4. Open "x64 Native Tools Command Prompt for VS 2017"
+ (note: x86/x64 doesn't matter at this point)
+
+5. Generate header files:
+ sh -c "CC=cl ./autogen.sh"
+
+6. Now the project can be opened and built in Visual Studio:
+ msvc\jemalloc_vc2017.sln
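For unattended builds, the same solution can presumably also be driven from the command prompt opened in step 4, e.g. `msbuild msvc\jemalloc_vc2017.sln /p:Configuration=Release /p:Platform=x64`; the configuration and platform names are the ones enumerated in the solution files below.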
diff --git a/deps/jemalloc/msvc/jemalloc_vc2015.sln b/deps/jemalloc/msvc/jemalloc_vc2015.sln
new file mode 100644
index 000000000..aedd5e5ea
--- /dev/null
+++ b/deps/jemalloc/msvc/jemalloc_vc2015.sln
@@ -0,0 +1,63 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.24720.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}"
+ ProjectSection(SolutionItems) = preProject
+ ReadMe.txt = ReadMe.txt
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
+ Debug-static|x64 = Debug-static|x64
+ Debug-static|x86 = Debug-static|x86
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
+ Release-static|x64 = Release-static|x64
+ Release-static|x86 = Release-static|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/deps/jemalloc/msvc/jemalloc_vc2017.sln b/deps/jemalloc/msvc/jemalloc_vc2017.sln
new file mode 100644
index 000000000..c22fcb437
--- /dev/null
+++ b/deps/jemalloc/msvc/jemalloc_vc2017.sln
@@ -0,0 +1,63 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.24720.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}"
+ ProjectSection(SolutionItems) = preProject
+ ReadMe.txt = ReadMe.txt
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2017\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2017\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
+ Debug-static|x64 = Debug-static|x64
+ Debug-static|x86 = Debug-static|x86
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
+ Release-static|x64 = Release-static|x64
+ Release-static|x86 = Release-static|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32
+ {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32
+ {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
new file mode 100644
index 000000000..f7b175b0a
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj
@@ -0,0 +1,348 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug-static|Win32">
+ <Configuration>Debug-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug-static|x64">
+ <Configuration>Debug-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|Win32">
+ <Configuration>Release-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|x64">
+ <Configuration>Release-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\..\src\arena.c" />
+ <ClCompile Include="..\..\..\..\src\background_thread.c" />
+ <ClCompile Include="..\..\..\..\src\base.c" />
+ <ClCompile Include="..\..\..\..\src\bin.c" />
+ <ClCompile Include="..\..\..\..\src\bitmap.c" />
+ <ClCompile Include="..\..\..\..\src\ckh.c" />
+ <ClCompile Include="..\..\..\..\src\ctl.c" />
+ <ClCompile Include="..\..\..\..\src\div.c" />
+ <ClCompile Include="..\..\..\..\src\extent.c" />
+ <ClCompile Include="..\..\..\..\src\extent_dss.c" />
+ <ClCompile Include="..\..\..\..\src\extent_mmap.c" />
+ <ClCompile Include="..\..\..\..\src\hash.c" />
+ <ClCompile Include="..\..\..\..\src\hooks.c" />
+ <ClCompile Include="..\..\..\..\src\jemalloc.c" />
+ <ClCompile Include="..\..\..\..\src\large.c" />
+ <ClCompile Include="..\..\..\..\src\log.c" />
+ <ClCompile Include="..\..\..\..\src\malloc_io.c" />
+ <ClCompile Include="..\..\..\..\src\mutex.c" />
+ <ClCompile Include="..\..\..\..\src\mutex_pool.c" />
+ <ClCompile Include="..\..\..\..\src\nstime.c" />
+ <ClCompile Include="..\..\..\..\src\pages.c" />
+ <ClCompile Include="..\..\..\..\src\prng.c" />
+ <ClCompile Include="..\..\..\..\src\prof.c" />
+ <ClCompile Include="..\..\..\..\src\rtree.c" />
+ <ClCompile Include="..\..\..\..\src\stats.c" />
+ <ClCompile Include="..\..\..\..\src\sz.c" />
+ <ClCompile Include="..\..\..\..\src\tcache.c" />
+ <ClCompile Include="..\..\..\..\src\ticker.c" />
+ <ClCompile Include="..\..\..\..\src\tsd.c" />
+ <ClCompile Include="..\..\..\..\src\witness.c" />
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>jemalloc</RootNamespace>
+ <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="Shared">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)d</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)d</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ <MinimalRebuild>false</MinimalRebuild>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
new file mode 100644
index 000000000..11cfcd0be
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\..\src\arena.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\background_thread.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\base.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\bitmap.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ckh.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ctl.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent_dss.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent_mmap.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\hash.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\hooks.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\jemalloc.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\large.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\malloc_io.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\mutex.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\mutex_pool.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\nstime.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\pages.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\prng.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\prof.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\rtree.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\stats.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\sz.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\tcache.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ticker.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\tsd.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\witness.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\log.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\bin.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\div.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj
new file mode 100644
index 000000000..325876d6e
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj
@@ -0,0 +1,327 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug-static|Win32">
+ <Configuration>Debug-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug-static|x64">
+ <Configuration>Debug-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|Win32">
+ <Configuration>Release-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|x64">
+ <Configuration>Release-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{09028CFD-4EB7-491D-869C-0708DB97ED44}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>test_threads</RootNamespace>
+ <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v140</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="Shared">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\test_threads\test_threads.cpp" />
+ <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\jemalloc\jemalloc.vcxproj">
+ <Project>{8d6bb292-9e1c-413d-9f98-4864bdc1514a}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\..\test_threads\test_threads.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters
new file mode 100644
index 000000000..fa4588fd8
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+ <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\test_threads\test_threads.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\..\test_threads\test_threads.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
new file mode 100644
index 000000000..ed71de8a5
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj
@@ -0,0 +1,347 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug-static|Win32">
+ <Configuration>Debug-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug-static|x64">
+ <Configuration>Debug-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|Win32">
+ <Configuration>Release-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|x64">
+ <Configuration>Release-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\..\src\arena.c" />
+ <ClCompile Include="..\..\..\..\src\background_thread.c" />
+ <ClCompile Include="..\..\..\..\src\base.c" />
+ <ClCompile Include="..\..\..\..\src\bin.c" />
+ <ClCompile Include="..\..\..\..\src\bitmap.c" />
+ <ClCompile Include="..\..\..\..\src\ckh.c" />
+ <ClCompile Include="..\..\..\..\src\ctl.c" />
+ <ClCompile Include="..\..\..\..\src\div.c" />
+ <ClCompile Include="..\..\..\..\src\extent.c" />
+ <ClCompile Include="..\..\..\..\src\extent_dss.c" />
+ <ClCompile Include="..\..\..\..\src\extent_mmap.c" />
+ <ClCompile Include="..\..\..\..\src\hash.c" />
+ <ClCompile Include="..\..\..\..\src\hooks.c" />
+ <ClCompile Include="..\..\..\..\src\jemalloc.c" />
+ <ClCompile Include="..\..\..\..\src\large.c" />
+ <ClCompile Include="..\..\..\..\src\log.c" />
+ <ClCompile Include="..\..\..\..\src\malloc_io.c" />
+ <ClCompile Include="..\..\..\..\src\mutex.c" />
+ <ClCompile Include="..\..\..\..\src\mutex_pool.c" />
+ <ClCompile Include="..\..\..\..\src\nstime.c" />
+ <ClCompile Include="..\..\..\..\src\pages.c" />
+ <ClCompile Include="..\..\..\..\src\prng.c" />
+ <ClCompile Include="..\..\..\..\src\prof.c" />
+ <ClCompile Include="..\..\..\..\src\rtree.c" />
+ <ClCompile Include="..\..\..\..\src\stats.c" />
+ <ClCompile Include="..\..\..\..\src\sz.c" />
+ <ClCompile Include="..\..\..\..\src\tcache.c" />
+ <ClCompile Include="..\..\..\..\src\ticker.c" />
+ <ClCompile Include="..\..\..\..\src\tsd.c" />
+ <ClCompile Include="..\..\..\..\src\witness.c" />
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{8D6BB292-9E1C-413D-9F98-4864BDC1514A}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>jemalloc</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>DynamicLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+ <ConfigurationType>StaticLibrary</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="Shared">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)d</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-$(PlatformToolset)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)d</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <TargetName>$(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration)</TargetName>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ <MinimalRebuild>false</MinimalRebuild>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <ProgramDataBaseFileName>$(OutputPath)$(TargetName).pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <DisableSpecificWarnings>4090;4146;4267;4334</DisableSpecificWarnings>
+ <DebugInformationFormat>OldStyle</DebugInformationFormat>
+ </ClCompile>
+ <Link>
+ <SubSystem>Windows</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
new file mode 100644
index 000000000..11cfcd0be
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\..\src\arena.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\background_thread.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\base.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\bitmap.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ckh.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ctl.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent_dss.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\extent_mmap.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\hash.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\hooks.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\jemalloc.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\large.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\malloc_io.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\mutex.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\mutex_pool.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\nstime.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\pages.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\prng.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\prof.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\rtree.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\stats.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\sz.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\tcache.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\ticker.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\tsd.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\witness.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\log.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\bin.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\..\src\div.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj b/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj
new file mode 100644
index 000000000..c35b0f5aa
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj
@@ -0,0 +1,326 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug-static|Win32">
+ <Configuration>Debug-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug-static|x64">
+ <Configuration>Debug-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|Win32">
+ <Configuration>Release-static</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release-static|x64">
+ <Configuration>Release-static</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{09028CFD-4EB7-491D-869C-0708DB97ED44}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>test_threads</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="Shared">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>$(Platform)\$(Configuration)\</IntDir>
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug-static|x64'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release-static|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>$(SolutionDir)$(Platform)\$(Configuration)</AdditionalLibraryDirectories>
+ <AdditionalDependencies>jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\test_threads\test_threads.cpp" />
+ <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\jemalloc\jemalloc.vcxproj">
+ <Project>{8d6bb292-9e1c-413d-9f98-4864bdc1514a}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\..\test_threads\test_threads.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
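The Release-static configurations above compile consumers with `JEMALLOC_EXPORT=` (empty) and `JEMALLOC_STATIC`, so the public header does not decorate the API with `__declspec(dllimport)` when linking the static library. Below is a minimal consumer-side sketch of the same convention; the `je_` prefix matches this build's configuration, and `JEMALLOC_STATIC` is assumed to be this solution's own marker rather than something the public header itself checks:

```c
/* Sketch: linking a static jemalloc on MSVC. Defining JEMALLOC_EXPORT
 * empty before including the header suppresses dllimport decoration;
 * JEMALLOC_STATIC mirrors this project's preprocessor definitions. */
#define JEMALLOC_EXPORT
#define JEMALLOC_STATIC
#include <jemalloc/jemalloc.h>
#include <stdio.h>

int main(void) {
    void *p = je_malloc(128);
    if (p == NULL) {
        return 1;
    }
    je_free(p);
    printf("static jemalloc link OK\n");
    return 0;
}
```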
diff --git a/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters b/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters
new file mode 100644
index 000000000..fa4588fd8
--- /dev/null
+++ b/deps/jemalloc/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+ <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\..\test_threads\test_threads.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="..\..\..\test_threads\test_threads_main.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\..\..\test_threads\test_threads.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/jemalloc/msvc/test_threads/test_threads.cpp b/deps/jemalloc/msvc/test_threads/test_threads.cpp
new file mode 100644
index 000000000..92e316243
--- /dev/null
+++ b/deps/jemalloc/msvc/test_threads/test_threads.cpp
@@ -0,0 +1,88 @@
+// jemalloc C++ threaded test
+// Author: Rustam Abdullaev
+// Public Domain
+
+#include <atomic>
+#include <functional>
+#include <future>
+#include <random>
+#include <thread>
+#include <vector>
+#include <stdio.h>
+#include <jemalloc/jemalloc.h>
+
+using std::vector;
+using std::thread;
+using std::uniform_int_distribution;
+using std::minstd_rand;
+
+int test_threads() {
+ je_malloc_conf = "narenas:3";
+ int narenas = 0;
+ size_t sz = sizeof(narenas);
+ je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0);
+ if (narenas != 3) {
+ printf("Error: unexpected number of arenas: %d\n", narenas);
+ return 1;
+ }
+ static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 };
+ static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0]));
+ vector<thread> workers;
+ static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50;
+ je_malloc_stats_print(NULL, NULL, NULL);
+ size_t allocated1;
+ size_t sz1 = sizeof(allocated1);
+ je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0);
+ printf("\nPress Enter to start threads...\n");
+ getchar();
+ printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2);
+ for (int i = 0; i < numThreads; i++) {
+ workers.emplace_back([tid=i]() {
+ uniform_int_distribution<int> sizeDist(0, numSizes - 1);
+ minstd_rand rnd(tid * 17);
+ uint8_t* ptrs[numAllocsMax];
+ int ptrsz[numAllocsMax];
+ for (int i = 0; i < numIter1; ++i) {
+ thread t([&]() {
+ for (int i = 0; i < numIter2; ++i) {
+ const int numAllocs = numAllocsMax - sizeDist(rnd);
+ for (int j = 0; j < numAllocs; j++) {
+ const int x = sizeDist(rnd);
+ const int sz = sizes[x];
+ ptrsz[j] = sz;
+ ptrs[j] = (uint8_t*)je_malloc(sz);
+ if (!ptrs[j]) {
+ printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x);
+ exit(1);
+ }
+ for (int k = 0; k < sz; k++)
+ ptrs[j][k] = tid + k;
+ }
+ for (int j = 0; j < numAllocs; j++) {
+ for (int k = 0, sz = ptrsz[j]; k < sz; k++)
+ if (ptrs[j][k] != (uint8_t)(tid + k)) {
+ printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k));
+ exit(1);
+ }
+ je_free(ptrs[j]);
+ }
+ }
+ });
+ t.join();
+ }
+ });
+ }
+ for (thread& t : workers) {
+ t.join();
+ }
+ je_malloc_stats_print(NULL, NULL, NULL);
+ size_t allocated2;
+ je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0);
+ size_t leaked = allocated2 - allocated1;
+ printf("\nDone. Leaked: %zd bytes\n", leaked);
+ bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet)
+ printf("\nTest %s!\n", (failed ? "FAILED" : "successful"));
+ printf("\nPress Enter to continue...\n");
+ getchar();
+ return failed ? 1 : 0;
+}
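One caveat about the `stats.active` reads in this test: jemalloc serves statistics from a cached snapshot, and a write to the `epoch` mallctl is normally needed to refresh that snapshot before reading (here `je_malloc_stats_print()` happens to refresh it as a side effect). A hedged sketch of the refresh idiom, using the same `je_` prefix as the test:

```c
#include <stdint.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

/* Tick the epoch so stats.* reflects current state, then read
 * stats.active (total bytes in active pages). */
static size_t read_active_bytes(void) {
    uint64_t epoch = 1;
    size_t esz = sizeof(epoch);
    je_mallctl("epoch", &epoch, &esz, &epoch, esz);

    size_t active = 0;
    size_t sz = sizeof(active);
    je_mallctl("stats.active", &active, &sz, NULL, 0);
    return active;
}

int main(void) {
    void *p = je_malloc(1 << 20);
    printf("active: %zu bytes\n", read_active_bytes());
    je_free(p);
    return 0;
}
```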
diff --git a/deps/jemalloc/msvc/test_threads/test_threads.h b/deps/jemalloc/msvc/test_threads/test_threads.h
new file mode 100644
index 000000000..64d0cdb33
--- /dev/null
+++ b/deps/jemalloc/msvc/test_threads/test_threads.h
@@ -0,0 +1,3 @@
+#pragma once
+
+int test_threads();
diff --git a/deps/jemalloc/msvc/test_threads/test_threads_main.cpp b/deps/jemalloc/msvc/test_threads/test_threads_main.cpp
new file mode 100644
index 000000000..0a022fba4
--- /dev/null
+++ b/deps/jemalloc/msvc/test_threads/test_threads_main.cpp
@@ -0,0 +1,11 @@
+#include "test_threads.h"
+#include <future>
+#include <functional>
+#include <chrono>
+
+using namespace std::chrono_literals;
+
+int main(int argc, char** argv) {
+ int rc = test_threads();
+ return rc;
+}
diff --git a/deps/jemalloc/run_tests.sh b/deps/jemalloc/run_tests.sh
new file mode 100755
index 000000000..b434f15b3
--- /dev/null
+++ b/deps/jemalloc/run_tests.sh
@@ -0,0 +1 @@
+$(dirname "$0")/scripts/gen_run_tests.py | bash
diff --git a/deps/jemalloc/scripts/gen_run_tests.py b/deps/jemalloc/scripts/gen_run_tests.py
new file mode 100755
index 000000000..a87ecffba
--- /dev/null
+++ b/deps/jemalloc/scripts/gen_run_tests.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+import sys
+from itertools import combinations
+from os import uname
+from multiprocessing import cpu_count
+
+# Later, we want to test extended vaddr support. Apparently, the "real" way of
+# checking this is flaky on OS X.
+bits_64 = sys.maxsize > 2**32
+
+nparallel = cpu_count() * 2
+
+uname = uname()[0]
+
+def powerset(items):
+ result = []
+ for i in xrange(len(items) + 1):
+ result += combinations(items, i)
+ return result
+
+possible_compilers = [('gcc', 'g++'), ('clang', 'clang++')]
+possible_compiler_opts = [
+ '-m32',
+]
+possible_config_opts = [
+ '--enable-debug',
+ '--enable-prof',
+ '--disable-stats',
+]
+if bits_64:
+ possible_config_opts.append('--with-lg-vaddr=56')
+
+possible_malloc_conf_opts = [
+ 'tcache:false',
+ 'dss:primary',
+ 'percpu_arena:percpu',
+ 'background_thread:true',
+]
+
+print 'set -e'
+print 'if [ -f Makefile ] ; then make relclean ; fi'
+print 'autoconf'
+print 'rm -rf run_tests.out'
+print 'mkdir run_tests.out'
+print 'cd run_tests.out'
+
+ind = 0
+for cc, cxx in possible_compilers:
+ for compiler_opts in powerset(possible_compiler_opts):
+ for config_opts in powerset(possible_config_opts):
+ for malloc_conf_opts in powerset(possible_malloc_conf_opts):
+ if cc == 'clang' \
+ and '-m32' in compiler_opts \
+ and '--enable-prof' in config_opts:
+ continue
+ config_line = (
+ 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror '
+ + 'CC="{} {}" '.format(cc, " ".join(compiler_opts))
+ + 'CXX="{} {}" '.format(cxx, " ".join(compiler_opts))
+ + '../../configure '
+ + " ".join(config_opts) + (' --with-malloc-conf=' +
+ ",".join(malloc_conf_opts) if len(malloc_conf_opts) > 0
+ else '')
+ )
+
+ # We don't want to test large vaddr spaces in 32-bit mode.
+ if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in
+ config_opts):
+ continue
+
+ # Per-CPU arenas and background threads are only supported on Linux.
+ linux_supported = ('percpu_arena:percpu' in malloc_conf_opts \
+ or 'background_thread:true' in malloc_conf_opts)
+ # Heap profiling and dss are not supported on OS X.
+ darwin_unsupported = ('--enable-prof' in config_opts or \
+ 'dss:primary' in malloc_conf_opts)
+ if (uname == 'Linux' and linux_supported) \
+ or (not linux_supported and (uname != 'Darwin' or \
+ not darwin_unsupported)):
+ print """cat <<EOF > run_test_%(ind)d.sh
+#!/bin/sh
+
+set -e
+
+abort() {
+ echo "==> Error" >> run_test.log
+ echo "Error; see run_tests.out/run_test_%(ind)d.out/run_test.log"
+ exit 255 # Special exit code tells xargs to terminate.
+}
+
+# Environment variable assignments as command prefixes are not supported by run_cmd.
+run_cmd() {
+ echo "==> \$@" >> run_test.log
+ \$@ >> run_test.log 2>&1 || abort
+}
+
+echo "=> run_test_%(ind)d: %(config_line)s"
+mkdir run_test_%(ind)d.out
+cd run_test_%(ind)d.out
+
+echo "==> %(config_line)s" >> run_test.log
+%(config_line)s >> run_test.log 2>&1 || abort
+
+run_cmd make all tests
+run_cmd make check
+run_cmd make distclean
+EOF
+chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line}
+ ind += 1
+
+print 'for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel}
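The `powerset` helper above builds every subset by concatenating `combinations` of each size, which is what lets the generator sweep all pairings of compilers, flags, and malloc confs. For reference, a self-contained C sketch of the same enumeration using a bitmask (adequate here because the option lists are small):

```c
#include <stdio.h>

/* Enumerate all 2^n subsets of items[0..n-1] via a bitmask.
 * Equivalent to the Python powerset() above, modulo ordering. */
static void powerset(const char *items[], int n) {
    for (unsigned mask = 0; mask < (1u << n); mask++) {
        printf("{");
        for (int i = 0; i < n; i++) {
            if (mask & (1u << i)) {
                printf(" %s", items[i]);
            }
        }
        printf(" }\n");
    }
}

int main(void) {
    const char *opts[] = { "--enable-debug", "--enable-prof", "--disable-stats" };
    powerset(opts, 3); /* prints all 8 subsets */
    return 0;
}
```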
diff --git a/deps/jemalloc/scripts/gen_travis.py b/deps/jemalloc/scripts/gen_travis.py
new file mode 100755
index 000000000..6dd39290c
--- /dev/null
+++ b/deps/jemalloc/scripts/gen_travis.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+from itertools import combinations
+
+travis_template = """\
+language: generic
+
+matrix:
+ include:
+%s
+
+before_script:
+ - autoconf
+ - ./configure ${COMPILER_FLAGS:+ \
+ CC="$CC $COMPILER_FLAGS" \
+ CXX="$CXX $COMPILER_FLAGS" } \
+ $CONFIGURE_FLAGS
+ - make -j3
+ - make -j3 tests
+
+script:
+ - make check
+"""
+
+# The 'default' configuration is gcc, on linux, with no compiler or configure
+# flags. The 'unusual' settings are osx, clang, -m32, --enable-debug,
+# --enable-prof, --disable-stats, and the four --with-malloc-conf options
+# listed below -- ten in all. To avoid abusing travis, we don't test all
+# 2**10 = 1024 possible combinations of these; instead, we only test
+# combinations of at most MAX_UNUSUAL_OPTIONS settings at once, for
+# C(10, 0) + C(10, 1) + C(10, 2) = 56 jobs (before filtering out
+# unsupported OS X combinations), under the hope that bugs involving
+# interactions of such settings are rare.
+MAX_UNUSUAL_OPTIONS = 2
+
+os_default = 'linux'
+os_unusual = 'osx'
+
+compilers_default = 'CC=gcc CXX=g++'
+compilers_unusual = 'CC=clang CXX=clang++'
+
+compiler_flag_unusuals = ['-m32']
+
+configure_flag_unusuals = [
+ '--enable-debug',
+ '--enable-prof',
+ '--disable-stats',
+]
+
+malloc_conf_unusuals = [
+ 'tcache:false',
+ 'dss:primary',
+ 'percpu_arena:percpu',
+ 'background_thread:true',
+]
+
+all_unusuals = (
+ [os_unusual] + [compilers_unusual] + compiler_flag_unusuals
+ + configure_flag_unusuals + malloc_conf_unusuals
+)
+
+unusual_combinations_to_test = []
+for i in xrange(MAX_UNUSUAL_OPTIONS + 1):
+ unusual_combinations_to_test += combinations(all_unusuals, i)
+
+include_rows = ""
+for unusual_combination in unusual_combinations_to_test:
+ os = os_default
+ if os_unusual in unusual_combination:
+ os = os_unusual
+
+ compilers = compilers_default
+ if compilers_unusual in unusual_combination:
+ compilers = compilers_unusual
+
+ compiler_flags = [
+ x for x in unusual_combination if x in compiler_flag_unusuals]
+
+ configure_flags = [
+ x for x in unusual_combination if x in configure_flag_unusuals]
+
+ malloc_conf = [
+ x for x in unusual_combination if x in malloc_conf_unusuals]
+ # Filter out unsupported configurations on OS X.
+ if os == 'osx' and ('dss:primary' in malloc_conf or \
+ 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \
+ in malloc_conf):
+ continue
+ if len(malloc_conf) > 0:
+ configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf))
+
+ # Filter out an unsupported configuration - heap profiling on OS X.
+ if os == 'osx' and '--enable-prof' in configure_flags:
+ continue
+
+ # We get some spurious errors when -Warray-bounds is enabled.
+ env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" '
+ 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format(
+ compilers, " ".join(compiler_flags), " ".join(configure_flags))
+
+ include_rows += ' - os: %s\n' % os
+ include_rows += ' env: %s\n' % env_string
+ if '-m32' in unusual_combination and os == 'linux':
+ include_rows += ' addons:\n'
+ include_rows += ' apt:\n'
+ include_rows += ' packages:\n'
+ include_rows += ' - gcc-multilib\n'
+
+print travis_template % include_rows
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c
index dad707b63..5d55bf1a0 100644
--- a/deps/jemalloc/src/arena.c
+++ b/deps/jemalloc/src/arena.c
@@ -1,2577 +1,2043 @@
-#define JEMALLOC_ARENA_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_ARENA_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/div.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/util.h"
/******************************************************************************/
/* Data. */
-ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
-arena_bin_info_t arena_bin_info[NBINS];
-
-JEMALLOC_ALIGNED(CACHELINE)
-const uint8_t small_size2bin[] = {
-#define S2B_8(i) i,
-#define S2B_16(i) S2B_8(i) S2B_8(i)
-#define S2B_32(i) S2B_16(i) S2B_16(i)
-#define S2B_64(i) S2B_32(i) S2B_32(i)
-#define S2B_128(i) S2B_64(i) S2B_64(i)
-#define S2B_256(i) S2B_128(i) S2B_128(i)
-#define S2B_512(i) S2B_256(i) S2B_256(i)
-#define S2B_1024(i) S2B_512(i) S2B_512(i)
-#define S2B_2048(i) S2B_1024(i) S2B_1024(i)
-#define S2B_4096(i) S2B_2048(i) S2B_2048(i)
-#define S2B_8192(i) S2B_4096(i) S2B_4096(i)
-#define SIZE_CLASS(bin, delta, size) \
- S2B_##delta(bin)
- SIZE_CLASSES
-#undef S2B_8
-#undef S2B_16
-#undef S2B_32
-#undef S2B_64
-#undef S2B_128
-#undef S2B_256
-#undef S2B_512
-#undef S2B_1024
-#undef S2B_2048
-#undef S2B_4096
-#undef S2B_8192
-#undef SIZE_CLASS
+/*
+ * Define names for both unininitialized and initialized phases, so that
+ * options and mallctl processing are straightforward.
+ */
+const char *percpu_arena_mode_names[] = {
+ "percpu",
+ "phycpu",
+ "disabled",
+ "percpu",
+ "phycpu"
+};
+percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT;
+
+ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT;
+ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT;
+
+static atomic_zd_t dirty_decay_ms_default;
+static atomic_zd_t muzzy_decay_ms_default;
+
+const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
+#define STEP(step, h, x, y) \
+ h,
+ SMOOTHSTEP
+#undef STEP
};
+static div_info_t arena_binind_div_info[NBINS];
+
/******************************************************************************/
/*
* Function prototypes for static functions that are referenced prior to
* definition.
*/
-static void arena_purge(arena_t *arena, bool all);
-static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty,
- bool cleaned);
-static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
- arena_run_t *run, arena_bin_t *bin);
-static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
- arena_run_t *run, arena_bin_t *bin);
+static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena,
+ arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit,
+ size_t npages_decay_max, bool is_background_thread);
+static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena,
+ bool is_background_thread, bool all);
+static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+ bin_t *bin);
+static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+ bin_t *bin);
/******************************************************************************/
-static inline int
-arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
-{
- uintptr_t a_mapelm = (uintptr_t)a;
- uintptr_t b_mapelm = (uintptr_t)b;
-
- assert(a != NULL);
- assert(b != NULL);
-
- return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
+void
+arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+ const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
+ size_t *nactive, size_t *ndirty, size_t *nmuzzy) {
+ *nthreads += arena_nthreads_get(arena, false);
+ *dss = dss_prec_names[arena_dss_prec_get(arena)];
+ *dirty_decay_ms = arena_dirty_decay_ms_get(arena);
+ *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena);
+ *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED);
+ *ndirty += extents_npages_get(&arena->extents_dirty);
+ *nmuzzy += extents_npages_get(&arena->extents_muzzy);
}
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
- u.rb_link, arena_run_comp)
-
-static inline int
-arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
-{
- int ret;
- size_t a_size = a->bits & ~PAGE_MASK;
- size_t b_size = b->bits & ~PAGE_MASK;
-
- ret = (a_size > b_size) - (a_size < b_size);
- if (ret == 0) {
- uintptr_t a_mapelm, b_mapelm;
-
- if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
- a_mapelm = (uintptr_t)a;
- else {
- /*
- * Treat keys as though they are lower than anything
- * else.
- */
- a_mapelm = 0;
+void
+arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+ const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
+ size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
+ bin_stats_t *bstats, arena_stats_large_t *lstats) {
+ cassert(config_stats);
+
+ arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
+ muzzy_decay_ms, nactive, ndirty, nmuzzy);
+
+ size_t base_allocated, base_resident, base_mapped, metadata_thp;
+ base_stats_get(tsdn, arena->base, &base_allocated, &base_resident,
+ &base_mapped, &metadata_thp);
+
+ arena_stats_lock(tsdn, &arena->stats);
+
+ arena_stats_accum_zu(&astats->mapped, base_mapped
+ + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped));
+ arena_stats_accum_zu(&astats->retained,
+ extents_npages_get(&arena->extents_retained) << LG_PAGE);
+
+ arena_stats_accum_u64(&astats->decay_dirty.npurge,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_dirty.npurge));
+ arena_stats_accum_u64(&astats->decay_dirty.nmadvise,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_dirty.nmadvise));
+ arena_stats_accum_u64(&astats->decay_dirty.purged,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_dirty.purged));
+
+ arena_stats_accum_u64(&astats->decay_muzzy.npurge,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_muzzy.npurge));
+ arena_stats_accum_u64(&astats->decay_muzzy.nmadvise,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_muzzy.nmadvise));
+ arena_stats_accum_u64(&astats->decay_muzzy.purged,
+ arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.decay_muzzy.purged));
+
+ arena_stats_accum_zu(&astats->base, base_allocated);
+ arena_stats_accum_zu(&astats->internal, arena_internal_get(arena));
+ arena_stats_accum_zu(&astats->metadata_thp, metadata_thp);
+ arena_stats_accum_zu(&astats->resident, base_resident +
+ (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) +
+ extents_npages_get(&arena->extents_dirty) +
+ extents_npages_get(&arena->extents_muzzy)) << LG_PAGE)));
+
+ for (szind_t i = 0; i < NSIZES - NBINS; i++) {
+ uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.lstats[i].nmalloc);
+ arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc);
+ arena_stats_accum_u64(&astats->nmalloc_large, nmalloc);
+
+ uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.lstats[i].ndalloc);
+ arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc);
+ arena_stats_accum_u64(&astats->ndalloc_large, ndalloc);
+
+ uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats,
+ &arena->stats.lstats[i].nrequests);
+ arena_stats_accum_u64(&lstats[i].nrequests,
+ nmalloc + nrequests);
+ arena_stats_accum_u64(&astats->nrequests_large,
+ nmalloc + nrequests);
+
+ assert(nmalloc >= ndalloc);
+ assert(nmalloc - ndalloc <= SIZE_T_MAX);
+ size_t curlextents = (size_t)(nmalloc - ndalloc);
+ lstats[i].curlextents += curlextents;
+ arena_stats_accum_zu(&astats->allocated_large,
+ curlextents * sz_index2size(NBINS + i));
+ }
+
+ arena_stats_unlock(tsdn, &arena->stats);
+
+ /* tcache_bytes counts currently cached bytes. */
+ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED);
+ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+ cache_bin_array_descriptor_t *descriptor;
+ ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) {
+ szind_t i = 0;
+ for (; i < NBINS; i++) {
+ cache_bin_t *tbin = &descriptor->bins_small[i];
+ arena_stats_accum_zu(&astats->tcache_bytes,
+ tbin->ncached * sz_index2size(i));
+ }
+ for (; i < nhbins; i++) {
+ cache_bin_t *tbin = &descriptor->bins_large[i];
+ arena_stats_accum_zu(&astats->tcache_bytes,
+ tbin->ncached * sz_index2size(i));
}
- b_mapelm = (uintptr_t)b;
-
- ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
}
+ malloc_mutex_prof_read(tsdn,
+ &astats->mutex_prof_data[arena_prof_mutex_tcache_list],
+ &arena->tcache_ql_mtx);
+ malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
+
+#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \
+ malloc_mutex_lock(tsdn, &arena->mtx); \
+ malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[ind], \
+ &arena->mtx); \
+ malloc_mutex_unlock(tsdn, &arena->mtx);
+
+ /* Gather per arena mutex profiling data. */
+ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large);
+ READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx,
+ arena_prof_mutex_extent_avail)
+ READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx,
+ arena_prof_mutex_extents_dirty)
+ READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx,
+ arena_prof_mutex_extents_muzzy)
+ READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx,
+ arena_prof_mutex_extents_retained)
+ READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx,
+ arena_prof_mutex_decay_dirty)
+ READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx,
+ arena_prof_mutex_decay_muzzy)
+ READ_ARENA_MUTEX_PROF_DATA(base->mtx,
+ arena_prof_mutex_base)
+#undef READ_ARENA_MUTEX_PROF_DATA
+
+ nstime_copy(&astats->uptime, &arena->create_time);
+ nstime_update(&astats->uptime);
+ nstime_subtract(&astats->uptime, &arena->create_time);
+
+ for (szind_t i = 0; i < NBINS; i++) {
+ bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]);
+ }
+}
- return (ret);
+void
+arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty,
+ extent);
+ if (arena_dirty_decay_ms_get(arena) == 0) {
+ arena_decay_dirty(tsdn, arena, false, true);
+ } else {
+ arena_background_thread_inactivity_check(tsdn, arena, false);
+ }
}
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
- u.rb_link, arena_avail_comp)
+static void *
+arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) {
+ void *ret;
+ arena_slab_data_t *slab_data = extent_slab_data_get(slab);
+ size_t regind;
-static inline int
-arena_chunk_dirty_comp(arena_chunk_t *a, arena_chunk_t *b)
-{
+ assert(extent_nfree_get(slab) > 0);
+ assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info));
- assert(a != NULL);
- assert(b != NULL);
+ regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info);
+ ret = (void *)((uintptr_t)extent_addr_get(slab) +
+ (uintptr_t)(bin_info->reg_size * regind));
+ extent_nfree_dec(slab);
+ return ret;
+}
- /*
- * Short-circuit for self comparison. The following comparison code
- * would come to the same result, but at the cost of executing the slow
- * path.
- */
- if (a == b)
- return (0);
+#ifndef JEMALLOC_JET
+static
+#endif
+size_t
+arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) {
+ size_t diff, regind;
- /*
- * Order such that chunks with higher fragmentation are "less than"
- * those with lower fragmentation -- purging order is from "least" to
- * "greatest". Fragmentation is measured as:
- *
- * mean current avail run size
- * --------------------------------
- * mean defragmented avail run size
- *
- * navail
- * -----------
- * nruns_avail nruns_avail-nruns_adjac
- * = ========================= = -----------------------
- * navail nruns_avail
- * -----------------------
- * nruns_avail-nruns_adjac
- *
- * The following code multiplies away the denominator prior to
- * comparison, in order to avoid division.
- *
- */
- {
- size_t a_val = (a->nruns_avail - a->nruns_adjac) *
- b->nruns_avail;
- size_t b_val = (b->nruns_avail - b->nruns_adjac) *
- a->nruns_avail;
+ /* Freeing a pointer outside the slab can cause assertion failure. */
+ assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab));
+ assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab));
+ /* Freeing an interior pointer can cause assertion failure. */
+ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) %
+ (uintptr_t)bin_infos[binind].reg_size == 0);
- if (a_val < b_val)
- return (1);
- if (a_val > b_val)
- return (-1);
- }
- /*
- * Break ties by chunk address. For fragmented chunks, report lower
- * addresses as "lower", so that fragmentation reduction happens first
- * at lower addresses. However, use the opposite ordering for
- * unfragmented chunks, in order to increase the chances of
- * re-allocating dirty runs.
- */
- {
- uintptr_t a_chunk = (uintptr_t)a;
- uintptr_t b_chunk = (uintptr_t)b;
- int ret = ((a_chunk > b_chunk) - (a_chunk < b_chunk));
- if (a->nruns_adjac == 0) {
- assert(b->nruns_adjac == 0);
- ret = -ret;
- }
- return (ret);
- }
-}
+ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab));
-/* Generate red-black tree functions. */
-rb_gen(static UNUSED, arena_chunk_dirty_, arena_chunk_tree_t, arena_chunk_t,
- dirty_link, arena_chunk_dirty_comp)
+ /* Avoid doing division with a variable divisor. */
+ regind = div_compute(&arena_binind_div_info[binind], diff);
-static inline bool
-arena_avail_adjac_pred(arena_chunk_t *chunk, size_t pageind)
-{
- bool ret;
+ assert(regind < bin_infos[binind].nregs);
- if (pageind-1 < map_bias)
- ret = false;
- else {
- ret = (arena_mapbits_allocated_get(chunk, pageind-1) == 0);
- assert(ret == false || arena_mapbits_dirty_get(chunk,
- pageind-1) != arena_mapbits_dirty_get(chunk, pageind));
- }
- return (ret);
+ return regind;
}
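The "avoid doing division with a variable divisor" comment refers to the precomputed-reciprocal trick behind `div_compute()` and `arena_binind_div_info`: each bin's region size gets a magic constant so the hot path replaces a hardware divide with a multiply and a shift. The sketch below shows the underlying technique under stated assumptions (`n` is an exact multiple of `d`, both fit in 32 bits, as slab offsets do); jemalloc's actual `div.h` differs in detail:

```c
#include <assert.h>
#include <stdint.h>

typedef struct { uint32_t magic; } div_magic_t;

/* Precompute ceil(2^32 / d) once per divisor. */
static void div_magic_init(div_magic_t *m, uint32_t d) {
    assert(d > 1);
    uint64_t two32 = (uint64_t)1 << 32;
    m->magic = (uint32_t)(two32 / d) + (two32 % d != 0 ? 1 : 0);
}

/* Compute n / d without a hardware divide, assuming d | n and n < 2^32. */
static uint32_t div_magic_compute(const div_magic_t *m, uint32_t n) {
    return (uint32_t)(((uint64_t)n * m->magic) >> 32);
}

int main(void) {
    div_magic_t m;
    div_magic_init(&m, 48);                  /* e.g. a 48-byte region size */
    assert(div_magic_compute(&m, 48 * 37) == 37);
    return 0;
}
```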
-static inline bool
-arena_avail_adjac_succ(arena_chunk_t *chunk, size_t pageind, size_t npages)
-{
- bool ret;
-
- if (pageind+npages == chunk_npages)
- ret = false;
- else {
- assert(pageind+npages < chunk_npages);
- ret = (arena_mapbits_allocated_get(chunk, pageind+npages) == 0);
- assert(ret == false || arena_mapbits_dirty_get(chunk, pageind)
- != arena_mapbits_dirty_get(chunk, pageind+npages));
- }
- return (ret);
-}
+static void
+arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) {
+ szind_t binind = extent_szind_get(slab);
+ const bin_info_t *bin_info = &bin_infos[binind];
+ size_t regind = arena_slab_regind(slab, binind, ptr);
-static inline bool
-arena_avail_adjac(arena_chunk_t *chunk, size_t pageind, size_t npages)
-{
+ assert(extent_nfree_get(slab) < bin_info->nregs);
+ /* Freeing an unallocated pointer can cause assertion failure. */
+ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
- return (arena_avail_adjac_pred(chunk, pageind) ||
- arena_avail_adjac_succ(chunk, pageind, npages));
+ bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
+ extent_nfree_inc(slab);
}
static void
-arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
- size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
-{
+arena_nactive_add(arena_t *arena, size_t add_pages) {
+ atomic_fetch_add_zu(&arena->nactive, add_pages, ATOMIC_RELAXED);
+}
- assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
- LG_PAGE));
+static void
+arena_nactive_sub(arena_t *arena, size_t sub_pages) {
+ assert(atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) >= sub_pages);
+ atomic_fetch_sub_zu(&arena->nactive, sub_pages, ATOMIC_RELAXED);
+}
- /*
- * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
- * removed and reinserted even if the run to be inserted is clean.
- */
- if (chunk->ndirty != 0)
- arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
+static void
+arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
+ szind_t index, hindex;
- if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
- chunk->nruns_adjac++;
- if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
- chunk->nruns_adjac++;
- chunk->nruns_avail++;
- assert(chunk->nruns_avail > chunk->nruns_adjac);
+ cassert(config_stats);
- if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
- arena->ndirty += npages;
- chunk->ndirty += npages;
+ if (usize < LARGE_MINCLASS) {
+ usize = LARGE_MINCLASS;
}
- if (chunk->ndirty != 0)
- arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+ index = sz_size2index(usize);
+ hindex = (index >= NBINS) ? index - NBINS : 0;
- arena_avail_tree_insert(&arena->runs_avail, arena_mapp_get(chunk,
- pageind));
+ arena_stats_add_u64(tsdn, &arena->stats,
+ &arena->stats.lstats[hindex].nmalloc, 1);
}
static void
-arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
- size_t npages, bool maybe_adjac_pred, bool maybe_adjac_succ)
-{
-
- assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
- LG_PAGE));
-
- /*
- * chunks_dirty is keyed by nruns_{avail,adjac}, so the chunk must be
- * removed and reinserted even if the run to be removed is clean.
- */
- if (chunk->ndirty != 0)
- arena_chunk_dirty_remove(&arena->chunks_dirty, chunk);
+arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) {
+ szind_t index, hindex;
- if (maybe_adjac_pred && arena_avail_adjac_pred(chunk, pageind))
- chunk->nruns_adjac--;
- if (maybe_adjac_succ && arena_avail_adjac_succ(chunk, pageind, npages))
- chunk->nruns_adjac--;
- chunk->nruns_avail--;
- assert(chunk->nruns_avail > chunk->nruns_adjac || (chunk->nruns_avail
- == 0 && chunk->nruns_adjac == 0));
+ cassert(config_stats);
- if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
- arena->ndirty -= npages;
- chunk->ndirty -= npages;
+ if (usize < LARGE_MINCLASS) {
+ usize = LARGE_MINCLASS;
}
- if (chunk->ndirty != 0)
- arena_chunk_dirty_insert(&arena->chunks_dirty, chunk);
+ index = sz_size2index(usize);
+ hindex = (index >= NBINS) ? index - NBINS : 0;
- arena_avail_tree_remove(&arena->runs_avail, arena_mapp_get(chunk,
- pageind));
+ arena_stats_add_u64(tsdn, &arena->stats,
+ &arena->stats.lstats[hindex].ndalloc, 1);
}
-static inline void *
-arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
-{
- void *ret;
- unsigned regind;
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
- assert(run->nfree > 0);
- assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
-
- regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
- ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
- (uintptr_t)(bin_info->reg_interval * regind));
- run->nfree--;
- if (regind == run->nextind)
- run->nextind++;
- assert(regind < run->nextind);
- return (ret);
-}
-
-static inline void
-arena_run_reg_dalloc(arena_run_t *run, void *ptr)
-{
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- size_t mapbits = arena_mapbits_get(chunk, pageind);
- size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- unsigned regind = arena_run_regind(run, bin_info, ptr);
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
- assert(run->nfree < bin_info->nregs);
- /* Freeing an interior pointer can cause assertion failure. */
- assert(((uintptr_t)ptr - ((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset)) %
- (uintptr_t)bin_info->reg_interval == 0);
- assert((uintptr_t)ptr >= (uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset);
- /* Freeing an unallocated pointer can cause assertion failure. */
- assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
-
- bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
- run->nfree++;
-}
+static void
+arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize,
+ size_t usize) {
+ arena_large_dalloc_stats_update(tsdn, arena, oldusize);
+ arena_large_malloc_stats_update(tsdn, arena, usize);
+}
+
+extent_t *
+arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize,
+ size_t alignment, bool *zero) {
+ extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ szind_t szind = sz_size2index(usize);
+ size_t mapped_add;
+ bool commit = true;
+ extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false,
+ szind, zero, &commit);
+ if (extent == NULL) {
+ extent = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment,
+ false, szind, zero, &commit);
+ }
+ size_t size = usize + sz_large_pad;
+ if (extent == NULL) {
+ extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL,
+ usize, sz_large_pad, alignment, false, szind, zero,
+ &commit);
+ if (config_stats) {
+ /*
+ * extent may be NULL on OOM, but in that case
+ * mapped_add isn't used below, so there's no need to
+ * conditionally set it to 0 here.
+ */
+ mapped_add = size;
+ }
+ } else if (config_stats) {
+ mapped_add = 0;
+ }
-static inline void
-arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages)
-{
+ if (extent != NULL) {
+ if (config_stats) {
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_large_malloc_stats_update(tsdn, arena, usize);
+ if (mapped_add != 0) {
+ arena_stats_add_zu(tsdn, &arena->stats,
+ &arena->stats.mapped, mapped_add);
+ }
+ arena_stats_unlock(tsdn, &arena->stats);
+ }
+ arena_nactive_add(arena, size >> LG_PAGE);
+ }
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + (run_ind <<
- LG_PAGE)), (npages << LG_PAGE));
- memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0,
- (npages << LG_PAGE));
+ return extent;
}
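For context, this fallback chain (dirty extents, then muzzy extents, then a fresh mapping) is what ordinary large allocations go through. A hedged usage sketch that exercises it via the public API; the `je_` prefix is carried over from this build's configuration:

```c
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int main(void) {
    /* A multi-megabyte request is served by arena_extent_alloc_large():
     * MALLOCX_ALIGN and MALLOCX_ZERO map onto the alignment and *zero
     * parameters seen above. */
    void *p = je_mallocx(4 << 20, MALLOCX_ALIGN(2 << 20) | MALLOCX_ZERO);
    if (p == NULL) {
        return 1;
    }
    printf("usable size: %zu\n", je_malloc_usable_size(p));
    je_dallocx(p, 0);
    return 0;
}
```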
-static inline void
-arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind)
-{
-
- VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind <<
- LG_PAGE)), PAGE);
+void
+arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
+ if (config_stats) {
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_large_dalloc_stats_update(tsdn, arena,
+ extent_usize_get(extent));
+ arena_stats_unlock(tsdn, &arena->stats);
+ }
+ arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE);
}
-static inline void
-arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
-{
- size_t i;
- UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));
+void
+arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
+ size_t oldusize) {
+ size_t usize = extent_usize_get(extent);
+ size_t udiff = oldusize - usize;
- arena_run_page_mark_zeroed(chunk, run_ind);
- for (i = 0; i < PAGE / sizeof(size_t); i++)
- assert(p[i] == 0);
+ if (config_stats) {
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
+ arena_stats_unlock(tsdn, &arena->stats);
+ }
+ arena_nactive_sub(arena, udiff >> LG_PAGE);
}
-static void
-arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages)
-{
+void
+arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
+ size_t oldusize) {
+ size_t usize = extent_usize_get(extent);
+ size_t udiff = usize - oldusize;
if (config_stats) {
- ssize_t cactive_diff = CHUNK_CEILING((arena->nactive +
- add_pages) << LG_PAGE) - CHUNK_CEILING((arena->nactive -
- sub_pages) << LG_PAGE);
- if (cactive_diff != 0)
- stats_cactive_add(cactive_diff);
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize);
+ arena_stats_unlock(tsdn, &arena->stats);
}
+ arena_nactive_add(arena, udiff >> LG_PAGE);
}
-static void
-arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind,
- size_t flag_dirty, size_t need_pages)
-{
- size_t total_pages, rem_pages;
-
- total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
- LG_PAGE;
- assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
- flag_dirty);
- assert(need_pages <= total_pages);
- rem_pages = total_pages - need_pages;
-
- arena_avail_remove(arena, chunk, run_ind, total_pages, true, true);
- arena_cactive_update(arena, need_pages, 0);
- arena->nactive += need_pages;
-
- /* Keep track of trailing unused pages for later use. */
- if (rem_pages > 0) {
- if (flag_dirty != 0) {
- arena_mapbits_unallocated_set(chunk,
- run_ind+need_pages, (rem_pages << LG_PAGE),
- flag_dirty);
- arena_mapbits_unallocated_set(chunk,
- run_ind+total_pages-1, (rem_pages << LG_PAGE),
- flag_dirty);
- } else {
- arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
- (rem_pages << LG_PAGE),
- arena_mapbits_unzeroed_get(chunk,
- run_ind+need_pages));
- arena_mapbits_unallocated_set(chunk,
- run_ind+total_pages-1, (rem_pages << LG_PAGE),
- arena_mapbits_unzeroed_get(chunk,
- run_ind+total_pages-1));
- }
- arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages,
- false, true);
- }
+static ssize_t
+arena_decay_ms_read(arena_decay_t *decay) {
+ return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
}
static void
-arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size,
- bool remove, bool zero)
-{
- arena_chunk_t *chunk;
- size_t flag_dirty, run_ind, need_pages, i;
+arena_decay_ms_write(arena_decay_t *decay, ssize_t decay_ms) {
+ atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
+}
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
- flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
- need_pages = (size >> LG_PAGE);
- assert(need_pages > 0);
+static void
+arena_decay_deadline_init(arena_decay_t *decay) {
+ /*
+ * Generate a new deadline that is uniformly random within the next
+ * epoch after the current one.
+ */
+ nstime_copy(&decay->deadline, &decay->epoch);
+ nstime_add(&decay->deadline, &decay->interval);
+ if (arena_decay_ms_read(decay) > 0) {
+ nstime_t jitter;
- if (remove) {
- arena_run_split_remove(arena, chunk, run_ind, flag_dirty,
- need_pages);
+ nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
+ nstime_ns(&decay->interval)));
+ nstime_add(&decay->deadline, &jitter);
}
+}
- if (zero) {
- if (flag_dirty == 0) {
- /*
- * The run is clean, so some pages may be zeroed (i.e.
- * never before touched).
- */
- for (i = 0; i < need_pages; i++) {
- if (arena_mapbits_unzeroed_get(chunk, run_ind+i)
- != 0)
- arena_run_zero(chunk, run_ind+i, 1);
- else if (config_debug) {
- arena_run_page_validate_zeroed(chunk,
- run_ind+i);
- } else {
- arena_run_page_mark_zeroed(chunk,
- run_ind+i);
- }
- }
- } else {
- /* The run is dirty, so all pages must be zeroed. */
- arena_run_zero(chunk, run_ind, need_pages);
- }
- } else {
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
- (run_ind << LG_PAGE)), (need_pages << LG_PAGE));
- }
+static bool
+arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) {
+ return (nstime_compare(&decay->deadline, time) <= 0);
+}
+
+static size_t
+arena_decay_backlog_npages_limit(const arena_decay_t *decay) {
+ uint64_t sum;
+ size_t npages_limit_backlog;
+ unsigned i;
/*
- * Set the last element first, in case the run only contains one page
- * (i.e. both statements set the same element).
+ * For each element of decay_backlog, multiply by the corresponding
+ * fixed-point smoothstep decay factor. Sum the products, then divide
+ * to round down to the nearest whole number of pages.
*/
- arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, run_ind, size, flag_dirty);
-}
-
-static void
-arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
-{
+ sum = 0;
+ for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+ sum += decay->backlog[i] * h_steps[i];
+ }
+ npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);
- arena_run_split_large_helper(arena, run, size, true, zero);
+ return npages_limit_backlog;
}
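The limit computed above is a fixed-point dot product: each backlog entry is weighted by the corresponding precomputed smoothstep factor `h_steps[i]`, the products are summed, and the sum is shifted right by `SMOOTHSTEP_BFP` bits to round down to whole pages. A toy sketch with an assumed 3-step table and 8-bit precision (the real table and precision come from smoothstep.h):

```c
#include <stdint.h>
#include <stdio.h>

#define NSTEPS 3
#define BFP    8   /* binary fixed point: weights are scaled by 2^8 */

/* Assumed toy weights: the newest backlog entry decays least (255/256),
 * the oldest decays most (16/256). jemalloc generates its table from
 * the smoothstep polynomial instead. */
static const uint64_t h_steps[NSTEPS] = { 16, 128, 255 };

static size_t backlog_npages_limit(const size_t backlog[NSTEPS]) {
    uint64_t sum = 0;
    for (unsigned i = 0; i < NSTEPS; i++) {
        sum += (uint64_t)backlog[i] * h_steps[i];
    }
    /* Round down to a whole number of pages. */
    return (size_t)(sum >> BFP);
}

int main(void) {
    size_t backlog[NSTEPS] = { 100, 100, 100 };
    /* (100*16 + 100*128 + 100*255) >> 8 = 155 pages may stay dirty. */
    printf("limit: %zu pages\n", backlog_npages_limit(backlog));
    return 0;
}
```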
static void
-arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero)
-{
+arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) {
+ size_t npages_delta = (current_npages > decay->nunpurged) ?
+ current_npages - decay->nunpurged : 0;
+ decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;
- arena_run_split_large_helper(arena, run, size, false, zero);
+ if (config_debug) {
+ if (current_npages > decay->ceil_npages) {
+ decay->ceil_npages = current_npages;
+ }
+ size_t npages_limit = arena_decay_backlog_npages_limit(decay);
+ assert(decay->ceil_npages >= npages_limit);
+ if (decay->ceil_npages > npages_limit) {
+ decay->ceil_npages = npages_limit;
+ }
+ }
}
static void
-arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size,
- size_t binind)
-{
- arena_chunk_t *chunk;
- size_t flag_dirty, run_ind, need_pages, i;
-
- assert(binind != BININD_INVALID);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
- flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
- need_pages = (size >> LG_PAGE);
- assert(need_pages > 0);
-
- arena_run_split_remove(arena, chunk, run_ind, flag_dirty, need_pages);
-
- /*
- * Propagate the dirty and unzeroed flags to the allocated small run,
- * so that arena_dalloc_bin_run() has the ability to conditionally trim
- * clean pages.
- */
- arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty);
- /*
- * The first page will always be dirtied during small run
- * initialization, so a validation failure here would not actually
- * cause an observable failure.
- */
- if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
- run_ind) == 0)
- arena_run_page_validate_zeroed(chunk, run_ind);
- for (i = 1; i < need_pages - 1; i++) {
- arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0);
- if (config_debug && flag_dirty == 0 &&
- arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
- arena_run_page_validate_zeroed(chunk, run_ind+i);
- }
- arena_mapbits_small_set(chunk, run_ind+need_pages-1, need_pages-1,
- binind, flag_dirty);
- if (config_debug && flag_dirty == 0 && arena_mapbits_unzeroed_get(chunk,
- run_ind+need_pages-1) == 0)
- arena_run_page_validate_zeroed(chunk, run_ind+need_pages-1);
- VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk +
- (run_ind << LG_PAGE)), (need_pages << LG_PAGE));
-}
-
-static arena_chunk_t *
-arena_chunk_init_spare(arena_t *arena)
-{
- arena_chunk_t *chunk;
-
- assert(arena->spare != NULL);
-
- chunk = arena->spare;
- arena->spare = NULL;
-
- assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
- assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
- assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
- arena_maxclass);
- assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
- arena_maxclass);
- assert(arena_mapbits_dirty_get(chunk, map_bias) ==
- arena_mapbits_dirty_get(chunk, chunk_npages-1));
-
- return (chunk);
-}
-
-static arena_chunk_t *
-arena_chunk_init_hard(arena_t *arena)
-{
- arena_chunk_t *chunk;
- bool zero;
- size_t unzeroed, i;
-
- assert(arena->spare == NULL);
-
- zero = false;
- malloc_mutex_unlock(&arena->lock);
- chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize, false,
- &zero, arena->dss_prec);
- malloc_mutex_lock(&arena->lock);
- if (chunk == NULL)
- return (NULL);
- if (config_stats)
- arena->stats.mapped += chunksize;
-
- chunk->arena = arena;
-
- /*
- * Claim that no pages are in use, since the header is merely overhead.
- */
- chunk->ndirty = 0;
+arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64,
+ size_t current_npages) {
+ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
+ memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
+ sizeof(size_t));
+ } else {
+ size_t nadvance_z = (size_t)nadvance_u64;
- chunk->nruns_avail = 0;
- chunk->nruns_adjac = 0;
+ assert((uint64_t)nadvance_z == nadvance_u64);
- /*
- * Initialize the map to contain one maximal free untouched run. Mark
- * the pages as zeroed iff chunk_alloc() returned a zeroed chunk.
- */
- unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
- arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass,
- unzeroed);
- /*
- * There is no need to initialize the internal page map entries unless
- * the chunk is not zeroed.
- */
- if (zero == false) {
- VALGRIND_MAKE_MEM_UNDEFINED((void *)arena_mapp_get(chunk,
- map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
- chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
- map_bias+1)));
- for (i = map_bias+1; i < chunk_npages-1; i++)
- arena_mapbits_unzeroed_set(chunk, i, unzeroed);
- } else {
- VALGRIND_MAKE_MEM_DEFINED((void *)arena_mapp_get(chunk,
- map_bias+1), (size_t)((uintptr_t) arena_mapp_get(chunk,
- chunk_npages-1) - (uintptr_t)arena_mapp_get(chunk,
- map_bias+1)));
- if (config_debug) {
- for (i = map_bias+1; i < chunk_npages-1; i++) {
- assert(arena_mapbits_unzeroed_get(chunk, i) ==
- unzeroed);
- }
+ memmove(decay->backlog, &decay->backlog[nadvance_z],
+ (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
+ if (nadvance_z > 1) {
+ memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
+ nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
}
}
- arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxclass,
- unzeroed);
- return (chunk);
+ arena_decay_backlog_update_last(decay, current_npages);
}
-static arena_chunk_t *
-arena_chunk_alloc(arena_t *arena)
-{
- arena_chunk_t *chunk;
-
- if (arena->spare != NULL)
- chunk = arena_chunk_init_spare(arena);
- else {
- chunk = arena_chunk_init_hard(arena);
- if (chunk == NULL)
- return (NULL);
+static void
+arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, size_t current_npages, size_t npages_limit,
+ bool is_background_thread) {
+ if (current_npages > npages_limit) {
+ arena_decay_to_limit(tsdn, arena, decay, extents, false,
+ npages_limit, current_npages - npages_limit,
+ is_background_thread);
}
-
- /* Insert the run into the runs_avail tree. */
- arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias,
- false, false);
-
- return (chunk);
}
static void
-arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
-{
- assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
- assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
- assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
- arena_maxclass);
- assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
- arena_maxclass);
- assert(arena_mapbits_dirty_get(chunk, map_bias) ==
- arena_mapbits_dirty_get(chunk, chunk_npages-1));
+arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time,
+ size_t current_npages) {
+ assert(arena_decay_deadline_reached(decay, time));
- /*
- * Remove run from the runs_avail tree, so that the arena does not use
- * it.
- */
- arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias,
- false, false);
+ nstime_t delta;
+ nstime_copy(&delta, time);
+ nstime_subtract(&delta, &decay->epoch);
- if (arena->spare != NULL) {
- arena_chunk_t *spare = arena->spare;
-
- arena->spare = chunk;
- malloc_mutex_unlock(&arena->lock);
- chunk_dealloc((void *)spare, chunksize, true);
- malloc_mutex_lock(&arena->lock);
- if (config_stats)
- arena->stats.mapped -= chunksize;
- } else
- arena->spare = chunk;
-}
+ uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
+ assert(nadvance_u64 > 0);
-static arena_run_t *
-arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
-{
- arena_run_t *run;
- arena_chunk_map_t *mapelm, key;
+ /* Add nadvance_u64 decay intervals to epoch. */
+ nstime_copy(&delta, &decay->interval);
+ nstime_imultiply(&delta, nadvance_u64);
+ nstime_add(&decay->epoch, &delta);
- key.bits = size | CHUNK_MAP_KEY;
- mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
- if (mapelm != NULL) {
- arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = (((uintptr_t)mapelm -
- (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
- + map_bias;
+ /* Set a new deadline. */
+ arena_decay_deadline_init(decay);
- run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
- LG_PAGE));
- arena_run_split_large(arena, run, size, zero);
- return (run);
- }
-
- return (NULL);
+ /* Update the backlog. */
+ arena_decay_backlog_update(decay, nadvance_u64, current_npages);
}
-static arena_run_t *
-arena_run_alloc_large(arena_t *arena, size_t size, bool zero)
-{
- arena_chunk_t *chunk;
- arena_run_t *run;
-
- assert(size <= arena_maxclass);
- assert((size & PAGE_MASK) == 0);
+static void
+arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, const nstime_t *time, bool is_background_thread) {
+ size_t current_npages = extents_npages_get(extents);
+ arena_decay_epoch_advance_helper(decay, time, current_npages);
- /* Search the arena's chunks for the lowest best fit. */
- run = arena_run_alloc_large_helper(arena, size, zero);
- if (run != NULL)
- return (run);
+ size_t npages_limit = arena_decay_backlog_npages_limit(decay);
+ /* arena_decay_try_purge() may drop decay->mtx, so record nunpurged first. */
+ decay->nunpurged = (npages_limit > current_npages) ? npages_limit :
+ current_npages;
- /*
- * No usable runs. Create a new chunk from which to allocate the run.
- */
- chunk = arena_chunk_alloc(arena);
- if (chunk != NULL) {
- run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
- arena_run_split_large(arena, run, size, zero);
- return (run);
+ if (!background_thread_enabled() || is_background_thread) {
+ arena_decay_try_purge(tsdn, arena, decay, extents,
+ current_npages, npages_limit, is_background_thread);
}
-
- /*
- * arena_chunk_alloc() failed, but another thread may have made
- * sufficient memory available while this one dropped arena->lock in
- * arena_chunk_alloc(), so search one more time.
- */
- return (arena_run_alloc_large_helper(arena, size, zero));
}
-static arena_run_t *
-arena_run_alloc_small_helper(arena_t *arena, size_t size, size_t binind)
-{
- arena_run_t *run;
- arena_chunk_map_t *mapelm, key;
+static void
+arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) {
+ arena_decay_ms_write(decay, decay_ms);
+ if (decay_ms > 0) {
+ nstime_init(&decay->interval, (uint64_t)decay_ms *
+ KQU(1000000));
+ nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
+ }
- key.bits = size | CHUNK_MAP_KEY;
- mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
- if (mapelm != NULL) {
- arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
- size_t pageind = (((uintptr_t)mapelm -
- (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
- + map_bias;
+ nstime_init(&decay->epoch, 0);
+ nstime_update(&decay->epoch);
+ decay->jitter_state = (uint64_t)(uintptr_t)decay;
+ arena_decay_deadline_init(decay);
+ decay->nunpurged = 0;
+ memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
+}
- run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
- LG_PAGE));
- arena_run_split_small(arena, run, size, binind);
- return (run);
+static bool
+arena_decay_init(arena_decay_t *decay, ssize_t decay_ms,
+ arena_stats_decay_t *stats) {
+ if (config_debug) {
+ for (size_t i = 0; i < sizeof(arena_decay_t); i++) {
+ assert(((char *)decay)[i] == 0);
+ }
+ decay->ceil_npages = 0;
}
-
- return (NULL);
+ if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+ decay->purging = false;
+ arena_decay_reinit(decay, decay_ms);
+ /* Memory is zeroed, so there is no need to clear stats. */
+ if (config_stats) {
+ decay->stats = stats;
+ }
+ return false;
}
-static arena_run_t *
-arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
-{
- arena_chunk_t *chunk;
- arena_run_t *run;
+static bool
+arena_decay_ms_valid(ssize_t decay_ms) {
+ if (decay_ms < -1) {
+ return false;
+ }
+ if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
+ KQU(1000)) {
+ return true;
+ }
+ return false;
+}
- assert(size <= arena_maxclass);
- assert((size & PAGE_MASK) == 0);
- assert(binind != BININD_INVALID);
+static bool
+arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, bool is_background_thread) {
+ malloc_mutex_assert_owner(tsdn, &decay->mtx);
+
+ /* Purge all or nothing if the option is disabled. */
+ ssize_t decay_ms = arena_decay_ms_read(decay);
+ if (decay_ms <= 0) {
+ if (decay_ms == 0) {
+ arena_decay_to_limit(tsdn, arena, decay, extents, false,
+ 0, extents_npages_get(extents),
+ is_background_thread);
+ }
+ return false;
+ }
- /* Search the arena's chunks for the lowest best fit. */
- run = arena_run_alloc_small_helper(arena, size, binind);
- if (run != NULL)
- return (run);
+ nstime_t time;
+ nstime_init(&time, 0);
+ nstime_update(&time);
+ if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time)
+ > 0)) {
+ /*
+ * Time went backwards. Move the epoch back in time and
+ * generate a new deadline, with the expectation that time
+ * typically flows forward for long enough periods of time that
+ * epochs complete. Unfortunately, this strategy is susceptible
+ * to clock jitter triggering premature epoch advances, but
+ * clock jitter estimation and compensation isn't feasible here
+ * because calls into this code are event-driven.
+ */
+ nstime_copy(&decay->epoch, &time);
+ arena_decay_deadline_init(decay);
+ } else {
+ /* Verify that time does not go backwards. */
+ assert(nstime_compare(&decay->epoch, &time) <= 0);
+ }
/*
- * No usable runs. Create a new chunk from which to allocate the run.
+ * If the deadline has been reached, advance to the current epoch and
+ * purge to the new limit if necessary. Note that dirty pages created
+ * during the current epoch are not subject to purge until a future
+ * epoch; as a result, purging only happens during epoch advances, or
+ * when triggered by a background thread as a scheduled event.
*/
- chunk = arena_chunk_alloc(arena);
- if (chunk != NULL) {
- run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
- arena_run_split_small(arena, run, size, binind);
- return (run);
+ bool advance_epoch = arena_decay_deadline_reached(decay, &time);
+ if (advance_epoch) {
+ arena_decay_epoch_advance(tsdn, arena, decay, extents, &time,
+ is_background_thread);
+ } else if (is_background_thread) {
+ arena_decay_try_purge(tsdn, arena, decay, extents,
+ extents_npages_get(extents),
+ arena_decay_backlog_npages_limit(decay),
+ is_background_thread);
}
- /*
- * arena_chunk_alloc() failed, but another thread may have made
- * sufficient memory available while this one dropped arena->lock in
- * arena_chunk_alloc(), so search one more time.
- */
- return (arena_run_alloc_small_helper(arena, size, binind));
+ return advance_epoch;
}
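
The non-monotonic clock guard above re-anchors the epoch rather than trying to
compensate for jitter. A standalone sketch of the same pattern, with epoch,
deadline, and interval_ns as hypothetical stand-ins for decay->epoch and the
deadline computed by arena_decay_deadline_init():

    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void) {
        struct timespec ts;
        clock_gettime(CLOCK_REALTIME, &ts); /* may jump backwards */
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

    void epoch_tick(uint64_t *epoch, uint64_t *deadline, uint64_t interval_ns) {
        uint64_t t = now_ns();
        if (t < *epoch) {
            /* Time went backwards: move the epoch back and regenerate
             * the deadline instead of estimating clock jitter. */
            *epoch = t;
            *deadline = t + interval_ns;
        }
    }
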
-static inline void
-arena_maybe_purge(arena_t *arena)
-{
- size_t npurgeable, threshold;
-
- /* Don't purge if the option is disabled. */
- if (opt_lg_dirty_mult < 0)
- return;
- /* Don't purge if all dirty pages are already being purged. */
- if (arena->ndirty <= arena->npurgatory)
- return;
- npurgeable = arena->ndirty - arena->npurgatory;
- threshold = (arena->nactive >> opt_lg_dirty_mult);
- /*
- * Don't purge unless the number of purgeable pages exceeds the
- * threshold.
- */
- if (npurgeable <= threshold)
- return;
-
- arena_purge(arena, false);
+static ssize_t
+arena_decay_ms_get(arena_decay_t *decay) {
+ return arena_decay_ms_read(decay);
}
-static arena_chunk_t *
-chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
-{
- size_t *ndirty = (size_t *)arg;
+ssize_t
+arena_dirty_decay_ms_get(arena_t *arena) {
+ return arena_decay_ms_get(&arena->decay_dirty);
+}
- assert(chunk->ndirty != 0);
- *ndirty += chunk->ndirty;
- return (NULL);
+ssize_t
+arena_muzzy_decay_ms_get(arena_t *arena) {
+ return arena_decay_ms_get(&arena->decay_muzzy);
}
-static size_t
-arena_compute_npurgatory(arena_t *arena, bool all)
-{
- size_t npurgatory, npurgeable;
+static bool
+arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, ssize_t decay_ms) {
+ if (!arena_decay_ms_valid(decay_ms)) {
+ return true;
+ }
+ malloc_mutex_lock(tsdn, &decay->mtx);
/*
- * Compute the minimum number of pages that this thread should try to
- * purge.
+ * Restart decay backlog from scratch, which may cause many dirty pages
+ * to be immediately purged. It would conceptually be possible to map
+ * the old backlog onto the new backlog, but there is no justification
+ * for such complexity since decay_ms changes are intended to be
+ * infrequent, either between the {-1, 0, >0} states, or a one-time
+ * arbitrary change during initial arena configuration.
*/
- npurgeable = arena->ndirty - arena->npurgatory;
+ arena_decay_reinit(decay, decay_ms);
+ arena_maybe_decay(tsdn, arena, decay, extents, false);
+ malloc_mutex_unlock(tsdn, &decay->mtx);
- if (all == false) {
- size_t threshold = (arena->nactive >> opt_lg_dirty_mult);
+ return false;
+}
- npurgatory = npurgeable - threshold;
- } else
- npurgatory = npurgeable;
+bool
+arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena,
+ ssize_t decay_ms) {
+ return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty,
+ &arena->extents_dirty, decay_ms);
+}
- return (npurgatory);
+bool
+arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena,
+ ssize_t decay_ms) {
+ return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy,
+ &arena->extents_muzzy, decay_ms);
}
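
The valid settings established by arena_decay_ms_valid() above are -1 (never
purge), 0 (purge eagerly), or a positive number of milliseconds over which
pages decay. A minimal sketch of driving these setters from application code,
assuming jemalloc 5.x's public mallctl() interface and its
"arena.<i>.dirty_decay_ms" / "arena.<i>.muzzy_decay_ms" names:

    #include <stdio.h>
    #include <sys/types.h>
    #include <jemalloc/jemalloc.h>

    /* Returns nonzero on failure; mallctl() rejects values that fail
     * arena_decay_ms_valid(), e.g. anything below -1. */
    int set_decay(unsigned arena_ind, ssize_t dirty_ms, ssize_t muzzy_ms) {
        char name[64];

        snprintf(name, sizeof(name), "arena.%u.dirty_decay_ms", arena_ind);
        if (mallctl(name, NULL, NULL, &dirty_ms, sizeof(dirty_ms)) != 0) {
            return 1;
        }
        snprintf(name, sizeof(name), "arena.%u.muzzy_decay_ms", arena_ind);
        return mallctl(name, NULL, NULL, &muzzy_ms, sizeof(muzzy_ms));
    }
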
-static void
-arena_chunk_stash_dirty(arena_t *arena, arena_chunk_t *chunk, bool all,
- arena_chunk_mapelms_t *mapelms)
-{
- size_t pageind, npages;
+static size_t
+arena_stash_decayed(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit,
+ size_t npages_decay_max, extent_list_t *decay_extents) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
- /*
- * Temporarily allocate free dirty runs within chunk. If all is false,
- * only operate on dirty runs that are fragments; otherwise operate on
- * all dirty runs.
- */
- for (pageind = map_bias; pageind < chunk_npages; pageind += npages) {
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
- if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
- size_t run_size =
- arena_mapbits_unallocated_size_get(chunk, pageind);
-
- npages = run_size >> LG_PAGE;
- assert(pageind + npages <= chunk_npages);
- assert(arena_mapbits_dirty_get(chunk, pageind) ==
- arena_mapbits_dirty_get(chunk, pageind+npages-1));
-
- if (arena_mapbits_dirty_get(chunk, pageind) != 0 &&
- (all || arena_avail_adjac(chunk, pageind,
- npages))) {
- arena_run_t *run = (arena_run_t *)((uintptr_t)
- chunk + (uintptr_t)(pageind << LG_PAGE));
-
- arena_run_split_large(arena, run, run_size,
- false);
- /* Append to list for later processing. */
- ql_elm_new(mapelm, u.ql_link);
- ql_tail_insert(mapelms, mapelm, u.ql_link);
- }
- } else {
- /* Skip run. */
- if (arena_mapbits_large_get(chunk, pageind) != 0) {
- npages = arena_mapbits_large_size_get(chunk,
- pageind) >> LG_PAGE;
- } else {
- size_t binind;
- arena_bin_info_t *bin_info;
- arena_run_t *run = (arena_run_t *)((uintptr_t)
- chunk + (uintptr_t)(pageind << LG_PAGE));
-
- assert(arena_mapbits_small_runind_get(chunk,
- pageind) == 0);
- binind = arena_bin_index(arena, run->bin);
- bin_info = &arena_bin_info[binind];
- npages = bin_info->run_size >> LG_PAGE;
- }
- }
+ /* Stash extents according to npages_limit. */
+ size_t nstashed = 0;
+ extent_t *extent;
+ while (nstashed < npages_decay_max &&
+ (extent = extents_evict(tsdn, arena, r_extent_hooks, extents,
+ npages_limit)) != NULL) {
+ extent_list_append(decay_extents, extent);
+ nstashed += extent_size_get(extent) >> LG_PAGE;
}
- assert(pageind == chunk_npages);
- assert(chunk->ndirty == 0 || all == false);
- assert(chunk->nruns_adjac == 0);
+ return nstashed;
}
static size_t
-arena_chunk_purge_stashed(arena_t *arena, arena_chunk_t *chunk,
- arena_chunk_mapelms_t *mapelms)
-{
- size_t npurged, pageind, npages, nmadvise;
- arena_chunk_map_t *mapelm;
-
- malloc_mutex_unlock(&arena->lock);
- if (config_stats)
+arena_decay_stashed(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents,
+ bool all, extent_list_t *decay_extents, bool is_background_thread) {
+ UNUSED size_t nmadvise, nunmapped;
+ size_t npurged;
+
+ if (config_stats) {
nmadvise = 0;
+ nunmapped = 0;
+ }
npurged = 0;
- ql_foreach(mapelm, mapelms, u.ql_link) {
- bool unzeroed;
- size_t flag_unzeroed, i;
-
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t)) + map_bias;
- npages = arena_mapbits_large_size_get(chunk, pageind) >>
- LG_PAGE;
- assert(pageind + npages <= chunk_npages);
- unzeroed = pages_purge((void *)((uintptr_t)chunk + (pageind <<
- LG_PAGE)), (npages << LG_PAGE));
- flag_unzeroed = unzeroed ? CHUNK_MAP_UNZEROED : 0;
- /*
- * Set the unzeroed flag for all pages, now that pages_purge()
- * has returned whether the pages were zeroed as a side effect
- * of purging. This chunk map modification is safe even though
- * the arena mutex isn't currently owned by this thread,
- * because the run is marked as allocated, thus protecting it
- * from being modified by any other thread. As long as these
- * writes don't perturb the first and last elements'
- * CHUNK_MAP_ALLOCATED bits, behavior is well defined.
- */
- for (i = 0; i < npages; i++) {
- arena_mapbits_unzeroed_set(chunk, pageind+i,
- flag_unzeroed);
+
+ ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena);
+ for (extent_t *extent = extent_list_first(decay_extents); extent !=
+ NULL; extent = extent_list_first(decay_extents)) {
+ if (config_stats) {
+ nmadvise++;
}
+ size_t npages = extent_size_get(extent) >> LG_PAGE;
npurged += npages;
- if (config_stats)
- nmadvise++;
+ extent_list_remove(decay_extents, extent);
+ switch (extents_state_get(extents)) {
+ case extent_state_active:
+ not_reached();
+ case extent_state_dirty:
+ if (!all && muzzy_decay_ms != 0 &&
+ !extent_purge_lazy_wrapper(tsdn, arena,
+ r_extent_hooks, extent, 0,
+ extent_size_get(extent))) {
+ extents_dalloc(tsdn, arena, r_extent_hooks,
+ &arena->extents_muzzy, extent);
+ arena_background_thread_inactivity_check(tsdn,
+ arena, is_background_thread);
+ break;
+ }
+ /* Fall through. */
+ case extent_state_muzzy:
+ extent_dalloc_wrapper(tsdn, arena, r_extent_hooks,
+ extent);
+ if (config_stats) {
+ nunmapped += npages;
+ }
+ break;
+ case extent_state_retained:
+ default:
+ not_reached();
+ }
+ }
+
+ if (config_stats) {
+ arena_stats_lock(tsdn, &arena->stats);
+ arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge,
+ 1);
+ arena_stats_add_u64(tsdn, &arena->stats,
+ &decay->stats->nmadvise, nmadvise);
+ arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged,
+ npurged);
+ arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped,
+ nunmapped << LG_PAGE);
+ arena_stats_unlock(tsdn, &arena->stats);
}
- malloc_mutex_lock(&arena->lock);
- if (config_stats)
- arena->stats.nmadvise += nmadvise;
- return (npurged);
+ return npurged;
}
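
The switch above implements a two-phase decay: dirty extents are first purged
lazily into the muzzy state (mapping kept, contents reclaimable), and muzzy
extents are unmapped outright. A Linux-specific sketch of the same
progression, using MADV_FREE (Linux 4.5+) as the lazy purge and munmap() as
the final release; this illustrates the idea, not jemalloc's extent hooks:

    #define _DEFAULT_SOURCE
    #include <sys/mman.h>

    enum page_state { PS_DIRTY, PS_MUZZY, PS_UNMAPPED };

    enum page_state decay_step(void *addr, size_t len, enum page_state s) {
        switch (s) {
        case PS_DIRTY:
            /* Lazy purge: the kernel may reclaim the contents, but the
             * mapping itself survives and can be reused. */
            if (madvise(addr, len, MADV_FREE) == 0) {
                return PS_MUZZY;
            }
            /* Fall through: no lazy purge available, unmap directly. */
        case PS_MUZZY:
            munmap(addr, len);
            return PS_UNMAPPED;
        default:
            return s;
        }
    }
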
+/*
+ * npages_limit: decay at most npages_decay_max pages without violating
+ * the invariant (extents_npages_get(extents) >= npages_limit). An upper
+ * bound on the number of pages to decay is needed to prevent unbounded
+ * growth of the stashed list: otherwise new pages could be added to
+ * extents during the current decay run faster than they are purged, and
+ * the purging thread would never finish.
+ */
static void
-arena_chunk_unstash_purged(arena_t *arena, arena_chunk_t *chunk,
- arena_chunk_mapelms_t *mapelms)
-{
- arena_chunk_map_t *mapelm;
- size_t pageind;
-
- /* Deallocate runs. */
- for (mapelm = ql_first(mapelms); mapelm != NULL;
- mapelm = ql_first(mapelms)) {
- arena_run_t *run;
-
- pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t)) + map_bias;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)(pageind <<
- LG_PAGE));
- ql_remove(mapelms, mapelm, u.ql_link);
- arena_run_dalloc(arena, run, false, true);
+arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max,
+ bool is_background_thread) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 1);
+ malloc_mutex_assert_owner(tsdn, &decay->mtx);
+
+ if (decay->purging) {
+ return;
}
-}
+ decay->purging = true;
+ malloc_mutex_unlock(tsdn, &decay->mtx);
-static inline size_t
-arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk, bool all)
-{
- size_t npurged;
- arena_chunk_mapelms_t mapelms;
+ extent_hooks_t *extent_hooks = extent_hooks_get(arena);
- ql_new(&mapelms);
+ extent_list_t decay_extents;
+ extent_list_init(&decay_extents);
- /*
- * If chunk is the spare, temporarily re-allocate it, 1) so that its
- * run is reinserted into runs_avail, and 2) so that it cannot be
- * completely discarded by another thread while arena->lock is dropped
- * by this thread. Note that the arena_run_dalloc() call will
- * implicitly deallocate the chunk, so no explicit action is required
- * in this function to deallocate the chunk.
- *
- * Note that once a chunk contains dirty pages, it cannot again contain
- * a single run unless 1) it is a dirty run, or 2) this function purges
- * dirty pages and causes the transition to a single clean run. Thus
- * (chunk == arena->spare) is possible, but it is not possible for
- * this function to be called on the spare unless it contains a dirty
- * run.
- */
- if (chunk == arena->spare) {
- assert(arena_mapbits_dirty_get(chunk, map_bias) != 0);
- assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0);
-
- arena_chunk_alloc(arena);
+ size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents,
+ npages_limit, npages_decay_max, &decay_extents);
+ if (npurge != 0) {
+ UNUSED size_t npurged = arena_decay_stashed(tsdn, arena,
+ &extent_hooks, decay, extents, all, &decay_extents,
+ is_background_thread);
+ assert(npurged == npurge);
}
- if (config_stats)
- arena->stats.purged += chunk->ndirty;
-
- /*
- * Operate on all dirty runs if there is no clean/dirty run
- * fragmentation.
- */
- if (chunk->nruns_adjac == 0)
- all = true;
-
- arena_chunk_stash_dirty(arena, chunk, all, &mapelms);
- npurged = arena_chunk_purge_stashed(arena, chunk, &mapelms);
- arena_chunk_unstash_purged(arena, chunk, &mapelms);
-
- return (npurged);
+ malloc_mutex_lock(tsdn, &decay->mtx);
+ decay->purging = false;
}
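
decay->purging is a simple claim flag: it is set while holding decay->mtx, the
mutex is dropped for the expensive stash/purge work, and the flag is cleared
once the mutex is reacquired, so concurrent callers back off instead of
queueing. A sketch of the pattern with pthreads and hypothetical names:

    #include <pthread.h>
    #include <stdbool.h>

    struct decay_guard {
        pthread_mutex_t mtx;
        bool purging;
    };

    void purge_once(struct decay_guard *g, void (*do_purge)(void)) {
        pthread_mutex_lock(&g->mtx);
        if (g->purging) {              /* Another caller owns the work. */
            pthread_mutex_unlock(&g->mtx);
            return;
        }
        g->purging = true;
        pthread_mutex_unlock(&g->mtx); /* Slow work runs unlocked. */

        do_purge();

        pthread_mutex_lock(&g->mtx);
        g->purging = false;
        pthread_mutex_unlock(&g->mtx);
    }
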
-static void
-arena_purge(arena_t *arena, bool all)
-{
- arena_chunk_t *chunk;
- size_t npurgatory;
- if (config_debug) {
- size_t ndirty = 0;
+static bool
+arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay,
+ extents_t *extents, bool is_background_thread, bool all) {
+ if (all) {
+ malloc_mutex_lock(tsdn, &decay->mtx);
+ arena_decay_to_limit(tsdn, arena, decay, extents, all, 0,
+ extents_npages_get(extents), is_background_thread);
+ malloc_mutex_unlock(tsdn, &decay->mtx);
- arena_chunk_dirty_iter(&arena->chunks_dirty, NULL,
- chunks_dirty_iter_cb, (void *)&ndirty);
- assert(ndirty == arena->ndirty);
+ return false;
}
- assert(arena->ndirty > arena->npurgatory || all);
- assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
- arena->npurgatory) || all);
- if (config_stats)
- arena->stats.npurge++;
+ if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+ /* No need to wait if another thread is in progress. */
+ return true;
+ }
- /*
- * Add the minimum number of pages this thread should try to purge to
- * arena->npurgatory. This will keep multiple threads from racing to
- * reduce ndirty below the threshold.
- */
- npurgatory = arena_compute_npurgatory(arena, all);
- arena->npurgatory += npurgatory;
+ bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents,
+ is_background_thread);
+ UNUSED size_t npages_new;
+ if (epoch_advanced) {
+ /* Backlog is updated on epoch advance. */
+ npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1];
+ }
+ malloc_mutex_unlock(tsdn, &decay->mtx);
- while (npurgatory > 0) {
- size_t npurgeable, npurged, nunpurged;
+ if (have_background_thread && background_thread_enabled() &&
+ epoch_advanced && !is_background_thread) {
+ background_thread_interval_check(tsdn, arena, decay,
+ npages_new);
+ }
- /* Get next chunk with dirty pages. */
- chunk = arena_chunk_dirty_first(&arena->chunks_dirty);
- if (chunk == NULL) {
- /*
- * This thread was unable to purge as many pages as
- * originally intended, due to races with other threads
- * that either did some of the purging work, or re-used
- * dirty pages.
- */
- arena->npurgatory -= npurgatory;
- return;
- }
- npurgeable = chunk->ndirty;
- assert(npurgeable != 0);
+ return false;
+}
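
The trylock at the top of the non-"all" path encodes the same idea one level
up: if another thread already holds decay->mtx, that thread will do the
purging, so this caller can simply report that it skipped. A compact pthread
sketch of the idiom:

    #include <pthread.h>
    #include <stdbool.h>

    /* Returns true when skipped because another thread held the lock. */
    bool try_decay(pthread_mutex_t *mtx, void (*decay_body)(void)) {
        if (pthread_mutex_trylock(mtx) != 0) {
            return true;
        }
        decay_body();
        pthread_mutex_unlock(mtx);
        return false;
    }
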
- if (npurgeable > npurgatory && chunk->nruns_adjac == 0) {
- /*
- * This thread will purge all the dirty pages in chunk,
- * so set npurgatory to reflect this thread's intent to
- * purge the pages. This tends to reduce the chances
- * of the following scenario:
- *
- * 1) This thread sets arena->npurgatory such that
- * (arena->ndirty - arena->npurgatory) is at the
- * threshold.
- * 2) This thread drops arena->lock.
- * 3) Another thread causes one or more pages to be
- * dirtied, and immediately determines that it must
- * purge dirty pages.
- *
- * If this scenario *does* play out, that's okay,
- * because all of the purging work being done really
- * needs to happen.
- */
- arena->npurgatory += npurgeable - npurgatory;
- npurgatory = npurgeable;
- }
+static bool
+arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
+ bool all) {
+ return arena_decay_impl(tsdn, arena, &arena->decay_dirty,
+ &arena->extents_dirty, is_background_thread, all);
+}
- /*
- * Keep track of how many pages are purgeable, versus how many
- * actually get purged, and adjust counters accordingly.
- */
- arena->npurgatory -= npurgeable;
- npurgatory -= npurgeable;
- npurged = arena_chunk_purge(arena, chunk, all);
- nunpurged = npurgeable - npurged;
- arena->npurgatory += nunpurged;
- npurgatory += nunpurged;
- }
+static bool
+arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
+ bool all) {
+ return arena_decay_impl(tsdn, arena, &arena->decay_muzzy,
+ &arena->extents_muzzy, is_background_thread, all);
}
void
-arena_purge_all(arena_t *arena)
-{
-
- malloc_mutex_lock(&arena->lock);
- arena_purge(arena, true);
- malloc_mutex_unlock(&arena->lock);
+arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) {
+ if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) {
+ return;
+ }
+ arena_decay_muzzy(tsdn, arena, is_background_thread, all);
}
static void
-arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size,
- size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty)
-{
- size_t size = *p_size;
- size_t run_ind = *p_run_ind;
- size_t run_pages = *p_run_pages;
-
- /* Try to coalesce forward. */
- if (run_ind + run_pages < chunk_npages &&
- arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 &&
- arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) {
- size_t nrun_size = arena_mapbits_unallocated_size_get(chunk,
- run_ind+run_pages);
- size_t nrun_pages = nrun_size >> LG_PAGE;
+arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) {
+ arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE);
- /*
- * Remove successor from runs_avail; the coalesced run is
- * inserted later.
- */
- assert(arena_mapbits_unallocated_size_get(chunk,
- run_ind+run_pages+nrun_pages-1) == nrun_size);
- assert(arena_mapbits_dirty_get(chunk,
- run_ind+run_pages+nrun_pages-1) == flag_dirty);
- arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages,
- false, true);
-
- size += nrun_size;
- run_pages += nrun_pages;
-
- arena_mapbits_unallocated_size_set(chunk, run_ind, size);
- arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
- size);
- }
-
- /* Try to coalesce backward. */
- if (run_ind > map_bias && arena_mapbits_allocated_get(chunk,
- run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) ==
- flag_dirty) {
- size_t prun_size = arena_mapbits_unallocated_size_get(chunk,
- run_ind-1);
- size_t prun_pages = prun_size >> LG_PAGE;
-
- run_ind -= prun_pages;
+ extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+ arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, slab);
+}
- /*
- * Remove predecessor from runs_avail; the coalesced run is
- * inserted later.
- */
- assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
- prun_size);
- assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
- arena_avail_remove(arena, chunk, run_ind, prun_pages, true,
- false);
+static void
+arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) {
+ assert(extent_nfree_get(slab) > 0);
+ extent_heap_insert(&bin->slabs_nonfull, slab);
+}
- size += prun_size;
- run_pages += prun_pages;
+static void
+arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) {
+ extent_heap_remove(&bin->slabs_nonfull, slab);
+}
- arena_mapbits_unallocated_size_set(chunk, run_ind, size);
- arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
- size);
+static extent_t *
+arena_bin_slabs_nonfull_tryget(bin_t *bin) {
+ extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull);
+ if (slab == NULL) {
+ return NULL;
}
-
- *p_size = size;
- *p_run_ind = run_ind;
- *p_run_pages = run_pages;
+ if (config_stats) {
+ bin->stats.reslabs++;
+ }
+ return slab;
}
static void
-arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned)
-{
- arena_chunk_t *chunk;
- size_t size, run_ind, run_pages, flag_dirty;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
- assert(run_ind >= map_bias);
- assert(run_ind < chunk_npages);
- if (arena_mapbits_large_get(chunk, run_ind) != 0) {
- size = arena_mapbits_large_size_get(chunk, run_ind);
- assert(size == PAGE ||
- arena_mapbits_large_size_get(chunk,
- run_ind+(size>>LG_PAGE)-1) == 0);
- } else {
- size_t binind = arena_bin_index(arena, run->bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
- size = bin_info->run_size;
- }
- run_pages = (size >> LG_PAGE);
- arena_cactive_update(arena, 0, run_pages);
- arena->nactive -= run_pages;
-
+arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) {
+ assert(extent_nfree_get(slab) == 0);
/*
- * The run is dirty if the caller claims to have dirtied it, as well as
- * if it was already dirty before being allocated and the caller
- * doesn't claim to have cleaned it.
+ * Tracking extents is required by arena_reset, which is not allowed
+ * for auto arenas. Bypass this step for auto arenas to avoid touching
+ * the extent linkage, which often results in cache misses.
*/
- assert(arena_mapbits_dirty_get(chunk, run_ind) ==
- arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
- if (cleaned == false && arena_mapbits_dirty_get(chunk, run_ind) != 0)
- dirty = true;
- flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
-
- /* Mark pages as unallocated in the chunk map. */
- if (dirty) {
- arena_mapbits_unallocated_set(chunk, run_ind, size,
- CHUNK_MAP_DIRTY);
- arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
- CHUNK_MAP_DIRTY);
- } else {
- arena_mapbits_unallocated_set(chunk, run_ind, size,
- arena_mapbits_unzeroed_get(chunk, run_ind));
- arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
- arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1));
+ if (arena_is_auto(arena)) {
+ return;
}
+ extent_list_append(&bin->slabs_full, slab);
+}
- arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages,
- flag_dirty);
-
- /* Insert into runs_avail, now that coalescing is complete. */
- assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
- arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
- assert(arena_mapbits_dirty_get(chunk, run_ind) ==
- arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
- arena_avail_insert(arena, chunk, run_ind, run_pages, true, true);
-
- /* Deallocate chunk if it is now completely unused. */
- if (size == arena_maxclass) {
- assert(run_ind == map_bias);
- assert(run_pages == (arena_maxclass >> LG_PAGE));
- arena_chunk_dealloc(arena, chunk);
+static void
+arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) {
+ if (arena_is_auto(arena)) {
+ return;
}
+ extent_list_remove(&bin->slabs_full, slab);
+}
+void
+arena_reset(tsd_t *tsd, arena_t *arena) {
/*
- * It is okay to do dirty page processing here even if the chunk was
- * deallocated above, since in that case it is the spare. Waiting
- * until after possible chunk deallocation to do dirty processing
- * allows for an old spare to be fully deallocated, thus decreasing the
- * chances of spuriously crossing the dirty page purging threshold.
+ * Locking in this function is unintuitive. The caller guarantees that
+ * no concurrent operations are happening in this arena, but there are
+ * still reasons that some locking is necessary:
+ *
+ * - Some of the functions in the transitive closure of calls assume
+ * appropriate locks are held, and in some cases these locks are
+ * temporarily dropped to avoid lock order reversal or deadlock due to
+ * reentry.
+ * - mallctl("epoch", ...) may concurrently refresh stats. While
+ * strictly speaking this is a "concurrent operation", disallowing
+ * stats refreshes would impose an inconvenient burden.
*/
- if (dirty)
- arena_maybe_purge(arena);
-}
-static void
-arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
- size_t oldsize, size_t newsize)
-{
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- size_t head_npages = (oldsize - newsize) >> LG_PAGE;
- size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
+ /* Large allocations. */
+ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx);
- assert(oldsize > newsize);
+ for (extent_t *extent = extent_list_first(&arena->large); extent !=
+ NULL; extent = extent_list_first(&arena->large)) {
+ void *ptr = extent_base_get(extent);
+ size_t usize;
- /*
- * Update the chunk map so that arena_run_dalloc() can treat the
- * leading run as separately allocated. Set the last element of each
- * run first, in case of single-page runs.
- */
- assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
- arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx);
+ alloc_ctx_t alloc_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind != NSIZES);
- if (config_debug) {
- UNUSED size_t tail_npages = newsize >> LG_PAGE;
- assert(arena_mapbits_large_size_get(chunk,
- pageind+head_npages+tail_npages-1) == 0);
- assert(arena_mapbits_dirty_get(chunk,
- pageind+head_npages+tail_npages-1) == flag_dirty);
+ if (config_stats || (config_prof && opt_prof)) {
+ usize = sz_index2size(alloc_ctx.szind);
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+ }
+ /* Remove large allocation from prof sample set. */
+ if (config_prof && opt_prof) {
+ prof_free(tsd, ptr, usize, &alloc_ctx);
+ }
+ large_dalloc(tsd_tsdn(tsd), extent);
+ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx);
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx);
+
+ /* Bins. */
+ for (unsigned i = 0; i < NBINS; i++) {
+ extent_t *slab;
+ bin_t *bin = &arena->bins[i];
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+ if (bin->slabcur != NULL) {
+ slab = bin->slabcur;
+ bin->slabcur = NULL;
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+ arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+ }
+ while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) !=
+ NULL) {
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+ arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+ }
+ for (slab = extent_list_first(&bin->slabs_full); slab != NULL;
+ slab = extent_list_first(&bin->slabs_full)) {
+ arena_bin_slabs_full_remove(arena, bin, slab);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+ arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+ }
+ if (config_stats) {
+ bin->stats.curregs = 0;
+ bin->stats.curslabs = 0;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
}
- arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
- flag_dirty);
- arena_run_dalloc(arena, run, false, false);
+ atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED);
}
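
arena_reset() frees every live allocation in the arena behind the
application's back, which is why the caller must guarantee exclusive use. A
minimal sketch of triggering it, assuming jemalloc 5.x's "arena.<i>.reset"
mallctl name:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    /* Invalidates every allocation in the arena; only safe for arenas
     * the application created and owns exclusively. */
    int reset_arena(unsigned arena_ind) {
        char name[32];
        snprintf(name, sizeof(name), "arena.%u.reset", arena_ind);
        return mallctl(name, NULL, NULL, NULL, 0);
    }
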
static void
-arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
- size_t oldsize, size_t newsize, bool dirty)
-{
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- size_t head_npages = newsize >> LG_PAGE;
- size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
-
- assert(oldsize > newsize);
-
+arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) {
/*
- * Update the chunk map so that arena_run_dalloc() can treat the
- * trailing run as separately allocated. Set the last element of each
- * run first, in case of single-page runs.
+ * Iterate over the retained extents and destroy them. This gives the
+ * extent allocator underlying the extent hooks an opportunity to unmap
+ * all retained memory without having to keep its own metadata
+ * structures. In practice, virtual memory for dss-allocated extents is
+ * leaked here, so best practice is to avoid dss for arenas to be
+ * destroyed, or provide custom extent hooks that track retained
+ * dss-based extents for later reuse.
*/
- assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
- arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
- arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty);
-
- if (config_debug) {
- UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
- assert(arena_mapbits_large_size_get(chunk,
- pageind+head_npages+tail_npages-1) == 0);
- assert(arena_mapbits_dirty_get(chunk,
- pageind+head_npages+tail_npages-1) == flag_dirty);
+ extent_hooks_t *extent_hooks = extent_hooks_get(arena);
+ extent_t *extent;
+ while ((extent = extents_evict(tsdn, arena, &extent_hooks,
+ &arena->extents_retained, 0)) != NULL) {
+ extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent);
}
- arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
- flag_dirty);
-
- arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
- dirty, false);
}
-static arena_run_t *
-arena_bin_runs_first(arena_bin_t *bin)
-{
- arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs);
- if (mapelm != NULL) {
- arena_chunk_t *chunk;
- size_t pageind;
- arena_run_t *run;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
- pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
- sizeof(arena_chunk_map_t))) + map_bias;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) <<
- LG_PAGE));
- return (run);
- }
+void
+arena_destroy(tsd_t *tsd, arena_t *arena) {
+ assert(base_ind_get(arena->base) >= narenas_auto);
+ assert(arena_nthreads_get(arena, false) == 0);
+ assert(arena_nthreads_get(arena, true) == 0);
- return (NULL);
-}
+ /*
+ * No allocations have occurred since arena_reset() was called.
+ * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached
+ * extents, so only retained extents may remain.
+ */
+ assert(extents_npages_get(&arena->extents_dirty) == 0);
+ assert(extents_npages_get(&arena->extents_muzzy) == 0);
-static void
-arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
-{
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(run);
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
+ /* Deallocate retained memory. */
+ arena_destroy_retained(tsd_tsdn(tsd), arena);
- assert(arena_run_tree_search(&bin->runs, mapelm) == NULL);
+ /*
+ * Remove the arena pointer from the arenas array. We rely on the fact
+ * that there is no way for the application to get a dirty read from the
+ * arenas array unless there is an inherent race in the application
+ * involving access to an arena being concurrently destroyed. The
+ * application must synchronize knowledge of the arena's validity, so as
+ * long as we use an atomic write to update the arenas array, the
+ * application will get a clean read any time after it synchronizes
+ * knowledge that the arena is no longer valid.
+ */
+ arena_set(base_ind_get(arena->base), NULL);
- arena_run_tree_insert(&bin->runs, mapelm);
+ /*
+ * Destroy the base allocator, which manages all metadata ever mapped by
+ * this arena.
+ */
+ base_delete(tsd_tsdn(tsd), arena->base);
}
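
The asserts above (manual arena, zero attached threads, no dirty or muzzy
pages) describe the contract the mallctl layer enforces before calling
arena_destroy(). A sketch of the full lifecycle from application code,
assuming jemalloc 5.x's "arenas.create" / "arena.<i>.destroy" mallctls and the
MALLOCX_ARENA() flag:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int arena_lifecycle_demo(void) {
        unsigned ind;
        size_t sz = sizeof(ind);
        char name[32];

        if (mallctl("arenas.create", &ind, &sz, NULL, 0) != 0) {
            return 1;
        }
        void *p = mallocx(4096, MALLOCX_ARENA(ind));
        if (p != NULL) {
            dallocx(p, 0);
        }
        /* Destroy resets the arena and releases retained memory. */
        snprintf(name, sizeof(name), "arena.%u.destroy", ind);
        return mallctl(name, NULL, NULL, NULL, 0);
    }
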
-static void
-arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
-{
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
-
- assert(arena_run_tree_search(&bin->runs, mapelm) != NULL);
+static extent_t *
+arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info,
+ szind_t szind) {
+ extent_t *slab;
+ bool zero, commit;
- arena_run_tree_remove(&bin->runs, mapelm);
-}
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
-static arena_run_t *
-arena_bin_nonfull_run_tryget(arena_bin_t *bin)
-{
- arena_run_t *run = arena_bin_runs_first(bin);
- if (run != NULL) {
- arena_bin_runs_remove(bin, run);
- if (config_stats)
- bin->stats.reruns++;
+ zero = false;
+ commit = true;
+ slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL,
+ bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit);
+
+ if (config_stats && slab != NULL) {
+ arena_stats_mapped_add(tsdn, &arena->stats,
+ bin_info->slab_size);
+ }
+
+ return slab;
+}
+
+static extent_t *
+arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+ const bin_info_t *bin_info) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+ szind_t szind = sz_size2index(bin_info->reg_size);
+ bool zero = false;
+ bool commit = true;
+ extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true,
+ binind, &zero, &commit);
+ if (slab == NULL) {
+ slab = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE,
+ true, binind, &zero, &commit);
+ }
+ if (slab == NULL) {
+ slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks,
+ bin_info, szind);
+ if (slab == NULL) {
+ return NULL;
+ }
}
- return (run);
+ assert(extent_slab_get(slab));
+
+ /* Initialize slab internals. */
+ arena_slab_data_t *slab_data = extent_slab_data_get(slab);
+ extent_nfree_set(slab, bin_info->nregs);
+ bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false);
+
+ arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE);
+
+ return slab;
}
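
arena_slab_alloc() tries progressively more expensive sources: recycle a dirty
extent (still resident), then a muzzy one (purged but still mapped), and only
then map fresh memory through the extent hooks. A self-contained sketch of
that reuse-before-map chain, with a hypothetical intrusive free list standing
in for extents_alloc():

    #define _DEFAULT_SOURCE
    #include <stddef.h>
    #include <sys/mman.h>

    /* Hypothetical free list of equally sized slabs; the next pointer
     * is stored in the first word of each free slab. */
    struct pool { void *head; };

    static void *pool_take(struct pool *p) {
        void *slab = p->head;
        if (slab != NULL) {
            p->head = *(void **)slab;
        }
        return slab;
    }

    void *slab_alloc(struct pool *dirty, struct pool *muzzy, size_t size) {
        void *slab = pool_take(dirty);      /* Cheapest: still resident. */
        if (slab == NULL) {
            slab = pool_take(muzzy);        /* Purged but still mapped. */
        }
        if (slab == NULL) {                 /* Most expensive path. */
            slab = mmap(NULL, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (slab == MAP_FAILED) {
                return NULL;
            }
        }
        return slab;
    }
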
-static arena_run_t *
-arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
-{
- arena_run_t *run;
- size_t binind;
- arena_bin_info_t *bin_info;
+static extent_t *
+arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+ szind_t binind) {
+ extent_t *slab;
+ const bin_info_t *bin_info;
- /* Look for a usable run. */
- run = arena_bin_nonfull_run_tryget(bin);
- if (run != NULL)
- return (run);
- /* No existing runs have any space available. */
+ /* Look for a usable slab. */
+ slab = arena_bin_slabs_nonfull_tryget(bin);
+ if (slab != NULL) {
+ return slab;
+ }
+ /* No existing slabs have any space available. */
- binind = arena_bin_index(arena, bin);
- bin_info = &arena_bin_info[binind];
+ bin_info = &bin_infos[binind];
- /* Allocate a new run. */
- malloc_mutex_unlock(&bin->lock);
+ /* Allocate a new slab. */
+ malloc_mutex_unlock(tsdn, &bin->lock);
/******************************/
- malloc_mutex_lock(&arena->lock);
- run = arena_run_alloc_small(arena, bin_info->run_size, binind);
- if (run != NULL) {
- bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
- (uintptr_t)bin_info->bitmap_offset);
-
- /* Initialize run internals. */
- run->bin = bin;
- run->nextind = 0;
- run->nfree = bin_info->nregs;
- bitmap_init(bitmap, &bin_info->bitmap_info);
- }
- malloc_mutex_unlock(&arena->lock);
+ slab = arena_slab_alloc(tsdn, arena, binind, bin_info);
/********************************/
- malloc_mutex_lock(&bin->lock);
- if (run != NULL) {
+ malloc_mutex_lock(tsdn, &bin->lock);
+ if (slab != NULL) {
if (config_stats) {
- bin->stats.nruns++;
- bin->stats.curruns++;
+ bin->stats.nslabs++;
+ bin->stats.curslabs++;
}
- return (run);
+ return slab;
}
/*
- * arena_run_alloc_small() failed, but another thread may have made
+ * arena_slab_alloc() failed, but another thread may have made
* sufficient memory available while this one dropped bin->lock above,
* so search one more time.
*/
- run = arena_bin_nonfull_run_tryget(bin);
- if (run != NULL)
- return (run);
+ slab = arena_bin_slabs_nonfull_tryget(bin);
+ if (slab != NULL) {
+ return slab;
+ }
- return (NULL);
+ return NULL;
}
-/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */
+/* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */
static void *
-arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
-{
- void *ret;
- size_t binind;
- arena_bin_info_t *bin_info;
- arena_run_t *run;
-
- binind = arena_bin_index(arena, bin);
- bin_info = &arena_bin_info[binind];
- bin->runcur = NULL;
- run = arena_bin_nonfull_run_get(arena, bin);
- if (bin->runcur != NULL && bin->runcur->nfree > 0) {
+arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+ szind_t binind) {
+ const bin_info_t *bin_info;
+ extent_t *slab;
+
+ bin_info = &bin_infos[binind];
+ if (!arena_is_auto(arena) && bin->slabcur != NULL) {
+ arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
+ bin->slabcur = NULL;
+ }
+ slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind);
+ if (bin->slabcur != NULL) {
/*
- * Another thread updated runcur while this one ran without the
- * bin lock in arena_bin_nonfull_run_get().
+ * Another thread updated slabcur while this one ran without the
+ * bin lock in arena_bin_nonfull_slab_get().
*/
- assert(bin->runcur->nfree > 0);
- ret = arena_run_reg_alloc(bin->runcur, bin_info);
- if (run != NULL) {
- arena_chunk_t *chunk;
-
- /*
- * arena_run_alloc_small() may have allocated run, or
- * it may have pulled run from the bin's run tree.
- * Therefore it is unsafe to make any assumptions about
- * how run has previously been used, and
- * arena_bin_lower_run() must be called, as if a region
- * were just deallocated from the run.
- */
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
- if (run->nfree == bin_info->nregs)
- arena_dalloc_bin_run(arena, chunk, run, bin);
- else
- arena_bin_lower_run(arena, chunk, run, bin);
+ if (extent_nfree_get(bin->slabcur) > 0) {
+ void *ret = arena_slab_reg_alloc(bin->slabcur,
+ bin_info);
+ if (slab != NULL) {
+ /*
+ * arena_slab_alloc() may have allocated slab,
+ * or it may have been pulled from
+ * slabs_nonfull. Therefore it is unsafe to
+ * make any assumptions about how slab has
+ * previously been used, and
+ * arena_bin_lower_slab() must be called, as if
+ * a region were just deallocated from the slab.
+ */
+ if (extent_nfree_get(slab) == bin_info->nregs) {
+ arena_dalloc_bin_slab(tsdn, arena, slab,
+ bin);
+ } else {
+ arena_bin_lower_slab(tsdn, arena, slab,
+ bin);
+ }
+ }
+ return ret;
}
- return (ret);
- }
- if (run == NULL)
- return (NULL);
+ arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
+ bin->slabcur = NULL;
+ }
- bin->runcur = run;
+ if (slab == NULL) {
+ return NULL;
+ }
+ bin->slabcur = slab;
- assert(bin->runcur->nfree > 0);
+ assert(extent_nfree_get(bin->slabcur) > 0);
- return (arena_run_reg_alloc(bin->runcur, bin_info));
+ return arena_slab_reg_alloc(slab, bin_info);
}
void
-arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
- uint64_t prof_accumbytes)
-{
+arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+ cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) {
unsigned i, nfill;
- arena_bin_t *bin;
- arena_run_t *run;
- void *ptr;
+ bin_t *bin;
assert(tbin->ncached == 0);
- if (config_prof && arena_prof_accum(arena, prof_accumbytes))
- prof_idump();
+ if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) {
+ prof_idump(tsdn);
+ }
bin = &arena->bins[binind];
- malloc_mutex_lock(&bin->lock);
+ malloc_mutex_lock(tsdn, &bin->lock);
for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
- tbin->lg_fill_div); i < nfill; i++) {
- if ((run = bin->runcur) != NULL && run->nfree > 0)
- ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
- else
- ptr = arena_bin_malloc_hard(arena, bin);
- if (ptr == NULL)
+ tcache->lg_fill_div[binind]); i < nfill; i++) {
+ extent_t *slab;
+ void *ptr;
+ if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) >
+ 0) {
+ ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]);
+ } else {
+ ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind);
+ }
+ if (ptr == NULL) {
+ /*
+ * OOM. tbin->avail isn't yet filled down to its first
+ * element, so the successful allocations (if any) must
+ * be moved just before tbin->avail before bailing out.
+ */
+ if (i > 0) {
+ memmove(tbin->avail - i, tbin->avail - nfill,
+ i * sizeof(void *));
+ }
break;
- if (config_fill && opt_junk) {
- arena_alloc_junk_small(ptr, &arena_bin_info[binind],
- true);
+ }
+ if (config_fill && unlikely(opt_junk_alloc)) {
+ arena_alloc_junk_small(ptr, &bin_infos[binind], true);
}
/* Insert such that low regions get used first. */
- tbin->avail[nfill - 1 - i] = ptr;
+ *(tbin->avail - nfill + i) = ptr;
}
if (config_stats) {
- bin->stats.allocated += i * arena_bin_info[binind].reg_size;
bin->stats.nmalloc += i;
bin->stats.nrequests += tbin->tstats.nrequests;
+ bin->stats.curregs += i;
bin->stats.nfills++;
tbin->tstats.nrequests = 0;
}
- malloc_mutex_unlock(&bin->lock);
+ malloc_mutex_unlock(tsdn, &bin->lock);
tbin->ncached = i;
+ arena_decay_tick(tsdn, arena);
}
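
The refill size above is the bin's cache capacity scaled down by a per-bin
power-of-two divisor (tcache->lg_fill_div[binind]), so bins that keep flushing
can be made to refill less aggressively. A worked example with illustrative
numbers, not jemalloc's actual tuning:

    #include <stdio.h>

    int main(void) {
        unsigned ncached_max = 64;
        for (unsigned lg_fill_div = 0; lg_fill_div <= 3; lg_fill_div++) {
            printf("lg_fill_div=%u -> nfill=%u\n",
                lg_fill_div, ncached_max >> lg_fill_div);
        }
        return 0; /* nfill: 64, 32, 16, 8 */
    }
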
void
-arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
-{
-
- if (zero) {
- size_t redzone_size = bin_info->redzone_size;
- memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
- redzone_size);
- memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
- redzone_size);
- } else {
- memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
- bin_info->reg_interval);
+arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) {
+ if (!zero) {
+ memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size);
}
}
-#ifdef JEMALLOC_JET
-#undef arena_redzone_corruption
-#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl)
-#endif
-static void
-arena_redzone_corruption(void *ptr, size_t usize, bool after,
- size_t offset, uint8_t byte)
-{
-
- malloc_printf("<jemalloc>: Corrupt redzone %zu byte%s %s %p "
- "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s",
- after ? "after" : "before", ptr, usize, byte);
-}
-#ifdef JEMALLOC_JET
-#undef arena_redzone_corruption
-#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption)
-arena_redzone_corruption_t *arena_redzone_corruption =
- JEMALLOC_N(arena_redzone_corruption_impl);
-#endif
-
static void
-arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset)
-{
- size_t size = bin_info->reg_size;
- size_t redzone_size = bin_info->redzone_size;
- size_t i;
- bool error = false;
-
- for (i = 1; i <= redzone_size; i++) {
- uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i);
- if (*byte != 0xa5) {
- error = true;
- arena_redzone_corruption(ptr, size, false, i, *byte);
- if (reset)
- *byte = 0xa5;
- }
- }
- for (i = 0; i < redzone_size; i++) {
- uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i);
- if (*byte != 0xa5) {
- error = true;
- arena_redzone_corruption(ptr, size, true, i, *byte);
- if (reset)
- *byte = 0xa5;
- }
- }
- if (opt_abort && error)
- abort();
-}
-
-#ifdef JEMALLOC_JET
-#undef arena_dalloc_junk_small
-#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl)
-#endif
-void
-arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
-{
- size_t redzone_size = bin_info->redzone_size;
-
- arena_redzones_validate(ptr, bin_info, false);
- memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
- bin_info->reg_interval);
-}
-#ifdef JEMALLOC_JET
-#undef arena_dalloc_junk_small
-#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
-arena_dalloc_junk_small_t *arena_dalloc_junk_small =
- JEMALLOC_N(arena_dalloc_junk_small_impl);
-#endif
-
-void
-arena_quarantine_junk_small(void *ptr, size_t usize)
-{
- size_t binind;
- arena_bin_info_t *bin_info;
- cassert(config_fill);
- assert(opt_junk);
- assert(opt_quarantine);
- assert(usize <= SMALL_MAXCLASS);
-
- binind = SMALL_SIZE2BIN(usize);
- bin_info = &arena_bin_info[binind];
- arena_redzones_validate(ptr, bin_info, true);
+arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) {
+ memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size);
}
+arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small =
+ arena_dalloc_junk_small_impl;
-void *
-arena_malloc_small(arena_t *arena, size_t size, bool zero)
-{
+static void *
+arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) {
void *ret;
- arena_bin_t *bin;
- arena_run_t *run;
- size_t binind;
+ bin_t *bin;
+ size_t usize;
+ extent_t *slab;
- binind = SMALL_SIZE2BIN(size);
assert(binind < NBINS);
bin = &arena->bins[binind];
- size = arena_bin_info[binind].reg_size;
+ usize = sz_index2size(binind);
- malloc_mutex_lock(&bin->lock);
- if ((run = bin->runcur) != NULL && run->nfree > 0)
- ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
- else
- ret = arena_bin_malloc_hard(arena, bin);
+ malloc_mutex_lock(tsdn, &bin->lock);
+ if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) {
+ ret = arena_slab_reg_alloc(slab, &bin_infos[binind]);
+ } else {
+ ret = arena_bin_malloc_hard(tsdn, arena, bin, binind);
+ }
if (ret == NULL) {
- malloc_mutex_unlock(&bin->lock);
- return (NULL);
+ malloc_mutex_unlock(tsdn, &bin->lock);
+ return NULL;
}
if (config_stats) {
- bin->stats.allocated += size;
bin->stats.nmalloc++;
bin->stats.nrequests++;
+ bin->stats.curregs++;
+ }
+ malloc_mutex_unlock(tsdn, &bin->lock);
+ if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
+ prof_idump(tsdn);
}
- malloc_mutex_unlock(&bin->lock);
- if (config_prof && isthreaded == false && arena_prof_accum(arena, size))
- prof_idump();
- if (zero == false) {
+ if (!zero) {
if (config_fill) {
- if (opt_junk) {
+ if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
- &arena_bin_info[binind], false);
- } else if (opt_zero)
- memset(ret, 0, size);
+ &bin_infos[binind], false);
+ } else if (unlikely(opt_zero)) {
+ memset(ret, 0, usize);
+ }
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
} else {
- if (config_fill && opt_junk) {
- arena_alloc_junk_small(ret, &arena_bin_info[binind],
+ if (config_fill && unlikely(opt_junk_alloc)) {
+ arena_alloc_junk_small(ret, &bin_infos[binind],
true);
}
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
+ memset(ret, 0, usize);
}
- return (ret);
+ arena_decay_tick(tsdn, arena);
+ return ret;
}
void *
-arena_malloc_large(arena_t *arena, size_t size, bool zero)
-{
- void *ret;
- UNUSED bool idump;
+arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind,
+ bool zero) {
+ assert(!tsdn_null(tsdn) || arena != NULL);
- /* Large allocation. */
- size = PAGE_CEILING(size);
- malloc_mutex_lock(&arena->lock);
- ret = (void *)arena_run_alloc_large(arena, size, zero);
- if (ret == NULL) {
- malloc_mutex_unlock(&arena->lock);
- return (NULL);
+ if (likely(!tsdn_null(tsdn))) {
+ arena = arena_choose(tsdn_tsd(tsdn), arena);
}
- if (config_stats) {
- arena->stats.nmalloc_large++;
- arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
- }
- if (config_prof)
- idump = arena_prof_accum_locked(arena, size);
- malloc_mutex_unlock(&arena->lock);
- if (config_prof && idump)
- prof_idump();
-
- if (zero == false) {
- if (config_fill) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
- }
+ if (unlikely(arena == NULL)) {
+ return NULL;
}
- return (ret);
+ if (likely(size <= SMALL_MAXCLASS)) {
+ return arena_malloc_small(tsdn, arena, ind, zero);
+ }
+ return large_malloc(tsdn, arena, sz_index2size(ind), zero);
}
-/* Only handles large allocations that require more than page alignment. */
void *
-arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
-{
+arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+ bool zero, tcache_t *tcache) {
void *ret;
- size_t alloc_size, leadsize, trailsize;
- arena_run_t *run;
- arena_chunk_t *chunk;
- assert((size & PAGE_MASK) == 0);
+ if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
+ && (usize & PAGE_MASK) == 0))) {
+ /* Small; alignment doesn't require special slab placement. */
+ ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize),
+ zero, tcache, true);
+ } else {
+ if (likely(alignment <= CACHELINE)) {
+ ret = large_malloc(tsdn, arena, usize, zero);
+ } else {
+ ret = large_palloc(tsdn, arena, usize, alignment, zero);
+ }
+ }
+ return ret;
+}
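
The routing above keeps ordinarily aligned small requests on the cheap slab
path and reserves large_palloc() for alignments that no slab placement can
satisfy. From the application side this is reached through jemalloc's public
mallocx()/MALLOCX_ALIGN(); a sketch, assuming the common 4 KiB page size:

    #include <jemalloc/jemalloc.h>

    void *aligned_demo(void) {
        void *a = mallocx(256, MALLOCX_ALIGN(64));   /* small slab path */
        void *b = mallocx(256, MALLOCX_ALIGN(4096)); /* large_palloc() path */
        if (a != NULL) {
            dallocx(a, 0);
        }
        return b;
    }
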
+
+void
+arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+ assert(isalloc(tsdn, ptr) == LARGE_MINCLASS);
+ assert(usize <= SMALL_MAXCLASS);
- alignment = PAGE_CEILING(alignment);
- alloc_size = size + alignment - PAGE;
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
- malloc_mutex_lock(&arena->lock);
- run = arena_run_alloc_large(arena, alloc_size, false);
- if (run == NULL) {
- malloc_mutex_unlock(&arena->lock);
- return (NULL);
- }
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+ extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true);
+ arena_t *arena = extent_arena_get(extent);
- leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) -
- (uintptr_t)run;
- assert(alloc_size >= leadsize + size);
- trailsize = alloc_size - leadsize - size;
- ret = (void *)((uintptr_t)run + leadsize);
- if (leadsize != 0) {
- arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size -
- leadsize);
- }
- if (trailsize != 0) {
- arena_run_trim_tail(arena, chunk, ret, size + trailsize, size,
- false);
- }
- arena_run_init_large(arena, (arena_run_t *)ret, size, zero);
+ szind_t szind = sz_size2index(usize);
+ extent_szind_set(extent, szind);
+ rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
+ szind, false);
- if (config_stats) {
- arena->stats.nmalloc_large++;
- arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
- }
- malloc_mutex_unlock(&arena->lock);
+ prof_accum_cancel(tsdn, &arena->prof_accum, usize);
- if (config_fill && zero == false) {
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
- }
- return (ret);
+ assert(isalloc(tsdn, ptr) == usize);
}
-void
-arena_prof_promoted(const void *ptr, size_t size)
-{
- arena_chunk_t *chunk;
- size_t pageind, binind;
-
+static size_t
+arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) {
cassert(config_prof);
assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
- assert(isalloc(ptr, false) == PAGE);
- assert(isalloc(ptr, true) == PAGE);
- assert(size <= SMALL_MAXCLASS);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- binind = SMALL_SIZE2BIN(size);
- assert(binind < NBINS);
- arena_mapbits_large_binind_set(chunk, pageind, binind);
- assert(isalloc(ptr, false) == PAGE);
- assert(isalloc(ptr, true) == size);
+ extent_szind_set(extent, NBINS);
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
+ NBINS, false);
+
+ assert(isalloc(tsdn, ptr) == LARGE_MINCLASS);
+
+ return LARGE_MINCLASS;
+}
+
+void
+arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+ bool slow_path) {
+ cassert(config_prof);
+ assert(opt_prof);
+
+ extent_t *extent = iealloc(tsdn, ptr);
+ size_t usize = arena_prof_demote(tsdn, extent, ptr);
+ if (usize <= tcache_maxclass) {
+ tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
+ sz_size2index(usize), slow_path);
+ } else {
+ large_dalloc(tsdn, extent);
+ }
}
static void
-arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
- arena_bin_t *bin)
-{
-
- /* Dissociate run from bin. */
- if (run == bin->runcur)
- bin->runcur = NULL;
- else {
- size_t binind = arena_bin_index(chunk->arena, bin);
- arena_bin_info_t *bin_info = &arena_bin_info[binind];
-
- if (bin_info->nregs != 1) {
- /*
- * This block's conditional is necessary because if the
- * run only contains one region, then it never gets
- * inserted into the non-full runs tree.
- */
- arena_bin_runs_remove(bin, run);
+arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) {
+ /* Dissociate slab from bin. */
+ if (slab == bin->slabcur) {
+ bin->slabcur = NULL;
+ } else {
+ szind_t binind = extent_szind_get(slab);
+ const bin_info_t *bin_info = &bin_infos[binind];
+
+ /*
+ * The following block's conditional is necessary because if the
+ * slab only contains one region, then it never gets inserted
+ * into the non-full slabs heap.
+ */
+ if (bin_info->nregs == 1) {
+ arena_bin_slabs_full_remove(arena, bin, slab);
+ } else {
+ arena_bin_slabs_nonfull_remove(bin, slab);
}
}
}
static void
-arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
- arena_bin_t *bin)
-{
- size_t binind;
- arena_bin_info_t *bin_info;
- size_t npages, run_ind, past;
-
- assert(run != bin->runcur);
- assert(arena_run_tree_search(&bin->runs,
- arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE))
- == NULL);
-
- binind = arena_bin_index(chunk->arena, run->bin);
- bin_info = &arena_bin_info[binind];
-
- malloc_mutex_unlock(&bin->lock);
- /******************************/
- npages = bin_info->run_size >> LG_PAGE;
- run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
- past = (size_t)(PAGE_CEILING((uintptr_t)run +
- (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
- bin_info->reg_interval - bin_info->redzone_size) -
- (uintptr_t)chunk) >> LG_PAGE);
- malloc_mutex_lock(&arena->lock);
+arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+ bin_t *bin) {
+ assert(slab != bin->slabcur);
- /*
- * If the run was originally clean, and some pages were never touched,
- * trim the clean pages before deallocating the dirty portion of the
- * run.
- */
- assert(arena_mapbits_dirty_get(chunk, run_ind) ==
- arena_mapbits_dirty_get(chunk, run_ind+npages-1));
- if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind <
- npages) {
- /* Trim clean pages. Convert to large run beforehand. */
- assert(npages > 0);
- arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0);
- arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0);
- arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE),
- ((past - run_ind) << LG_PAGE), false);
- /* npages = past - run_ind; */
- }
- arena_run_dalloc(arena, run, true, false);
- malloc_mutex_unlock(&arena->lock);
+ malloc_mutex_unlock(tsdn, &bin->lock);
+ /******************************/
+ arena_slab_dalloc(tsdn, arena, slab);
/****************************/
- malloc_mutex_lock(&bin->lock);
- if (config_stats)
- bin->stats.curruns--;
+ malloc_mutex_lock(tsdn, &bin->lock);
+ if (config_stats) {
+ bin->stats.curslabs--;
+ }
}
static void
-arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
- arena_bin_t *bin)
-{
+arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+ bin_t *bin) {
+ assert(extent_nfree_get(slab) > 0);
/*
- * Make sure that if bin->runcur is non-NULL, it refers to the lowest
- * non-full run. It is okay to NULL runcur out rather than proactively
- * keeping it pointing at the lowest non-full run.
+ * Make sure that if bin->slabcur is non-NULL, it refers to the
+ * oldest/lowest non-full slab. It is okay to NULL slabcur out rather
+ * than proactively keeping it pointing at the oldest/lowest non-full
+ * slab.
*/
- if ((uintptr_t)run < (uintptr_t)bin->runcur) {
- /* Switch runcur. */
- if (bin->runcur->nfree > 0)
- arena_bin_runs_insert(bin, bin->runcur);
- bin->runcur = run;
- if (config_stats)
- bin->stats.reruns++;
- } else
- arena_bin_runs_insert(bin, run);
+ if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) {
+ /* Switch slabcur. */
+ if (extent_nfree_get(bin->slabcur) > 0) {
+ arena_bin_slabs_nonfull_insert(bin, bin->slabcur);
+ } else {
+ arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
+ }
+ bin->slabcur = slab;
+ if (config_stats) {
+ bin->stats.reslabs++;
+ }
+ } else {
+ arena_bin_slabs_nonfull_insert(bin, slab);
+ }
}
-void
-arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- arena_chunk_map_t *mapelm)
-{
- size_t pageind;
- arena_run_t *run;
- arena_bin_t *bin;
- arena_bin_info_t *bin_info;
- size_t size, binind;
-
- pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
- bin = run->bin;
- binind = arena_ptr_small_binind_get(ptr, mapelm->bits);
- bin_info = &arena_bin_info[binind];
- if (config_fill || config_stats)
- size = bin_info->reg_size;
-
- if (config_fill && opt_junk)
+static void
+arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
+ void *ptr, bool junked) {
+ arena_slab_data_t *slab_data = extent_slab_data_get(slab);
+ szind_t binind = extent_szind_get(slab);
+ bin_t *bin = &arena->bins[binind];
+ const bin_info_t *bin_info = &bin_infos[binind];
+
+ if (!junked && config_fill && unlikely(opt_junk_free)) {
arena_dalloc_junk_small(ptr, bin_info);
+ }
- arena_run_reg_dalloc(run, ptr);
- if (run->nfree == bin_info->nregs) {
- arena_dissociate_bin_run(chunk, run, bin);
- arena_dalloc_bin_run(arena, chunk, run, bin);
- } else if (run->nfree == 1 && run != bin->runcur)
- arena_bin_lower_run(arena, chunk, run, bin);
+ arena_slab_reg_dalloc(slab, slab_data, ptr);
+ unsigned nfree = extent_nfree_get(slab);
+ if (nfree == bin_info->nregs) {
+ arena_dissociate_bin_slab(arena, slab, bin);
+ arena_dalloc_bin_slab(tsdn, arena, slab, bin);
+ } else if (nfree == 1 && slab != bin->slabcur) {
+ arena_bin_slabs_full_remove(arena, bin, slab);
+ arena_bin_lower_slab(tsdn, arena, slab, bin);
+ }
if (config_stats) {
- bin->stats.allocated -= size;
bin->stats.ndalloc++;
+ bin->stats.curregs--;
}
}
void
-arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind, arena_chunk_map_t *mapelm)
-{
- arena_run_t *run;
- arena_bin_t *bin;
-
- run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
- arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
- bin = run->bin;
- malloc_mutex_lock(&bin->lock);
- arena_dalloc_bin_locked(arena, chunk, ptr, mapelm);
- malloc_mutex_unlock(&bin->lock);
+arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
+ void *ptr) {
+ arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true);
}
-void
-arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t pageind)
-{
- arena_chunk_map_t *mapelm;
+static void
+arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) {
+ szind_t binind = extent_szind_get(extent);
+ bin_t *bin = &arena->bins[binind];
- if (config_debug) {
- /* arena_ptr_small_binind_get() does extra sanity checking. */
- assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
- pageind)) != BININD_INVALID);
- }
- mapelm = arena_mapp_get(chunk, pageind);
- arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm);
+ malloc_mutex_lock(tsdn, &bin->lock);
+ arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false);
+ malloc_mutex_unlock(tsdn, &bin->lock);
}
-#ifdef JEMALLOC_JET
-#undef arena_dalloc_junk_large
-#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl)
-#endif
-static void
-arena_dalloc_junk_large(void *ptr, size_t usize)
-{
+void
+arena_dalloc_small(tsdn_t *tsdn, void *ptr) {
+ extent_t *extent = iealloc(tsdn, ptr);
+ arena_t *arena = extent_arena_get(extent);
- if (config_fill && opt_junk)
- memset(ptr, 0x5a, usize);
+ arena_dalloc_bin(tsdn, arena, extent, ptr);
+ arena_decay_tick(tsdn, arena);
}
-#ifdef JEMALLOC_JET
-#undef arena_dalloc_junk_large
-#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large)
-arena_dalloc_junk_large_t *arena_dalloc_junk_large =
- JEMALLOC_N(arena_dalloc_junk_large_impl);
-#endif
-void
-arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
-{
+bool
+arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+ size_t extra, bool zero) {
+	/* Callers passing non-zero extra must already have clamped it. */
+ assert(extra == 0 || size + extra <= LARGE_MAXCLASS);
- if (config_fill || config_stats) {
- size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- size_t usize = arena_mapbits_large_size_get(chunk, pageind);
+ if (unlikely(size > LARGE_MAXCLASS)) {
+ return true;
+ }
- arena_dalloc_junk_large(ptr, usize);
- if (config_stats) {
- arena->stats.ndalloc_large++;
- arena->stats.allocated_large -= usize;
- arena->stats.lstats[(usize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(usize >> LG_PAGE) - 1].curruns--;
+ extent_t *extent = iealloc(tsdn, ptr);
+ size_t usize_min = sz_s2u(size);
+ size_t usize_max = sz_s2u(size + extra);
+ if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) {
+ /*
+ * Avoid moving the allocation if the size class can be left the
+ * same.
+ */
+ assert(bin_infos[sz_size2index(oldsize)].reg_size ==
+ oldsize);
+ if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) !=
+ sz_size2index(oldsize)) && (size > oldsize || usize_max <
+ oldsize)) {
+ return true;
}
+
+ arena_decay_tick(tsdn, extent_arena_get(extent));
+ return false;
+ } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) {
+ return large_ralloc_no_move(tsdn, extent, usize_min, usize_max,
+ zero);
}
- arena_run_dalloc(arena, (arena_run_t *)ptr, true, false);
+ return true;
}
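/*
 * A simplified sketch of the small-class no-move test above, assuming
 * the sz_size2index()/SMALL_MAXCLASS names from this diff and ignoring
 * the extra/shrink tolerance handled in the full version: an in-place
 * realloc is possible exactly when both requests round up to the same
 * small size class.
 */
static bool
small_ralloc_would_move(size_t oldsize, size_t newsize) {
	if (newsize > SMALL_MAXCLASS) {
		return true;	/* Leaves the small size-class range. */
	}
	return sz_size2index(newsize) != sz_size2index(oldsize);
}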
-void
-arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
-{
-
- malloc_mutex_lock(&arena->lock);
- arena_dalloc_large_locked(arena, chunk, ptr);
- malloc_mutex_unlock(&arena->lock);
+static void *
+arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache) {
+ if (alignment == 0) {
+ return arena_malloc(tsdn, arena, usize, sz_size2index(usize),
+ zero, tcache, true);
+ }
+ usize = sz_sa2u(usize, alignment);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ return NULL;
+ }
+ return ipalloct(tsdn, usize, alignment, zero, tcache, arena);
}
-static void
-arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t oldsize, size_t size)
-{
+void *
+arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
+ size_t size, size_t alignment, bool zero, tcache_t *tcache) {
+ size_t usize = sz_s2u(size);
+ if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) {
+ return NULL;
+ }
- assert(size < oldsize);
+ if (likely(usize <= SMALL_MAXCLASS)) {
+ /* Try to avoid moving the allocation. */
+ if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) {
+ return ptr;
+ }
+ }
+
+ if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) {
+ return large_ralloc(tsdn, arena, iealloc(tsdn, ptr), usize,
+ alignment, zero, tcache);
+ }
/*
- * Shrink the run, and make trailing pages available for other
- * allocations.
+ * size and oldsize are different enough that we need to move the
+ * object. In that case, fall back to allocating new space and copying.
*/
- malloc_mutex_lock(&arena->lock);
- arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
- true);
- if (config_stats) {
- arena->stats.ndalloc_large++;
- arena->stats.allocated_large -= oldsize;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
-
- arena->stats.nmalloc_large++;
- arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
+ void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment,
+ zero, tcache);
+ if (ret == NULL) {
+ return NULL;
}
- malloc_mutex_unlock(&arena->lock);
-}
-static bool
-arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
- size_t oldsize, size_t size, size_t extra, bool zero)
-{
- size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
- size_t npages = oldsize >> LG_PAGE;
- size_t followsize;
-
- assert(oldsize == arena_mapbits_large_size_get(chunk, pageind));
-
- /* Try to extend the run. */
- assert(size + extra > oldsize);
- malloc_mutex_lock(&arena->lock);
- if (pageind + npages < chunk_npages &&
- arena_mapbits_allocated_get(chunk, pageind+npages) == 0 &&
- (followsize = arena_mapbits_unallocated_size_get(chunk,
- pageind+npages)) >= size - oldsize) {
- /*
- * The next run is available and sufficiently large. Split the
- * following run, then merge the first part with the existing
- * allocation.
- */
- size_t flag_dirty;
- size_t splitsize = (oldsize + followsize <= size + extra)
- ? followsize : size + extra - oldsize;
- arena_run_split_large(arena, (arena_run_t *)((uintptr_t)chunk +
- ((pageind+npages) << LG_PAGE)), splitsize, zero);
+ /*
+ * Junk/zero-filling were already done by
+ * ipalloc()/arena_malloc().
+ */
- size = oldsize + splitsize;
- npages = size >> LG_PAGE;
+ size_t copysize = (usize < oldsize) ? usize : oldsize;
+ memcpy(ret, ptr, copysize);
+ isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
+ return ret;
+}
- /*
- * Mark the extended run as dirty if either portion of the run
- * was dirty before allocation. This is rather pedantic,
- * because there's not actually any sequence of events that
- * could cause the resulting run to be passed to
- * arena_run_dalloc() with the dirty argument set to false
- * (which is when dirty flag consistency would really matter).
- */
- flag_dirty = arena_mapbits_dirty_get(chunk, pageind) |
- arena_mapbits_dirty_get(chunk, pageind+npages-1);
- arena_mapbits_large_set(chunk, pageind, size, flag_dirty);
- arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty);
+dss_prec_t
+arena_dss_prec_get(arena_t *arena) {
+ return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE);
+}
- if (config_stats) {
- arena->stats.ndalloc_large++;
- arena->stats.allocated_large -= oldsize;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
- arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
-
- arena->stats.nmalloc_large++;
- arena->stats.nrequests_large++;
- arena->stats.allocated_large += size;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
- arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
- }
- malloc_mutex_unlock(&arena->lock);
- return (false);
+bool
+arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) {
+ if (!have_dss) {
+ return (dss_prec != dss_prec_disabled);
}
- malloc_mutex_unlock(&arena->lock);
-
- return (true);
+ atomic_store_u(&arena->dss_prec, (unsigned)dss_prec, ATOMIC_RELEASE);
+ return false;
}
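/*
 * The RELEASE store above pairs with the ACQUIRE load in
 * arena_dss_prec_get(): a reader that observes the new dss_prec value is
 * guaranteed to also observe every write made before the setter stored
 * it.
 */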
-#ifdef JEMALLOC_JET
-#undef arena_ralloc_junk_large
-#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl)
-#endif
-static void
-arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize)
-{
+ssize_t
+arena_dirty_decay_ms_default_get(void) {
+ return atomic_load_zd(&dirty_decay_ms_default, ATOMIC_RELAXED);
+}
- if (config_fill && opt_junk) {
- memset((void *)((uintptr_t)ptr + usize), 0x5a,
- old_usize - usize);
+bool
+arena_dirty_decay_ms_default_set(ssize_t decay_ms) {
+ if (!arena_decay_ms_valid(decay_ms)) {
+ return true;
}
+ atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED);
+ return false;
}
-#ifdef JEMALLOC_JET
-#undef arena_ralloc_junk_large
-#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large)
-arena_ralloc_junk_large_t *arena_ralloc_junk_large =
- JEMALLOC_N(arena_ralloc_junk_large_impl);
-#endif
-/*
- * Try to resize a large allocation, in order to avoid copying. This will
- * always fail if growing an object, and the following run is already in use.
- */
-static bool
-arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
- bool zero)
-{
- size_t psize;
-
- psize = PAGE_CEILING(size + extra);
- if (psize == oldsize) {
- /* Same size class. */
- return (false);
- } else {
- arena_chunk_t *chunk;
- arena_t *arena;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- arena = chunk->arena;
-
- if (psize < oldsize) {
-			/* Fill before shrinking in order to avoid a race. */
- arena_ralloc_junk_large(ptr, oldsize, psize);
- arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
- psize);
- return (false);
- } else {
- bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
- oldsize, PAGE_CEILING(size),
- psize - PAGE_CEILING(size), zero);
- if (config_fill && ret == false && zero == false) {
- if (opt_junk) {
- memset((void *)((uintptr_t)ptr +
- oldsize), 0xa5, isalloc(ptr,
- config_prof) - oldsize);
- } else if (opt_zero) {
- memset((void *)((uintptr_t)ptr +
- oldsize), 0, isalloc(ptr,
- config_prof) - oldsize);
- }
- }
- return (ret);
- }
- }
+ssize_t
+arena_muzzy_decay_ms_default_get(void) {
+ return atomic_load_zd(&muzzy_decay_ms_default, ATOMIC_RELAXED);
}
bool
-arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
- bool zero)
-{
-
- /*
- * Avoid moving the allocation if the size class can be left the same.
- */
- if (oldsize <= arena_maxclass) {
- if (oldsize <= SMALL_MAXCLASS) {
- assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
- == oldsize);
- if ((size + extra <= SMALL_MAXCLASS &&
- SMALL_SIZE2BIN(size + extra) ==
- SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
- size + extra >= oldsize))
- return (false);
- } else {
- assert(size <= arena_maxclass);
- if (size + extra > SMALL_MAXCLASS) {
- if (arena_ralloc_large(ptr, oldsize, size,
- extra, zero) == false)
- return (false);
- }
- }
+arena_muzzy_decay_ms_default_set(ssize_t decay_ms) {
+ if (!arena_decay_ms_valid(decay_ms)) {
+ return true;
}
-
- /* Reallocation would require a move. */
- return (true);
+ atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED);
+ return false;
}
-void *
-arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
- size_t extra, size_t alignment, bool zero, bool try_tcache_alloc,
- bool try_tcache_dalloc)
-{
- void *ret;
- size_t copysize;
-
- /* Try to avoid moving the allocation. */
- if (arena_ralloc_no_move(ptr, oldsize, size, extra, zero) == false)
- return (ptr);
-
- /*
- * size and oldsize are different enough that we need to move the
- * object. In that case, fall back to allocating new space and
- * copying.
- */
- if (alignment != 0) {
- size_t usize = sa2u(size + extra, alignment);
- if (usize == 0)
- return (NULL);
- ret = ipalloct(usize, alignment, zero, try_tcache_alloc, arena);
- } else
- ret = arena_malloc(arena, size + extra, zero, try_tcache_alloc);
-
- if (ret == NULL) {
- if (extra == 0)
- return (NULL);
- /* Try again, this time without extra. */
- if (alignment != 0) {
- size_t usize = sa2u(size, alignment);
- if (usize == 0)
- return (NULL);
- ret = ipalloct(usize, alignment, zero, try_tcache_alloc,
- arena);
- } else
- ret = arena_malloc(arena, size, zero, try_tcache_alloc);
-
- if (ret == NULL)
- return (NULL);
+bool
+arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit,
+ size_t *new_limit) {
+ assert(opt_retain);
+
+ pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0);
+ if (new_limit != NULL) {
+ size_t limit = *new_limit;
+ /* Grow no more than the new limit. */
+ if ((new_ind = sz_psz2ind(limit + 1) - 1) >
+ EXTENT_GROW_MAX_PIND) {
+ return true;
+ }
}
- /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
+ malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx);
+ if (old_limit != NULL) {
+ *old_limit = sz_pind2sz(arena->retain_grow_limit);
+ }
+ if (new_limit != NULL) {
+ arena->retain_grow_limit = new_ind;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx);
- /*
- * Copy at most size bytes (not size+extra), since the caller has no
- * expectation that the extra bytes will be reliably preserved.
- */
- copysize = (size < oldsize) ? size : oldsize;
- VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
- memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
- return (ret);
+ return false;
}
-dss_prec_t
-arena_dss_prec_get(arena_t *arena)
-{
- dss_prec_t ret;
+unsigned
+arena_nthreads_get(arena_t *arena, bool internal) {
+ return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED);
+}
- malloc_mutex_lock(&arena->lock);
- ret = arena->dss_prec;
- malloc_mutex_unlock(&arena->lock);
- return (ret);
+void
+arena_nthreads_inc(arena_t *arena, bool internal) {
+ atomic_fetch_add_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED);
}
void
-arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec)
-{
+arena_nthreads_dec(arena_t *arena, bool internal) {
+ atomic_fetch_sub_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED);
+}
- malloc_mutex_lock(&arena->lock);
- arena->dss_prec = dss_prec;
- malloc_mutex_unlock(&arena->lock);
+size_t
+arena_extent_sn_next(arena_t *arena) {
+ return atomic_fetch_add_zu(&arena->extent_sn_next, 1, ATOMIC_RELAXED);
}
-void
-arena_stats_merge(arena_t *arena, const char **dss, size_t *nactive,
- size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
- malloc_large_stats_t *lstats)
-{
+arena_t *
+arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
+ arena_t *arena;
+ base_t *base;
unsigned i;
- malloc_mutex_lock(&arena->lock);
- *dss = dss_prec_names[arena->dss_prec];
- *nactive += arena->nactive;
- *ndirty += arena->ndirty;
+ if (ind == 0) {
+ base = b0get();
+ } else {
+ base = base_new(tsdn, ind, extent_hooks);
+ if (base == NULL) {
+ return NULL;
+ }
+ }
+
+ arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE);
+ if (arena == NULL) {
+ goto label_error;
+ }
- astats->mapped += arena->stats.mapped;
- astats->npurge += arena->stats.npurge;
- astats->nmadvise += arena->stats.nmadvise;
- astats->purged += arena->stats.purged;
- astats->allocated_large += arena->stats.allocated_large;
- astats->nmalloc_large += arena->stats.nmalloc_large;
- astats->ndalloc_large += arena->stats.ndalloc_large;
- astats->nrequests_large += arena->stats.nrequests_large;
+ atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
+ atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
+ arena->last_thd = NULL;
- for (i = 0; i < nlclasses; i++) {
- lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
- lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
- lstats[i].nrequests += arena->stats.lstats[i].nrequests;
- lstats[i].curruns += arena->stats.lstats[i].curruns;
+ if (config_stats) {
+ if (arena_stats_init(tsdn, &arena->stats)) {
+ goto label_error;
+ }
+
+ ql_new(&arena->tcache_ql);
+ ql_new(&arena->cache_bin_array_descriptor_ql);
+ if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql",
+ WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) {
+ goto label_error;
+ }
}
- malloc_mutex_unlock(&arena->lock);
- for (i = 0; i < NBINS; i++) {
- arena_bin_t *bin = &arena->bins[i];
-
- malloc_mutex_lock(&bin->lock);
- bstats[i].allocated += bin->stats.allocated;
- bstats[i].nmalloc += bin->stats.nmalloc;
- bstats[i].ndalloc += bin->stats.ndalloc;
- bstats[i].nrequests += bin->stats.nrequests;
- if (config_tcache) {
- bstats[i].nfills += bin->stats.nfills;
- bstats[i].nflushes += bin->stats.nflushes;
+ if (config_prof) {
+ if (prof_accum_init(tsdn, &arena->prof_accum)) {
+ goto label_error;
}
- bstats[i].nruns += bin->stats.nruns;
- bstats[i].reruns += bin->stats.reruns;
- bstats[i].curruns += bin->stats.curruns;
- malloc_mutex_unlock(&bin->lock);
}
-}
-bool
-arena_new(arena_t *arena, unsigned ind)
-{
- unsigned i;
- arena_bin_t *bin;
+ if (config_cache_oblivious) {
+ /*
+ * A nondeterministic seed based on the address of arena reduces
+ * the likelihood of lockstep non-uniform cache index
+ * utilization among identical concurrent processes, but at the
+ * cost of test repeatability. For debug builds, instead use a
+ * deterministic seed.
+ */
+ atomic_store_zu(&arena->offset_state, config_debug ? ind :
+ (size_t)(uintptr_t)arena, ATOMIC_RELAXED);
+ }
- arena->ind = ind;
- arena->nthreads = 0;
+ atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED);
- if (malloc_mutex_init(&arena->lock))
- return (true);
+ atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(),
+ ATOMIC_RELAXED);
- if (config_stats) {
- memset(&arena->stats, 0, sizeof(arena_stats_t));
- arena->stats.lstats =
- (malloc_large_stats_t *)base_alloc(nlclasses *
- sizeof(malloc_large_stats_t));
- if (arena->stats.lstats == NULL)
- return (true);
- memset(arena->stats.lstats, 0, nlclasses *
- sizeof(malloc_large_stats_t));
- if (config_tcache)
- ql_new(&arena->tcache_ql);
- }
+ atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED);
- if (config_prof)
- arena->prof_accumbytes = 0;
+ extent_list_init(&arena->large);
+ if (malloc_mutex_init(&arena->large_mtx, "arena_large",
+ WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) {
+ goto label_error;
+ }
- arena->dss_prec = chunk_dss_prec_get();
+ /*
+ * Delay coalescing for dirty extents despite the disruptive effect on
+ * memory layout for best-fit extent allocation, since cached extents
+ * are likely to be reused soon after deallocation, and the cost of
+ * merging/splitting extents is non-trivial.
+ */
+ if (extents_init(tsdn, &arena->extents_dirty, extent_state_dirty,
+ true)) {
+ goto label_error;
+ }
+ /*
+ * Coalesce muzzy extents immediately, because operations on them are in
+ * the critical path much less often than for dirty extents.
+ */
+ if (extents_init(tsdn, &arena->extents_muzzy, extent_state_muzzy,
+ false)) {
+ goto label_error;
+ }
+ /*
+ * Coalesce retained extents immediately, in part because they will
+ * never be evicted (and therefore there's no opportunity for delayed
+ * coalescing), but also because operations on retained extents are not
+ * in the critical path.
+ */
+ if (extents_init(tsdn, &arena->extents_retained, extent_state_retained,
+ false)) {
+ goto label_error;
+ }
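/*
 * Summarizing the three extents_init() calls above (the final boolean
 * presumably selects delayed coalescing; the parameter name is inferred,
 * not confirmed by this diff): dirty -> delayed coalescing, muzzy ->
 * immediate, retained -> immediate.
 */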
- /* Initialize chunks. */
- arena_chunk_dirty_new(&arena->chunks_dirty);
- arena->spare = NULL;
+ if (arena_decay_init(&arena->decay_dirty,
+ arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) {
+ goto label_error;
+ }
+ if (arena_decay_init(&arena->decay_muzzy,
+ arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) {
+ goto label_error;
+ }
- arena->nactive = 0;
- arena->ndirty = 0;
- arena->npurgatory = 0;
+ arena->extent_grow_next = sz_psz2ind(HUGEPAGE);
+ arena->retain_grow_limit = EXTENT_GROW_MAX_PIND;
+ if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow",
+ WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) {
+ goto label_error;
+ }
- arena_avail_tree_new(&arena->runs_avail);
+ extent_avail_new(&arena->extent_avail);
+ if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail",
+ WITNESS_RANK_EXTENT_AVAIL, malloc_mutex_rank_exclusive)) {
+ goto label_error;
+ }
/* Initialize bins. */
for (i = 0; i < NBINS; i++) {
- bin = &arena->bins[i];
- if (malloc_mutex_init(&bin->lock))
- return (true);
- bin->runcur = NULL;
- arena_run_tree_new(&bin->runs);
- if (config_stats)
- memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+ bool err = bin_init(&arena->bins[i]);
+ if (err) {
+ goto label_error;
+ }
}
- return (false);
-}
-
-/*
- * Calculate bin_info->run_size such that it meets the following constraints:
- *
- * *) bin_info->run_size >= min_run_size
- * *) bin_info->run_size <= arena_maxclass
- * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
- * *) bin_info->nregs <= RUN_MAXREGS
- *
- * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
- * calculated here, since these settings are all interdependent.
- */
-static size_t
-bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
-{
- size_t pad_size;
- size_t try_run_size, good_run_size;
- uint32_t try_nregs, good_nregs;
- uint32_t try_hdr_size, good_hdr_size;
- uint32_t try_bitmap_offset, good_bitmap_offset;
- uint32_t try_ctx0_offset, good_ctx0_offset;
- uint32_t try_redzone0_offset, good_redzone0_offset;
-
- assert(min_run_size >= PAGE);
- assert(min_run_size <= arena_maxclass);
+ arena->base = base;
+ /* Set arena before creating background threads. */
+ arena_set(ind, arena);
- /*
- * Determine redzone size based on minimum alignment and minimum
- * redzone size. Add padding to the end of the run if it is needed to
- * align the regions. The padding allows each redzone to be half the
- * minimum alignment; without the padding, each redzone would have to
- * be twice as large in order to maintain alignment.
- */
- if (config_fill && opt_redzone) {
- size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1);
- if (align_min <= REDZONE_MINSIZE) {
- bin_info->redzone_size = REDZONE_MINSIZE;
- pad_size = 0;
- } else {
- bin_info->redzone_size = align_min >> 1;
- pad_size = bin_info->redzone_size;
- }
- } else {
- bin_info->redzone_size = 0;
- pad_size = 0;
- }
- bin_info->reg_interval = bin_info->reg_size +
- (bin_info->redzone_size << 1);
+ nstime_init(&arena->create_time, 0);
+ nstime_update(&arena->create_time);
- /*
- * Calculate known-valid settings before entering the run_size
- * expansion loop, so that the first part of the loop always copies
- * valid settings.
- *
- * The do..while loop iteratively reduces the number of regions until
- * the run header and the regions no longer overlap. A closed formula
- * would be quite messy, since there is an interdependency between the
- * header's mask length and the number of regions.
- */
- try_run_size = min_run_size;
- try_nregs = ((try_run_size - sizeof(arena_run_t)) /
- bin_info->reg_interval)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
- }
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
- if (config_prof && opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /* Add space for one (prof_ctx_t *) per region. */
- try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
- } else
- try_ctx0_offset = 0;
- try_redzone0_offset = try_run_size - (try_nregs *
- bin_info->reg_interval) - pad_size;
- } while (try_hdr_size > try_redzone0_offset);
-
- /* run_size expansion loop. */
- do {
+ /* We don't support reentrancy for arena 0 bootstrapping. */
+ if (ind != 0) {
/*
- * Copy valid settings before trying more aggressive settings.
+ * If we're here, then arena 0 already exists, so bootstrapping
+ * is done enough that we should have tsd.
*/
- good_run_size = try_run_size;
- good_nregs = try_nregs;
- good_hdr_size = try_hdr_size;
- good_bitmap_offset = try_bitmap_offset;
- good_ctx0_offset = try_ctx0_offset;
- good_redzone0_offset = try_redzone0_offset;
-
- /* Try more aggressive settings. */
- try_run_size += PAGE;
- try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) /
- bin_info->reg_interval)
- + 1; /* Counter-act try_nregs-- in loop. */
- if (try_nregs > RUN_MAXREGS) {
- try_nregs = RUN_MAXREGS
- + 1; /* Counter-act try_nregs-- in loop. */
+ assert(!tsdn_null(tsdn));
+ pre_reentrancy(tsdn_tsd(tsdn), arena);
+ if (hooks_arena_new_hook) {
+ hooks_arena_new_hook();
}
- do {
- try_nregs--;
- try_hdr_size = sizeof(arena_run_t);
- /* Pad to a long boundary. */
- try_hdr_size = LONG_CEILING(try_hdr_size);
- try_bitmap_offset = try_hdr_size;
- /* Add space for bitmap. */
- try_hdr_size += bitmap_size(try_nregs);
- if (config_prof && opt_prof && prof_promote == false) {
- /* Pad to a quantum boundary. */
- try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_ctx0_offset = try_hdr_size;
- /*
- * Add space for one (prof_ctx_t *) per region.
- */
- try_hdr_size += try_nregs *
- sizeof(prof_ctx_t *);
- }
- try_redzone0_offset = try_run_size - (try_nregs *
- bin_info->reg_interval) - pad_size;
- } while (try_hdr_size > try_redzone0_offset);
- } while (try_run_size <= arena_maxclass
- && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) >
- RUN_MAX_OVRHD_RELAX
- && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
- && try_nregs < RUN_MAXREGS);
-
- assert(good_hdr_size <= good_redzone0_offset);
-
- /* Copy final settings. */
- bin_info->run_size = good_run_size;
- bin_info->nregs = good_nregs;
- bin_info->bitmap_offset = good_bitmap_offset;
- bin_info->ctx0_offset = good_ctx0_offset;
- bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
-
- assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
- * bin_info->reg_interval) + pad_size == bin_info->run_size);
+ post_reentrancy(tsdn_tsd(tsdn));
+ }
- return (good_run_size);
+ return arena;
+label_error:
+ if (ind != 0) {
+ base_delete(tsdn, base);
+ }
+ return NULL;
}
-static void
-bin_info_init(void)
-{
- arena_bin_info_t *bin_info;
- size_t prev_run_size = PAGE;
-
-#define SIZE_CLASS(bin, delta, size) \
- bin_info = &arena_bin_info[bin]; \
- bin_info->reg_size = size; \
- prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
- bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+void
+arena_boot(void) {
+ arena_dirty_decay_ms_default_set(opt_dirty_decay_ms);
+ arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms);
+#define REGIND_bin_yes(index, reg_size) \
+ div_init(&arena_binind_div_info[(index)], (reg_size));
+#define REGIND_bin_no(index, reg_size)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \
+ lg_delta_lookup) \
+ REGIND_bin_##bin(index, (1U<<lg_grp) + (ndelta << lg_delta))
SIZE_CLASSES
-#undef SIZE_CLASS
+#undef REGIND_bin_yes
+#undef REGIND_bin_no
+#undef SC
}
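/*
 * The REGIND_bin_##bin construction above is the X-macro pattern:
 * SIZE_CLASSES expands SC(...) once per size class, and the bin argument
 * ("yes"/"no") selects which expansion does work. A minimal
 * self-contained example of the same technique (the color table is
 * hypothetical):
 */
#define COLORS C(RED, 0xff0000) C(GREEN, 0x00ff00) C(BLUE, 0x0000ff)

#define C(name, rgb) COLOR_##name,
enum color { COLORS NCOLORS };	/* COLOR_RED, ..., plus a count. */
#undef C

#define C(name, rgb) [COLOR_##name] = rgb,
static const unsigned color_rgb[NCOLORS] = { COLORS };
#undef C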
void
-arena_boot(void)
-{
- size_t header_size;
- unsigned i;
+arena_prefork0(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_prefork(tsdn, &arena->decay_dirty.mtx);
+ malloc_mutex_prefork(tsdn, &arena->decay_muzzy.mtx);
+}
- /*
- * Compute the header size such that it is large enough to contain the
- * page map. The page map is biased to omit entries for the header
- * itself, so some iteration is necessary to compute the map bias.
- *
- * 1) Compute safe header_size and map_bias values that include enough
- * space for an unbiased page map.
- * 2) Refine map_bias based on (1) to omit the header pages in the page
- * map. The resulting map_bias may be one too small.
- * 3) Refine map_bias based on (2). The result will be >= the result
- * from (2), and will always be correct.
- */
- map_bias = 0;
- for (i = 0; i < 3; i++) {
- header_size = offsetof(arena_chunk_t, map) +
- (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
- map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK)
- != 0);
+void
+arena_prefork1(tsdn_t *tsdn, arena_t *arena) {
+ if (config_stats) {
+ malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx);
}
- assert(map_bias > 0);
+}
- arena_maxclass = chunksize - (map_bias << LG_PAGE);
+void
+arena_prefork2(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx);
+}
- bin_info_init();
+void
+arena_prefork3(tsdn_t *tsdn, arena_t *arena) {
+ extents_prefork(tsdn, &arena->extents_dirty);
+ extents_prefork(tsdn, &arena->extents_muzzy);
+ extents_prefork(tsdn, &arena->extents_retained);
}
void
-arena_prefork(arena_t *arena)
-{
- unsigned i;
+arena_prefork4(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx);
+}
- malloc_mutex_prefork(&arena->lock);
- for (i = 0; i < NBINS; i++)
- malloc_mutex_prefork(&arena->bins[i].lock);
+void
+arena_prefork5(tsdn_t *tsdn, arena_t *arena) {
+ base_prefork(tsdn, arena->base);
}
void
-arena_postfork_parent(arena_t *arena)
-{
+arena_prefork6(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_prefork(tsdn, &arena->large_mtx);
+}
+
+void
+arena_prefork7(tsdn_t *tsdn, arena_t *arena) {
+ for (unsigned i = 0; i < NBINS; i++) {
+ bin_prefork(tsdn, &arena->bins[i]);
+ }
+}
+
+void
+arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) {
unsigned i;
- for (i = 0; i < NBINS; i++)
- malloc_mutex_postfork_parent(&arena->bins[i].lock);
- malloc_mutex_postfork_parent(&arena->lock);
+ for (i = 0; i < NBINS; i++) {
+ bin_postfork_parent(tsdn, &arena->bins[i]);
+ }
+ malloc_mutex_postfork_parent(tsdn, &arena->large_mtx);
+ base_postfork_parent(tsdn, arena->base);
+ malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx);
+ extents_postfork_parent(tsdn, &arena->extents_dirty);
+ extents_postfork_parent(tsdn, &arena->extents_muzzy);
+ extents_postfork_parent(tsdn, &arena->extents_retained);
+ malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx);
+ malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx);
+ malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx);
+ if (config_stats) {
+ malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx);
+ }
}
void
-arena_postfork_child(arena_t *arena)
-{
+arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
unsigned i;
- for (i = 0; i < NBINS; i++)
- malloc_mutex_postfork_child(&arena->bins[i].lock);
- malloc_mutex_postfork_child(&arena->lock);
+ atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED);
+ atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED);
+ if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) {
+ arena_nthreads_inc(arena, false);
+ }
+ if (tsd_iarena_get(tsdn_tsd(tsdn)) == arena) {
+ arena_nthreads_inc(arena, true);
+ }
+ if (config_stats) {
+ ql_new(&arena->tcache_ql);
+ ql_new(&arena->cache_bin_array_descriptor_ql);
+ tcache_t *tcache = tcache_get(tsdn_tsd(tsdn));
+ if (tcache != NULL && tcache->arena == arena) {
+ ql_elm_new(tcache, link);
+ ql_tail_insert(&arena->tcache_ql, tcache, link);
+ cache_bin_array_descriptor_init(
+ &tcache->cache_bin_array_descriptor,
+ tcache->bins_small, tcache->bins_large);
+ ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
+ &tcache->cache_bin_array_descriptor, link);
+ }
+ }
+
+ for (i = 0; i < NBINS; i++) {
+ bin_postfork_child(tsdn, &arena->bins[i]);
+ }
+ malloc_mutex_postfork_child(tsdn, &arena->large_mtx);
+ base_postfork_child(tsdn, arena->base);
+ malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx);
+ extents_postfork_child(tsdn, &arena->extents_dirty);
+ extents_postfork_child(tsdn, &arena->extents_muzzy);
+ extents_postfork_child(tsdn, &arena->extents_retained);
+ malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx);
+ malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx);
+ malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx);
+ if (config_stats) {
+ malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx);
+ }
}
diff --git a/deps/jemalloc/src/atomic.c b/deps/jemalloc/src/atomic.c
deleted file mode 100644
index 77ee31311..000000000
--- a/deps/jemalloc/src/atomic.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define JEMALLOC_ATOMIC_C_
-#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/deps/jemalloc/src/background_thread.c b/deps/jemalloc/src/background_thread.c
new file mode 100644
index 000000000..3517a3bb8
--- /dev/null
+++ b/deps/jemalloc/src/background_thread.c
@@ -0,0 +1,909 @@
+#define JEMALLOC_BACKGROUND_THREAD_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+
+/******************************************************************************/
+/* Data. */
+
+/* This option should be opt-in only. */
+#define BACKGROUND_THREAD_DEFAULT false
+/* Read-only after initialization. */
+bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
+size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;
+
+/* Used for thread creation, termination and stats. */
+malloc_mutex_t background_thread_lock;
+/* Indicates global state. Atomic because decay reads this w/o locking. */
+atomic_b_t background_thread_enabled_state;
+size_t n_background_threads;
+size_t max_background_threads;
+/* Thread info per-index. */
+background_thread_info_t *background_thread_info;
+
+/* False if the necessary runtime support is missing. */
+bool can_enable_background_thread;
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+#include <dlfcn.h>
+
+static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
+ void *(*)(void *), void *__restrict);
+
+static void
+pthread_create_wrapper_init(void) {
+#ifdef JEMALLOC_LAZY_LOCK
+ if (!isthreaded) {
+ isthreaded = true;
+ }
+#endif
+}
+
+int
+pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *__restrict arg) {
+ pthread_create_wrapper_init();
+
+ return pthread_create_fptr(thread, attr, start_routine, arg);
+}
+#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
+
+#ifndef JEMALLOC_BACKGROUND_THREAD
+#define NOT_REACHED { not_reached(); }
+bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
+bool background_threads_enable(tsd_t *tsd) NOT_REACHED
+bool background_threads_disable(tsd_t *tsd) NOT_REACHED
+void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+ arena_decay_t *decay, size_t npages_new) NOT_REACHED
+void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
+void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
+void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
+void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
+bool background_thread_stats_read(tsdn_t *tsdn,
+ background_thread_stats_t *stats) NOT_REACHED
+void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
+#undef NOT_REACHED
+#else
+
+static bool background_thread_enabled_at_fork;
+
+static void
+background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
+ background_thread_wakeup_time_set(tsdn, info, 0);
+ info->npages_to_purge_new = 0;
+ if (config_stats) {
+ info->tot_n_runs = 0;
+ nstime_init(&info->tot_sleep_time, 0);
+ }
+}
+
+static inline bool
+set_current_thread_affinity(UNUSED int cpu) {
+#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
+
+ return (ret != 0);
+#else
+ return false;
+#endif
+}
+
+/* Threshold for determining when to wake up the background thread. */
+#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
+#define BILLION UINT64_C(1000000000)
+/* Minimal sleep interval: 100 ms. */
+#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
+
+static inline size_t
+decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
+ size_t i;
+ uint64_t sum = 0;
+ for (i = 0; i < interval; i++) {
+ sum += decay->backlog[i] * h_steps[i];
+ }
+ for (; i < SMOOTHSTEP_NSTEPS; i++) {
+ sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
+ }
+
+ return (size_t)(sum >> SMOOTHSTEP_BFP);
+}
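/*
 * On the fixed-point math above: h_steps[] stores the smoothstep curve
 * scaled by 2^SMOOTHSTEP_BFP, so each backlog[i] * h_steps[i] term is a
 * page count in that fixed-point format, and the final shift converts
 * back to whole pages. A standalone sketch with a made-up 4-entry curve
 * (the values are illustrative, not jemalloc's table):
 */
#include <stdint.h>

#define BFP 24	/* Fraction bits, standing in for SMOOTHSTEP_BFP. */
static const uint64_t curve[4] = {
	1u << 22, 1u << 23, 3u << 22, 1u << 24,	/* 0.25, 0.5, 0.75, 1.0 */
};

static uint64_t
npurge_estimate(const uint64_t backlog[4]) {
	uint64_t sum = 0;
	for (int i = 0; i < 4; i++) {
		sum += backlog[i] * curve[i];	/* Pages, in 2^BFP fixed point. */
	}
	return sum >> BFP;	/* Back to whole pages. */
}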
+
+static uint64_t
+arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
+ extents_t *extents) {
+ if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+ /* Use minimal interval if decay is contended. */
+ return BACKGROUND_THREAD_MIN_INTERVAL_NS;
+ }
+
+ uint64_t interval;
+ ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+ if (decay_time <= 0) {
+ /* Purging is eagerly done or disabled currently. */
+ interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+ goto label_done;
+ }
+
+ uint64_t decay_interval_ns = nstime_ns(&decay->interval);
+ assert(decay_interval_ns > 0);
+ size_t npages = extents_npages_get(extents);
+ if (npages == 0) {
+ unsigned i;
+ for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+ if (decay->backlog[i] > 0) {
+ break;
+ }
+ }
+ if (i == SMOOTHSTEP_NSTEPS) {
+ /* No dirty pages recorded. Sleep indefinitely. */
+ interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+ goto label_done;
+ }
+ }
+ if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+ /* Use max interval. */
+ interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
+ goto label_done;
+ }
+
+ size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
+ size_t ub = SMOOTHSTEP_NSTEPS;
+	/* At least 2 intervals are needed to reach the next epoch deadline. */
+ lb = (lb < 2) ? 2 : lb;
+ if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
+ (lb + 2 > ub)) {
+ interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
+ goto label_done;
+ }
+
+ assert(lb + 2 <= ub);
+ size_t npurge_lb, npurge_ub;
+ npurge_lb = decay_npurge_after_interval(decay, lb);
+ if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+ interval = decay_interval_ns * lb;
+ goto label_done;
+ }
+ npurge_ub = decay_npurge_after_interval(decay, ub);
+ if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+ interval = decay_interval_ns * ub;
+ goto label_done;
+ }
+
+ unsigned n_search = 0;
+ size_t target, npurge;
+ while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
+ && (lb + 2 < ub)) {
+ target = (lb + ub) / 2;
+ npurge = decay_npurge_after_interval(decay, target);
+ if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+ ub = target;
+ npurge_ub = npurge;
+ } else {
+ lb = target;
+ npurge_lb = npurge;
+ }
+ assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
+ }
+ interval = decay_interval_ns * (ub + lb) / 2;
+label_done:
+ interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
+ BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
+ malloc_mutex_unlock(tsdn, &decay->mtx);
+
+ return interval;
+}
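/*
 * The loop above is a plain bisection: it enters with npurge(lb) at or
 * below the wakeup threshold and npurge(ub) above it, halves [lb, ub]
 * while preserving that bracket, and stops once the bracket is tight
 * (the assert bounds the search at lg(SMOOTHSTEP_NSTEPS) + 1 probes).
 * The midpoint interval then purges roughly one threshold's worth of
 * pages per wakeup.
 */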
+
+/* Compute purge interval for background threads. */
+static uint64_t
+arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
+ uint64_t i1, i2;
+ i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
+ &arena->extents_dirty);
+ if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+ return i1;
+ }
+ i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
+ &arena->extents_muzzy);
+
+ return i1 < i2 ? i1 : i2;
+}
+
+static void
+background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
+ uint64_t interval) {
+ if (config_stats) {
+ info->tot_n_runs++;
+ }
+ info->npages_to_purge_new = 0;
+
+ struct timeval tv;
+ /* Specific clock required by timedwait. */
+ gettimeofday(&tv, NULL);
+ nstime_t before_sleep;
+ nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);
+
+ int ret;
+ if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
+ assert(background_thread_indefinite_sleep(info));
+ ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
+ assert(ret == 0);
+ } else {
+ assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
+ interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		/* We need the malloc clock (which can differ from tv). */
+ nstime_t next_wakeup;
+ nstime_init(&next_wakeup, 0);
+ nstime_update(&next_wakeup);
+ nstime_iadd(&next_wakeup, interval);
+ assert(nstime_ns(&next_wakeup) <
+ BACKGROUND_THREAD_INDEFINITE_SLEEP);
+ background_thread_wakeup_time_set(tsdn, info,
+ nstime_ns(&next_wakeup));
+
+ nstime_t ts_wakeup;
+ nstime_copy(&ts_wakeup, &before_sleep);
+ nstime_iadd(&ts_wakeup, interval);
+ struct timespec ts;
+ ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
+ ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
+
+ assert(!background_thread_indefinite_sleep(info));
+ ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
+ assert(ret == ETIMEDOUT || ret == 0);
+ background_thread_wakeup_time_set(tsdn, info,
+ BACKGROUND_THREAD_INDEFINITE_SLEEP);
+ }
+ if (config_stats) {
+ gettimeofday(&tv, NULL);
+ nstime_t after_sleep;
+ nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
+ if (nstime_compare(&after_sleep, &before_sleep) > 0) {
+ nstime_subtract(&after_sleep, &before_sleep);
+ nstime_add(&info->tot_sleep_time, &after_sleep);
+ }
+ }
+}
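/*
 * A sketch of the absolute-deadline pattern used above, assuming only
 * standard POSIX calls: pthread_cond_timedwait() takes an absolute
 * CLOCK_REALTIME timespec, so the relative interval is added onto "now"
 * from gettimeofday(), with the nanosecond carry folded into tv_sec.
 */
#include <stdint.h>
#include <sys/time.h>
#include <time.h>

static struct timespec
abs_deadline(uint64_t interval_ns) {
	struct timeval now;
	gettimeofday(&now, NULL);
	/* Sub-second part in ns, plus the requested interval. */
	uint64_t ns = (uint64_t)now.tv_usec * 1000 + interval_ns;
	struct timespec ts;
	ts.tv_sec = now.tv_sec + (time_t)(ns / 1000000000);
	ts.tv_nsec = (long)(ns % 1000000000);
	return ts;
}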
+
+static bool
+background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
+ if (unlikely(info->state == background_thread_paused)) {
+ malloc_mutex_unlock(tsdn, &info->mtx);
+ /* Wait on global lock to update status. */
+ malloc_mutex_lock(tsdn, &background_thread_lock);
+ malloc_mutex_unlock(tsdn, &background_thread_lock);
+ malloc_mutex_lock(tsdn, &info->mtx);
+ return true;
+ }
+
+ return false;
+}
+
+static inline void
+background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
+    unsigned ind) {
+ uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+ unsigned narenas = narenas_total_get();
+
+ for (unsigned i = ind; i < narenas; i += max_background_threads) {
+ arena_t *arena = arena_get(tsdn, i, false);
+ if (!arena) {
+ continue;
+ }
+ arena_decay(tsdn, arena, true, false);
+ if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+ /* Min interval will be used. */
+ continue;
+ }
+ uint64_t interval = arena_decay_compute_purge_interval(tsdn,
+ arena);
+ assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
+ if (min_interval > interval) {
+ min_interval = interval;
+ }
+ }
+ background_thread_sleep(tsdn, info, min_interval);
+}
+
+static bool
+background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
+ if (info == &background_thread_info[0]) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd),
+ &background_thread_lock);
+ } else {
+ malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
+ &background_thread_lock);
+ }
+
+ pre_reentrancy(tsd, NULL);
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ bool has_thread;
+ assert(info->state != background_thread_paused);
+ if (info->state == background_thread_started) {
+ has_thread = true;
+ info->state = background_thread_stopped;
+ pthread_cond_signal(&info->cond);
+ } else {
+ has_thread = false;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+
+ if (!has_thread) {
+ post_reentrancy(tsd);
+ return false;
+ }
+ void *ret;
+ if (pthread_join(info->thread, &ret)) {
+ post_reentrancy(tsd);
+ return true;
+ }
+ assert(ret == NULL);
+ n_background_threads--;
+ post_reentrancy(tsd);
+
+ return false;
+}
+
+static void *background_thread_entry(void *ind_arg);
+
+static int
+background_thread_create_signals_masked(pthread_t *thread,
+ const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
+ /*
+ * Mask signals during thread creation so that the thread inherits
+ * an empty signal set.
+ */
+ sigset_t set;
+ sigfillset(&set);
+ sigset_t oldset;
+ int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
+ if (mask_err != 0) {
+ return mask_err;
+ }
+ int create_err = pthread_create_wrapper(thread, attr, start_routine,
+ arg);
+ /*
+ * Restore the signal mask. Failure to restore the signal mask here
+ * changes program behavior.
+ */
+ int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ if (restore_err != 0) {
+ malloc_printf("<jemalloc>: background thread creation "
+ "failed (%d), and signal mask restoration failed "
+ "(%d)\n", create_err, restore_err);
+ if (opt_abort) {
+ abort();
+ }
+ }
+ return create_err;
+}
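/*
 * This works because a thread created by pthread_create() inherits the
 * creator's signal mask: blocking everything for the duration of the
 * call guarantees the background thread never runs application signal
 * handlers, without requiring any cooperation from the new thread.
 */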
+
+static bool
+check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
+ bool *created_threads) {
+ bool ret = false;
+ if (likely(*n_created == n_background_threads)) {
+ return ret;
+ }
+
+ tsdn_t *tsdn = tsd_tsdn(tsd);
+ malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
+ for (unsigned i = 1; i < max_background_threads; i++) {
+ if (created_threads[i]) {
+ continue;
+ }
+ background_thread_info_t *info = &background_thread_info[i];
+ malloc_mutex_lock(tsdn, &info->mtx);
+		/*
+		 * If the thread is in the background_thread_paused state
+		 * (e.g. because of an arena reset), delay its creation.
+		 */
+ bool create = (info->state == background_thread_started);
+ malloc_mutex_unlock(tsdn, &info->mtx);
+ if (!create) {
+ continue;
+ }
+
+ pre_reentrancy(tsd, NULL);
+ int err = background_thread_create_signals_masked(&info->thread,
+ NULL, background_thread_entry, (void *)(uintptr_t)i);
+ post_reentrancy(tsd);
+
+ if (err == 0) {
+ (*n_created)++;
+ created_threads[i] = true;
+ } else {
+ malloc_printf("<jemalloc>: background thread "
+ "creation failed (%d)\n", err);
+ if (opt_abort) {
+ abort();
+ }
+ }
+ /* Return to restart the loop since we unlocked. */
+ ret = true;
+ break;
+ }
+ malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);
+
+ return ret;
+}
+
+static void
+background_thread0_work(tsd_t *tsd) {
+ /* Thread0 is also responsible for launching / terminating threads. */
+ VARIABLE_ARRAY(bool, created_threads, max_background_threads);
+ unsigned i;
+ for (i = 1; i < max_background_threads; i++) {
+ created_threads[i] = false;
+ }
+ /* Start working, and create more threads when asked. */
+ unsigned n_created = 1;
+ while (background_thread_info[0].state != background_thread_stopped) {
+ if (background_thread_pause_check(tsd_tsdn(tsd),
+ &background_thread_info[0])) {
+ continue;
+ }
+ if (check_background_thread_creation(tsd, &n_created,
+ (bool *)&created_threads)) {
+ continue;
+ }
+ background_work_sleep_once(tsd_tsdn(tsd),
+ &background_thread_info[0], 0);
+ }
+
+ /*
+ * Shut down other threads at exit. Note that the ctl thread is holding
+ * the global background_thread mutex (and is waiting) for us.
+ */
+ assert(!background_thread_enabled());
+ for (i = 1; i < max_background_threads; i++) {
+ background_thread_info_t *info = &background_thread_info[i];
+ assert(info->state != background_thread_paused);
+ if (created_threads[i]) {
+ background_threads_disable_single(tsd, info);
+ } else {
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ if (info->state != background_thread_stopped) {
+ /* The thread was not created. */
+ assert(info->state ==
+ background_thread_started);
+ n_background_threads--;
+ info->state = background_thread_stopped;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ }
+ }
+ background_thread_info[0].state = background_thread_stopped;
+ assert(n_background_threads == 1);
+}
+
+static void
+background_work(tsd_t *tsd, unsigned ind) {
+ background_thread_info_t *info = &background_thread_info[ind];
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
+ BACKGROUND_THREAD_INDEFINITE_SLEEP);
+ if (ind == 0) {
+ background_thread0_work(tsd);
+ } else {
+ while (info->state != background_thread_stopped) {
+ if (background_thread_pause_check(tsd_tsdn(tsd),
+ info)) {
+ continue;
+ }
+ background_work_sleep_once(tsd_tsdn(tsd), info, ind);
+ }
+ }
+ assert(info->state == background_thread_stopped);
+ background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+}
+
+static void *
+background_thread_entry(void *ind_arg) {
+ unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
+ assert(thread_ind < max_background_threads);
+#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+ pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
+#endif
+ if (opt_percpu_arena != percpu_arena_disabled) {
+ set_current_thread_affinity((int)thread_ind);
+ }
+	/*
+	 * Start periodic background work. We use an internal tsd to avoid
+	 * side effects such as triggering new arena creation (which would in
+	 * turn trigger creation of yet another background thread).
+	 */
+ background_work(tsd_internal_fetch(), thread_ind);
+ assert(pthread_equal(pthread_self(),
+ background_thread_info[thread_ind].thread));
+
+ return NULL;
+}
+
+static void
+background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+ info->state = background_thread_started;
+ background_thread_info_init(tsd_tsdn(tsd), info);
+ n_background_threads++;
+}
+
+/* Create a new background thread if needed. */
+bool
+background_thread_create(tsd_t *tsd, unsigned arena_ind) {
+ assert(have_background_thread);
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+	/* We create at most max_background_threads threads (ncpus by default). */
+ size_t thread_ind = arena_ind % max_background_threads;
+ background_thread_info_t *info = &background_thread_info[thread_ind];
+
+ bool need_new_thread;
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ need_new_thread = background_thread_enabled() &&
+ (info->state == background_thread_stopped);
+ if (need_new_thread) {
+ background_thread_init(tsd, info);
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ if (!need_new_thread) {
+ return false;
+ }
+ if (arena_ind != 0) {
+ /* Threads are created asynchronously by Thread 0. */
+ background_thread_info_t *t0 = &background_thread_info[0];
+ malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
+ assert(t0->state == background_thread_started);
+ pthread_cond_signal(&t0->cond);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
+
+ return false;
+ }
+
+ pre_reentrancy(tsd, NULL);
+ /*
+ * To avoid complications (besides reentrancy), create internal
+ * background threads with the underlying pthread_create.
+ */
+ int err = background_thread_create_signals_masked(&info->thread, NULL,
+ background_thread_entry, (void *)thread_ind);
+ post_reentrancy(tsd);
+
+ if (err != 0) {
+ malloc_printf("<jemalloc>: arena 0 background thread creation "
+ "failed (%d)\n", err);
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ info->state = background_thread_stopped;
+ n_background_threads--;
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+
+ return true;
+ }
+
+ return false;
+}
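/*
 * Design note on the split above: only thread 0 is created directly by
 * the requesting thread; any other index merely flips its info->state to
 * started and signals thread 0, which performs the actual
 * pthread_create from its own loop (see check_background_thread_creation()
 * above). This keeps thread creation out of arbitrary allocation paths.
 */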
+
+bool
+background_threads_enable(tsd_t *tsd) {
+ assert(n_background_threads == 0);
+ assert(background_thread_enabled());
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+ VARIABLE_ARRAY(bool, marked, max_background_threads);
+ unsigned i, nmarked;
+ for (i = 0; i < max_background_threads; i++) {
+ marked[i] = false;
+ }
+ nmarked = 0;
+ /* Thread 0 is required and created at the end. */
+ marked[0] = true;
+ /* Mark the threads we need to create for thread 0. */
+ unsigned n = narenas_total_get();
+ for (i = 1; i < n; i++) {
+ if (marked[i % max_background_threads] ||
+ arena_get(tsd_tsdn(tsd), i, false) == NULL) {
+ continue;
+ }
+ background_thread_info_t *info = &background_thread_info[
+ i % max_background_threads];
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ assert(info->state == background_thread_stopped);
+ background_thread_init(tsd, info);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ marked[i % max_background_threads] = true;
+ if (++nmarked == max_background_threads) {
+ break;
+ }
+ }
+
+ return background_thread_create(tsd, 0);
+}
+
+bool
+background_threads_disable(tsd_t *tsd) {
+ assert(!background_thread_enabled());
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+ /* Thread 0 will be responsible for terminating other threads. */
+ if (background_threads_disable_single(tsd,
+ &background_thread_info[0])) {
+ return true;
+ }
+ assert(n_background_threads == 0);
+
+ return false;
+}
+
+/* Check if we need to signal the background thread early. */
+void
+background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+ arena_decay_t *decay, size_t npages_new) {
+ background_thread_info_t *info = arena_background_thread_info_get(
+ arena);
+ if (malloc_mutex_trylock(tsdn, &info->mtx)) {
+		/*
+		 * The background thread may hold this mutex for a long time;
+		 * to avoid imposing that latency variance on application
+		 * threads, keep the check non-blocking and leave the work to
+		 * a future epoch.
+		 */
+ return;
+ }
+
+ if (info->state != background_thread_started) {
+ goto label_done;
+ }
+ if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+ goto label_done;
+ }
+
+ ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+ if (decay_time <= 0) {
+ /* Purging is eagerly done or disabled currently. */
+ goto label_done_unlock2;
+ }
+ uint64_t decay_interval_ns = nstime_ns(&decay->interval);
+ assert(decay_interval_ns > 0);
+
+ nstime_t diff;
+ nstime_init(&diff, background_thread_wakeup_time_get(info));
+ if (nstime_compare(&diff, &decay->epoch) <= 0) {
+ goto label_done_unlock2;
+ }
+ nstime_subtract(&diff, &decay->epoch);
+ if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+ goto label_done_unlock2;
+ }
+
+ if (npages_new > 0) {
+ size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
+ /*
+ * Compute how many new pages we would need to purge by the next
+ * wakeup, which is used to determine if we should signal the
+ * background thread.
+ */
+ uint64_t npurge_new;
+ if (n_epoch >= SMOOTHSTEP_NSTEPS) {
+ npurge_new = npages_new;
+ } else {
+ uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
+ assert(h_steps_max >=
+ h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+ npurge_new = npages_new * (h_steps_max -
+ h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+ npurge_new >>= SMOOTHSTEP_BFP;
+ }
+ info->npages_to_purge_new += npurge_new;
+ }
+
+ bool should_signal;
+ if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+ should_signal = true;
+ } else if (unlikely(background_thread_indefinite_sleep(info)) &&
+ (extents_npages_get(&arena->extents_dirty) > 0 ||
+ extents_npages_get(&arena->extents_muzzy) > 0 ||
+ info->npages_to_purge_new > 0)) {
+ should_signal = true;
+ } else {
+ should_signal = false;
+ }
+
+ if (should_signal) {
+ info->npages_to_purge_new = 0;
+ pthread_cond_signal(&info->cond);
+ }
+label_done_unlock2:
+ malloc_mutex_unlock(tsdn, &decay->mtx);
+label_done:
+ malloc_mutex_unlock(tsdn, &info->mtx);
+}
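/*
 * On the npurge_new estimate above: h_steps_max - h_steps[NSTEPS-1-n] is
 * the fraction of a page's decay that elapses over the next n epochs, in
 * the same 2^SMOOTHSTEP_BFP fixed point as elsewhere, so scaling
 * npages_new by it approximates how many of the new dirty pages would be
 * due for purging by the already-scheduled wakeup. Crossing
 * BACKGROUND_THREAD_NPAGES_THRESHOLD is what justifies waking the thread
 * early.
 */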
+
+void
+background_thread_prefork0(tsdn_t *tsdn) {
+ malloc_mutex_prefork(tsdn, &background_thread_lock);
+ background_thread_enabled_at_fork = background_thread_enabled();
+}
+
+void
+background_thread_prefork1(tsdn_t *tsdn) {
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
+ }
+}
+
+void
+background_thread_postfork_parent(tsdn_t *tsdn) {
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ malloc_mutex_postfork_parent(tsdn,
+ &background_thread_info[i].mtx);
+ }
+ malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
+}
+
+void
+background_thread_postfork_child(tsdn_t *tsdn) {
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ malloc_mutex_postfork_child(tsdn,
+ &background_thread_info[i].mtx);
+ }
+ malloc_mutex_postfork_child(tsdn, &background_thread_lock);
+ if (!background_thread_enabled_at_fork) {
+ return;
+ }
+
+ /* Clear background_thread state (reset to disabled for child). */
+ malloc_mutex_lock(tsdn, &background_thread_lock);
+ n_background_threads = 0;
+ background_thread_enabled_set(tsdn, false);
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ background_thread_info_t *info = &background_thread_info[i];
+ malloc_mutex_lock(tsdn, &info->mtx);
+ info->state = background_thread_stopped;
+ int ret = pthread_cond_init(&info->cond, NULL);
+ assert(ret == 0);
+ background_thread_info_init(tsdn, info);
+ malloc_mutex_unlock(tsdn, &info->mtx);
+ }
+ malloc_mutex_unlock(tsdn, &background_thread_lock);
+}
+
+bool
+background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
+ assert(config_stats);
+ malloc_mutex_lock(tsdn, &background_thread_lock);
+ if (!background_thread_enabled()) {
+ malloc_mutex_unlock(tsdn, &background_thread_lock);
+ return true;
+ }
+
+ stats->num_threads = n_background_threads;
+ uint64_t num_runs = 0;
+ nstime_init(&stats->run_interval, 0);
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ background_thread_info_t *info = &background_thread_info[i];
+ malloc_mutex_lock(tsdn, &info->mtx);
+ if (info->state != background_thread_stopped) {
+ num_runs += info->tot_n_runs;
+ nstime_add(&stats->run_interval, &info->tot_sleep_time);
+ }
+ malloc_mutex_unlock(tsdn, &info->mtx);
+ }
+ stats->num_runs = num_runs;
+ if (num_runs > 0) {
+ nstime_idivide(&stats->run_interval, num_runs);
+ }
+ malloc_mutex_unlock(tsdn, &background_thread_lock);
+
+ return false;
+}
+
+#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
+#undef BILLION
+#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
+
+static bool
+pthread_create_fptr_init(void) {
+ if (pthread_create_fptr != NULL) {
+ return false;
+ }
+ pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+ if (pthread_create_fptr == NULL) {
+ can_enable_background_thread = false;
+ if (config_lazy_lock || opt_background_thread) {
+ malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+ "\"pthread_create\")\n");
+ abort();
+ }
+ } else {
+ can_enable_background_thread = true;
+ }
+
+ return false;
+}
+
+/*
+ * When lazy lock is enabled, we need to make sure isthreaded is set before
+ * taking any background_thread locks. This is called early in ctl (instead of
+ * waiting for the pthread_create calls to trigger it) because the mutex is
+ * required before creating background threads.
+ */
+void
+background_thread_ctl_init(tsdn_t *tsdn) {
+ malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+ pthread_create_fptr_init();
+ pthread_create_wrapper_init();
+#endif
+}
+
+#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
+
+bool
+background_thread_boot0(void) {
+ if (!have_background_thread && opt_background_thread) {
+ malloc_printf("<jemalloc>: option background_thread currently "
+ "supports pthread only\n");
+ return true;
+ }
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+ if ((config_lazy_lock || opt_background_thread) &&
+ pthread_create_fptr_init()) {
+ return true;
+ }
+#endif
+ return false;
+}
+
+bool
+background_thread_boot1(tsdn_t *tsdn) {
+#ifdef JEMALLOC_BACKGROUND_THREAD
+ assert(have_background_thread);
+ assert(narenas_total_get() > 0);
+
+ if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
+ ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
+ opt_max_background_threads = ncpus;
+ }
+ max_background_threads = opt_max_background_threads;
+
+ background_thread_enabled_set(tsdn, opt_background_thread);
+ if (malloc_mutex_init(&background_thread_lock,
+ "background_thread_global",
+ WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
+ b0get(), opt_max_background_threads *
+ sizeof(background_thread_info_t), CACHELINE);
+ if (background_thread_info == NULL) {
+ return true;
+ }
+
+ for (unsigned i = 0; i < max_background_threads; i++) {
+ background_thread_info_t *info = &background_thread_info[i];
+ /* Thread mutex is rank_inclusive because of thread0. */
+ if (malloc_mutex_init(&info->mtx, "background_thread",
+ WITNESS_RANK_BACKGROUND_THREAD,
+ malloc_mutex_address_ordered)) {
+ return true;
+ }
+ if (pthread_cond_init(&info->cond, NULL)) {
+ return true;
+ }
+ malloc_mutex_lock(tsdn, &info->mtx);
+ info->state = background_thread_stopped;
+ background_thread_info_init(tsdn, info);
+ malloc_mutex_unlock(tsdn, &info->mtx);
+ }
+#endif
+
+ return false;
+}
diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c
index 4e62e8fa9..b0324b5d7 100644
--- a/deps/jemalloc/src/base.c
+++ b/deps/jemalloc/src/base.c
@@ -1,142 +1,514 @@
-#define JEMALLOC_BASE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_BASE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/sz.h"
/******************************************************************************/
/* Data. */
-static malloc_mutex_t base_mtx;
+static base_t *b0;
-/*
- * Current pages that are being used for internal memory allocations. These
- * pages are carved up in cacheline-size quanta, so that there is no chance of
- * false cache line sharing.
- */
-static void *base_pages;
-static void *base_next_addr;
-static void *base_past_addr; /* Addr immediately past base_pages. */
-static extent_node_t *base_nodes;
+metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
+
+const char *metadata_thp_mode_names[] = {
+ "disabled",
+ "auto",
+ "always"
+};
/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-static bool base_pages_alloc(size_t minsize);
+static inline bool
+metadata_thp_madvise(void) {
+ return (metadata_thp_enabled() &&
+ (init_system_thp_mode == thp_mode_default));
+}
-/******************************************************************************/
+static void *
+base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
+ void *addr;
+ bool zero = true;
+ bool commit = true;
-static bool
-base_pages_alloc(size_t minsize)
-{
- size_t csize;
- bool zero;
+ /* Use huge page sizes and alignment regardless of opt_metadata_thp. */
+ assert(size == HUGEPAGE_CEILING(size));
+ size_t alignment = HUGEPAGE;
+ if (extent_hooks == &extent_hooks_default) {
+ addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
+ } else {
+ /* No arena context as we are creating new arenas. */
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ pre_reentrancy(tsd, NULL);
+ addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
+ &zero, &commit, ind);
+ post_reentrancy(tsd);
+ }
- assert(minsize != 0);
- csize = CHUNK_CEILING(minsize);
- zero = false;
- base_pages = chunk_alloc(csize, chunksize, true, &zero,
- chunk_dss_prec_get());
- if (base_pages == NULL)
- return (true);
- base_next_addr = base_pages;
- base_past_addr = (void *)((uintptr_t)base_pages + csize);
+ return addr;
+}
- return (false);
+static void
+base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
+ size_t size) {
+ /*
+ * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
+ * stopping at first success. This cascade is performed for consistency
+ * with the cascade in extent_dalloc_wrapper() because an application's
+ * custom hooks may not support e.g. dalloc. This function is only ever
+ * called as a side effect of arena destruction, so although it might
+ * seem pointless to do anything besides dalloc here, the application
+ * may in fact want the end state of all associated virtual memory to be
+ * in some consistent-but-allocated state.
+ */
+ if (extent_hooks == &extent_hooks_default) {
+ if (!extent_dalloc_mmap(addr, size)) {
+ goto label_done;
+ }
+ if (!pages_decommit(addr, size)) {
+ goto label_done;
+ }
+ if (!pages_purge_forced(addr, size)) {
+ goto label_done;
+ }
+ if (!pages_purge_lazy(addr, size)) {
+ goto label_done;
+ }
+ /* Nothing worked. This should never happen. */
+ not_reached();
+ } else {
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ pre_reentrancy(tsd, NULL);
+ if (extent_hooks->dalloc != NULL &&
+ !extent_hooks->dalloc(extent_hooks, addr, size, true,
+ ind)) {
+ goto label_post_reentrancy;
+ }
+ if (extent_hooks->decommit != NULL &&
+ !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
+ ind)) {
+ goto label_post_reentrancy;
+ }
+ if (extent_hooks->purge_forced != NULL &&
+ !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
+ size, ind)) {
+ goto label_post_reentrancy;
+ }
+ if (extent_hooks->purge_lazy != NULL &&
+ !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
+ ind)) {
+ goto label_post_reentrancy;
+ }
+ /* Nothing worked. That's the application's problem. */
+ label_post_reentrancy:
+ post_reentrancy(tsd);
+ }
+label_done:
+ if (metadata_thp_madvise()) {
+ /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
+ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
+ (size & HUGEPAGE_MASK) == 0);
+ pages_nohuge(addr, size);
+ }
}
-void *
-base_alloc(size_t size)
-{
+static void
+base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
+ size_t size) {
+ size_t sn;
+
+ sn = *extent_sn_next;
+ (*extent_sn_next)++;
+
+ extent_binit(extent, addr, size, sn);
+}
+
+static size_t
+base_get_num_blocks(base_t *base, bool with_new_block) {
+ base_block_t *b = base->blocks;
+ assert(b != NULL);
+
+ size_t n_blocks = with_new_block ? 2 : 1;
+ while (b->next != NULL) {
+ n_blocks++;
+ b = b->next;
+ }
+
+ return n_blocks;
+}
+
+static void
+base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
+ assert(opt_metadata_thp == metadata_thp_auto);
+ malloc_mutex_assert_owner(tsdn, &base->mtx);
+ if (base->auto_thp_switched) {
+ return;
+ }
+ /* Called when adding a new block. */
+ bool should_switch;
+ if (base_ind_get(base) != 0) {
+ should_switch = (base_get_num_blocks(base, true) ==
+ BASE_AUTO_THP_THRESHOLD);
+ } else {
+ should_switch = (base_get_num_blocks(base, true) ==
+ BASE_AUTO_THP_THRESHOLD_A0);
+ }
+ if (!should_switch) {
+ return;
+ }
+
+ base->auto_thp_switched = true;
+ assert(!config_stats || base->n_thp == 0);
+ /* Make the initial blocks THP lazily. */
+ base_block_t *block = base->blocks;
+ while (block != NULL) {
+ assert((block->size & HUGEPAGE_MASK) == 0);
+ pages_huge(block, block->size);
+ if (config_stats) {
+ base->n_thp += HUGEPAGE_CEILING(block->size -
+ extent_bsize_get(&block->extent)) >> LG_HUGEPAGE;
+ }
+ block = block->next;
+ assert(block == NULL || (base_ind_get(base) == 0));
+ }
+}
+
+static void *
+base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
+ size_t alignment) {
+ void *ret;
+
+ assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
+ assert(size == ALIGNMENT_CEILING(size, alignment));
+
+ *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
+ alignment) - (uintptr_t)extent_addr_get(extent);
+ ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
+ assert(extent_bsize_get(extent) >= *gap_size + size);
+ extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
+ *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
+ extent_sn_get(extent));
+ return ret;
+}
+
+static void
+base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
+ void *addr, size_t size) {
+ if (extent_bsize_get(extent) > 0) {
+ /*
+ * Compute the index for the largest size class that does not
+ * exceed extent's size.
+ */
+ szind_t index_floor =
+ sz_size2index(extent_bsize_get(extent) + 1) - 1;
+ extent_heap_insert(&base->avail[index_floor], extent);
+ }
+
+ if (config_stats) {
+ base->allocated += size;
+ /*
+ * Add one PAGE to base_resident for every page boundary that is
+ * crossed by the new allocation. Adjust n_thp similarly when
+ * metadata_thp is enabled.
+ */
+ base->resident += PAGE_CEILING((uintptr_t)addr + size) -
+ PAGE_CEILING((uintptr_t)addr - gap_size);
+ assert(base->allocated <= base->resident);
+ assert(base->resident <= base->mapped);
+ if (metadata_thp_madvise() && (opt_metadata_thp ==
+ metadata_thp_always || base->auto_thp_switched)) {
+ base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
+ - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
+ LG_HUGEPAGE;
+ assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
+ }
+ }
+}
+
+static void *
+base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
+ size_t alignment) {
void *ret;
- size_t csize;
+ size_t gap_size;
- /* Round size up to nearest multiple of the cacheline size. */
- csize = CACHELINE_CEILING(size);
+ ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
+ base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
+ return ret;
+}
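
base_extent_bump_alloc_helper() is a classic bump allocator over an extent:
round the extent's address up to the requested alignment, record the bytes
lost to that rounding as gap_size, and shrink the extent to whatever follows
the new object. A minimal sketch of the same arithmetic, assuming a
power-of-two alignment:

    #include <stdint.h>
    #include <stddef.h>

    #define ALIGN_UP(x, a) (((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

    static void *
    bump_alloc(uintptr_t *cursor, size_t *remaining, size_t size,
        size_t align) {
        uintptr_t ret = ALIGN_UP(*cursor, align);
        size_t gap = ret - *cursor;       /* bytes lost to alignment */
        if (gap + size > *remaining) {
            return NULL;                  /* extent too small */
        }
        *cursor = ret + size;             /* extent now starts past object */
        *remaining -= gap + size;
        return (void *)ret;
    }

Unlike this sketch, the helper above asserts rather than checks that the
extent is large enough; callers guarantee that by first selecting a
sufficiently large extent from base->avail.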
+
+/*
+ * Allocate a block of virtual memory that is large enough to start with a
+ * base_block_t header, followed by an object of specified size and alignment.
+ * On success a pointer to the initialized base_block_t header is returned.
+ */
+static base_block_t *
+base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
+ unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
+ size_t alignment) {
+ alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
+ size_t usize = ALIGNMENT_CEILING(size, alignment);
+ size_t header_size = sizeof(base_block_t);
+ size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
+ header_size;
+ /*
+ * Create increasingly larger blocks in order to limit the total number
+ * of disjoint virtual memory ranges. Choose the next size in the page
+ * size class series (skipping size classes that are not a multiple of
+ * HUGEPAGE), or a size large enough to satisfy the requested size and
+ * alignment, whichever is larger.
+ */
+ size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
+ + usize));
+ pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
+ *pind_last;
+ size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
+ size_t block_size = (min_block_size > next_block_size) ? min_block_size
+ : next_block_size;
+ base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
+ block_size);
+ if (block == NULL) {
+ return NULL;
+ }
- malloc_mutex_lock(&base_mtx);
- /* Make sure there's enough space for the allocation. */
- if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
- if (base_pages_alloc(csize)) {
- malloc_mutex_unlock(&base_mtx);
- return (NULL);
+ if (metadata_thp_madvise()) {
+ void *addr = (void *)block;
+ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
+ (block_size & HUGEPAGE_MASK) == 0);
+ if (opt_metadata_thp == metadata_thp_always) {
+ pages_huge(addr, block_size);
+ } else if (opt_metadata_thp == metadata_thp_auto &&
+ base != NULL) {
+ /* base != NULL indicates this is not a new base. */
+ malloc_mutex_lock(tsdn, &base->mtx);
+ base_auto_thp_switch(tsdn, base);
+ if (base->auto_thp_switched) {
+ pages_huge(addr, block_size);
+ }
+ malloc_mutex_unlock(tsdn, &base->mtx);
}
}
- /* Allocate. */
- ret = base_next_addr;
- base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
- malloc_mutex_unlock(&base_mtx);
- VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
- return (ret);
+ *pind_last = sz_psz2ind(block_size);
+ block->size = block_size;
+ block->next = NULL;
+ assert(block_size >= header_size);
+ base_extent_init(extent_sn_next, &block->extent,
+ (void *)((uintptr_t)block + header_size), block_size - header_size);
+ return block;
}
-void *
-base_calloc(size_t number, size_t size)
-{
- void *ret = base_alloc(number * size);
+/*
+ * Allocate an extent that is at least as large as specified size, with
+ * specified alignment.
+ */
+static extent_t *
+base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
+ malloc_mutex_assert_owner(tsdn, &base->mtx);
- if (ret != NULL)
- memset(ret, 0, number * size);
+ extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
+ /*
+ * Drop mutex during base_block_alloc(), because an extent hook will be
+ * called.
+ */
+ malloc_mutex_unlock(tsdn, &base->mtx);
+ base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
+ base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
+ alignment);
+ malloc_mutex_lock(tsdn, &base->mtx);
+ if (block == NULL) {
+ return NULL;
+ }
+ block->next = base->blocks;
+ base->blocks = block;
+ if (config_stats) {
+ base->allocated += sizeof(base_block_t);
+ base->resident += PAGE_CEILING(sizeof(base_block_t));
+ base->mapped += block->size;
+ if (metadata_thp_madvise() &&
+ !(opt_metadata_thp == metadata_thp_auto
+ && !base->auto_thp_switched)) {
+ assert(base->n_thp > 0);
+ base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
+ LG_HUGEPAGE;
+ }
+ assert(base->allocated <= base->resident);
+ assert(base->resident <= base->mapped);
+ assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
+ }
+ return &block->extent;
+}
- return (ret);
+base_t *
+b0get(void) {
+ return b0;
}
-extent_node_t *
-base_node_alloc(void)
-{
- extent_node_t *ret;
+base_t *
+base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
+ pszind_t pind_last = 0;
+ size_t extent_sn_next = 0;
+ base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
+ &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
+ if (block == NULL) {
+ return NULL;
+ }
- malloc_mutex_lock(&base_mtx);
- if (base_nodes != NULL) {
- ret = base_nodes;
- base_nodes = *(extent_node_t **)ret;
- malloc_mutex_unlock(&base_mtx);
- VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t));
- } else {
- malloc_mutex_unlock(&base_mtx);
- ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
+ size_t gap_size;
+ size_t base_alignment = CACHELINE;
+ size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
+ base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
+ &gap_size, base_size, base_alignment);
+ base->ind = ind;
+ atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
+ if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
+ malloc_mutex_rank_exclusive)) {
+ base_unmap(tsdn, extent_hooks, ind, block, block->size);
+ return NULL;
+ }
+ base->pind_last = pind_last;
+ base->extent_sn_next = extent_sn_next;
+ base->blocks = block;
+ base->auto_thp_switched = false;
+ for (szind_t i = 0; i < NSIZES; i++) {
+ extent_heap_new(&base->avail[i]);
+ }
+ if (config_stats) {
+ base->allocated = sizeof(base_block_t);
+ base->resident = PAGE_CEILING(sizeof(base_block_t));
+ base->mapped = block->size;
+ base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
+ metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
+ >> LG_HUGEPAGE : 0;
+ assert(base->allocated <= base->resident);
+ assert(base->resident <= base->mapped);
+ assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
}
+ base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
+ base_size);
- return (ret);
+ return base;
}
void
-base_node_dealloc(extent_node_t *node)
-{
+base_delete(tsdn_t *tsdn, base_t *base) {
+ extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
+ base_block_t *next = base->blocks;
+ do {
+ base_block_t *block = next;
+ next = block->next;
+ base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
+ block->size);
+ } while (next != NULL);
+}
- VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
- malloc_mutex_lock(&base_mtx);
- *(extent_node_t **)node = base_nodes;
- base_nodes = node;
- malloc_mutex_unlock(&base_mtx);
+extent_hooks_t *
+base_extent_hooks_get(base_t *base) {
+ return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
+ ATOMIC_ACQUIRE);
}
-bool
-base_boot(void)
-{
+extent_hooks_t *
+base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
+ extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
+ atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
+ return old_extent_hooks;
+}
+
+static void *
+base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
+ size_t *esn) {
+ alignment = QUANTUM_CEILING(alignment);
+ size_t usize = ALIGNMENT_CEILING(size, alignment);
+ size_t asize = usize + alignment - QUANTUM;
+
+ extent_t *extent = NULL;
+ malloc_mutex_lock(tsdn, &base->mtx);
+ for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
+ extent = extent_heap_remove_first(&base->avail[i]);
+ if (extent != NULL) {
+ /* Use existing space. */
+ break;
+ }
+ }
+ if (extent == NULL) {
+ /* Try to allocate more space. */
+ extent = base_extent_alloc(tsdn, base, usize, alignment);
+ }
+ void *ret;
+ if (extent == NULL) {
+ ret = NULL;
+ goto label_return;
+ }
+
+ ret = base_extent_bump_alloc(base, extent, usize, alignment);
+ if (esn != NULL) {
+ *esn = extent_sn_get(extent);
+ }
+label_return:
+ malloc_mutex_unlock(tsdn, &base->mtx);
+ return ret;
+}
- base_nodes = NULL;
- if (malloc_mutex_init(&base_mtx))
- return (true);
+/*
+ * base_alloc() returns zeroed memory, which is always demand-zeroed for the
+ * auto arenas, in order to make multi-page sparse data structures such as radix
+ * tree nodes efficient with respect to physical memory usage. Upon success a
+ * pointer to at least size bytes with specified alignment is returned. Note
+ * that size is rounded up to the nearest multiple of alignment to avoid false
+ * sharing.
+ */
+void *
+base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
+ return base_alloc_impl(tsdn, base, size, alignment, NULL);
+}
- return (false);
+extent_t *
+base_alloc_extent(tsdn_t *tsdn, base_t *base) {
+ size_t esn;
+ extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
+ CACHELINE, &esn);
+ if (extent == NULL) {
+ return NULL;
+ }
+ extent_esn_set(extent, esn);
+ return extent;
}
void
-base_prefork(void)
-{
+base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
+ size_t *mapped, size_t *n_thp) {
+ cassert(config_stats);
- malloc_mutex_prefork(&base_mtx);
+ malloc_mutex_lock(tsdn, &base->mtx);
+ assert(base->allocated <= base->resident);
+ assert(base->resident <= base->mapped);
+ *allocated = base->allocated;
+ *resident = base->resident;
+ *mapped = base->mapped;
+ *n_thp = base->n_thp;
+ malloc_mutex_unlock(tsdn, &base->mtx);
}
void
-base_postfork_parent(void)
-{
+base_prefork(tsdn_t *tsdn, base_t *base) {
+ malloc_mutex_prefork(tsdn, &base->mtx);
+}
- malloc_mutex_postfork_parent(&base_mtx);
+void
+base_postfork_parent(tsdn_t *tsdn, base_t *base) {
+ malloc_mutex_postfork_parent(tsdn, &base->mtx);
}
void
-base_postfork_child(void)
-{
+base_postfork_child(tsdn_t *tsdn, base_t *base) {
+ malloc_mutex_postfork_child(tsdn, &base->mtx);
+}
- malloc_mutex_postfork_child(&base_mtx);
+bool
+base_boot(tsdn_t *tsdn) {
+ b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
+ return (b0 == NULL);
}
diff --git a/deps/jemalloc/src/bin.c b/deps/jemalloc/src/bin.c
new file mode 100644
index 000000000..0886bc4ea
--- /dev/null
+++ b/deps/jemalloc/src/bin.c
@@ -0,0 +1,50 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/witness.h"
+
+const bin_info_t bin_infos[NBINS] = {
+#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \
+ {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)},
+#define BIN_INFO_bin_no(reg_size, slab_size, nregs)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \
+ lg_delta_lookup) \
+ BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta), \
+ (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) + \
+ (ndelta<<lg_delta)))
+ SIZE_CLASSES
+#undef BIN_INFO_bin_yes
+#undef BIN_INFO_bin_no
+#undef SC
+};
+
+bool
+bin_init(bin_t *bin) {
+ if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+ bin->slabcur = NULL;
+ extent_heap_new(&bin->slabs_nonfull);
+ extent_list_init(&bin->slabs_full);
+ if (config_stats) {
+ memset(&bin->stats, 0, sizeof(bin_stats_t));
+ }
+ return false;
+}
+
+void
+bin_prefork(tsdn_t *tsdn, bin_t *bin) {
+ malloc_mutex_prefork(tsdn, &bin->lock);
+}
+
+void
+bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) {
+ malloc_mutex_postfork_parent(tsdn, &bin->lock);
+}
+
+void
+bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
+ malloc_mutex_postfork_child(tsdn, &bin->lock);
+}
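
Each bin_infos[] entry above is derived from the size-class parameters:
reg_size = (1 << lg_grp) + (ndelta << lg_delta), slab_size = pgs pages, and
nregs = slab_size / reg_size. A worked example with hypothetical parameters
(the real size-class generator picks pgs to minimize slab waste):

    #include <stddef.h>

    #define LG_PAGE 12  /* assume 4 KiB pages */

    static size_t
    bin_nregs(unsigned lg_grp, unsigned lg_delta, unsigned ndelta,
        unsigned pgs) {
        size_t reg_size  = ((size_t)1 << lg_grp) +
            ((size_t)ndelta << lg_delta);
        size_t slab_size = (size_t)pgs << LG_PAGE;
        return slab_size / reg_size;
    }

    /* bin_nregs(6, 4, 1, 1) == 51: an 80-byte class (64 + 16) packs 51
     * regions into a one-page slab, wasting 16 bytes. */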
diff --git a/deps/jemalloc/src/bitmap.c b/deps/jemalloc/src/bitmap.c
index e2bd907d5..468b3178e 100644
--- a/deps/jemalloc/src/bitmap.c
+++ b/deps/jemalloc/src/bitmap.c
@@ -1,24 +1,15 @@
-#define JEMALLOC_BITMAP_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_BITMAP_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static size_t bits2groups(size_t nbits);
+#include "jemalloc/internal/assert.h"
/******************************************************************************/
-static size_t
-bits2groups(size_t nbits)
-{
-
- return ((nbits >> LG_BITMAP_GROUP_NBITS) +
- !!(nbits & BITMAP_GROUP_NBITS_MASK));
-}
+#ifdef BITMAP_USE_TREE
void
-bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
-{
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
unsigned i;
size_t group_count;
@@ -31,60 +22,100 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
* that requires only one group.
*/
binfo->levels[0].group_offset = 0;
- group_count = bits2groups(nbits);
+ group_count = BITMAP_BITS2GROUPS(nbits);
for (i = 1; group_count > 1; i++) {
assert(i < BITMAP_MAX_LEVELS);
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
- group_count = bits2groups(group_count);
+ group_count = BITMAP_BITS2GROUPS(group_count);
}
binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
+ group_count;
+ assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX);
binfo->nlevels = i;
binfo->nbits = nbits;
}
-size_t
-bitmap_info_ngroups(const bitmap_info_t *binfo)
-{
-
- return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
-}
-
-size_t
-bitmap_size(size_t nbits)
-{
- bitmap_info_t binfo;
-
- bitmap_info_init(&binfo, nbits);
- return (bitmap_info_ngroups(&binfo));
+static size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo) {
+ return binfo->levels[binfo->nlevels].group_offset;
}
void
-bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
-{
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
size_t extra;
unsigned i;
/*
* Bits are actually inverted with regard to the external bitmap
- * interface, so the bitmap starts out with all 1 bits, except for
- * trailing unused bits (if any). Note that each group uses bit 0 to
- * correspond to the first logical bit in the group, so extra bits
- * are the most significant bits of the last group.
+ * interface.
+ */
+
+ if (fill) {
+ /* The "filled" bitmap starts out with all 0 bits. */
+ memset(bitmap, 0, bitmap_size(binfo));
+ return;
+ }
+
+ /*
+ * The "empty" bitmap starts out with all 1 bits, except for trailing
+ * unused bits (if any). Note that each group uses bit 0 to correspond
+ * to the first logical bit in the group, so extra bits are the most
+ * significant bits of the last group.
*/
- memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
- LG_SIZEOF_BITMAP);
+ memset(bitmap, 0xffU, bitmap_size(binfo));
extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
& BITMAP_GROUP_NBITS_MASK;
- if (extra != 0)
+ if (extra != 0) {
bitmap[binfo->levels[1].group_offset - 1] >>= extra;
+ }
for (i = 1; i < binfo->nlevels; i++) {
size_t group_count = binfo->levels[i].group_offset -
binfo->levels[i-1].group_offset;
extra = (BITMAP_GROUP_NBITS - (group_count &
BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
- if (extra != 0)
+ if (extra != 0) {
bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
+ }
}
}
+
+#else /* BITMAP_USE_TREE */
+
+void
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
+ assert(nbits > 0);
+ assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
+
+ binfo->ngroups = BITMAP_BITS2GROUPS(nbits);
+ binfo->nbits = nbits;
+}
+
+static size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo) {
+ return binfo->ngroups;
+}
+
+void
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
+ size_t extra;
+
+ if (fill) {
+ memset(bitmap, 0, bitmap_size(binfo));
+ return;
+ }
+
+ memset(bitmap, 0xffU, bitmap_size(binfo));
+ extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+ & BITMAP_GROUP_NBITS_MASK;
+ if (extra != 0) {
+ bitmap[binfo->ngroups - 1] >>= extra;
+ }
+}
+
+#endif /* BITMAP_USE_TREE */
+
+size_t
+bitmap_size(const bitmap_info_t *binfo) {
+ return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP);
+}
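
A minimal sketch of the flat (non-tree) path above: the bitmap is an array of
64-bit groups, a set bit means "slot free", and the unused high bits of the
last group are cleared so they can never look free:

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    #define GROUP_NBITS 64
    #define BITS2GROUPS(n) (((n) + GROUP_NBITS - 1) / GROUP_NBITS)

    static void
    flat_bitmap_init_empty(uint64_t *bitmap, size_t nbits) {
        size_t ngroups = BITS2GROUPS(nbits);
        memset(bitmap, 0xff, ngroups * sizeof(uint64_t));
        size_t extra = (GROUP_NBITS - (nbits % GROUP_NBITS)) % GROUP_NBITS;
        if (extra != 0) {
            bitmap[ngroups - 1] >>= extra;  /* clear trailing unused bits */
        }
    }

The tree variant keeps the same invariant per level, which is why
bitmap_init() above repeats the trailing-bit fixup for every level's last
group.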
diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c
deleted file mode 100644
index 90ab116ae..000000000
--- a/deps/jemalloc/src/chunk.c
+++ /dev/null
@@ -1,395 +0,0 @@
-#define JEMALLOC_CHUNK_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-/******************************************************************************/
-/* Data. */
-
-const char *opt_dss = DSS_DEFAULT;
-size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
-
-malloc_mutex_t chunks_mtx;
-chunk_stats_t stats_chunks;
-
-/*
- * Trees of chunks that were previously allocated (trees differ only in node
- * ordering). These are used when allocating chunks, in an attempt to re-use
- * address space. Depending on function, different tree orderings are needed,
- * which is why there are two trees with the same contents.
- */
-static extent_tree_t chunks_szad_mmap;
-static extent_tree_t chunks_ad_mmap;
-static extent_tree_t chunks_szad_dss;
-static extent_tree_t chunks_ad_dss;
-
-rtree_t *chunks_rtree;
-
-/* Various chunk-related settings. */
-size_t chunksize;
-size_t chunksize_mask; /* (chunksize - 1). */
-size_t chunk_npages;
-size_t map_bias;
-size_t arena_maxclass; /* Max size class for arenas. */
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void *chunk_recycle(extent_tree_t *chunks_szad,
- extent_tree_t *chunks_ad, size_t size, size_t alignment, bool base,
- bool *zero);
-static void chunk_record(extent_tree_t *chunks_szad,
- extent_tree_t *chunks_ad, void *chunk, size_t size);
-
-/******************************************************************************/
-
-static void *
-chunk_recycle(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size,
- size_t alignment, bool base, bool *zero)
-{
- void *ret;
- extent_node_t *node;
- extent_node_t key;
- size_t alloc_size, leadsize, trailsize;
- bool zeroed;
-
- if (base) {
- /*
- * This function may need to call base_node_{,de}alloc(), but
- * the current chunk allocation request is on behalf of the
- * base allocator. Avoid deadlock (and if that weren't an
- * issue, potential for infinite recursion) by returning NULL.
- */
- return (NULL);
- }
-
- alloc_size = size + alignment - chunksize;
- /* Beware size_t wrap-around. */
- if (alloc_size < size)
- return (NULL);
- key.addr = NULL;
- key.size = alloc_size;
- malloc_mutex_lock(&chunks_mtx);
- node = extent_tree_szad_nsearch(chunks_szad, &key);
- if (node == NULL) {
- malloc_mutex_unlock(&chunks_mtx);
- return (NULL);
- }
- leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
- (uintptr_t)node->addr;
- assert(node->size >= leadsize + size);
- trailsize = node->size - leadsize - size;
- ret = (void *)((uintptr_t)node->addr + leadsize);
- zeroed = node->zeroed;
- if (zeroed)
- *zero = true;
- /* Remove node from the tree. */
- extent_tree_szad_remove(chunks_szad, node);
- extent_tree_ad_remove(chunks_ad, node);
- if (leadsize != 0) {
- /* Insert the leading space as a smaller chunk. */
- node->size = leadsize;
- extent_tree_szad_insert(chunks_szad, node);
- extent_tree_ad_insert(chunks_ad, node);
- node = NULL;
- }
- if (trailsize != 0) {
- /* Insert the trailing space as a smaller chunk. */
- if (node == NULL) {
- /*
- * An additional node is required, but
- * base_node_alloc() can cause a new base chunk to be
- * allocated. Drop chunks_mtx in order to avoid
- * deadlock, and if node allocation fails, deallocate
- * the result before returning an error.
- */
- malloc_mutex_unlock(&chunks_mtx);
- node = base_node_alloc();
- if (node == NULL) {
- chunk_dealloc(ret, size, true);
- return (NULL);
- }
- malloc_mutex_lock(&chunks_mtx);
- }
- node->addr = (void *)((uintptr_t)(ret) + size);
- node->size = trailsize;
- node->zeroed = zeroed;
- extent_tree_szad_insert(chunks_szad, node);
- extent_tree_ad_insert(chunks_ad, node);
- node = NULL;
- }
- malloc_mutex_unlock(&chunks_mtx);
-
- if (node != NULL)
- base_node_dealloc(node);
- if (*zero) {
- if (zeroed == false)
- memset(ret, 0, size);
- else if (config_debug) {
- size_t i;
- size_t *p = (size_t *)(uintptr_t)ret;
-
- VALGRIND_MAKE_MEM_DEFINED(ret, size);
- for (i = 0; i < size / sizeof(size_t); i++)
- assert(p[i] == 0);
- }
- }
- return (ret);
-}
-
-/*
- * If the caller specifies (*zero == false), it is still possible to receive
- * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
- * takes advantage of this to avoid demanding zeroed chunks, but taking
- * advantage of them if they are returned.
- */
-void *
-chunk_alloc(size_t size, size_t alignment, bool base, bool *zero,
- dss_prec_t dss_prec)
-{
- void *ret;
-
- assert(size != 0);
- assert((size & chunksize_mask) == 0);
- assert(alignment != 0);
- assert((alignment & chunksize_mask) == 0);
-
- /* "primary" dss. */
- if (config_dss && dss_prec == dss_prec_primary) {
- if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
- goto label_return;
- }
- /* mmap. */
- if ((ret = chunk_recycle(&chunks_szad_mmap, &chunks_ad_mmap, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_mmap(size, alignment, zero)) != NULL)
- goto label_return;
- /* "secondary" dss. */
- if (config_dss && dss_prec == dss_prec_secondary) {
- if ((ret = chunk_recycle(&chunks_szad_dss, &chunks_ad_dss, size,
- alignment, base, zero)) != NULL)
- goto label_return;
- if ((ret = chunk_alloc_dss(size, alignment, zero)) != NULL)
- goto label_return;
- }
-
- /* All strategies for allocation failed. */
- ret = NULL;
-label_return:
- if (ret != NULL) {
- if (config_ivsalloc && base == false) {
- if (rtree_set(chunks_rtree, (uintptr_t)ret, 1)) {
- chunk_dealloc(ret, size, true);
- return (NULL);
- }
- }
- if (config_stats || config_prof) {
- bool gdump;
- malloc_mutex_lock(&chunks_mtx);
- if (config_stats)
- stats_chunks.nchunks += (size / chunksize);
- stats_chunks.curchunks += (size / chunksize);
- if (stats_chunks.curchunks > stats_chunks.highchunks) {
- stats_chunks.highchunks =
- stats_chunks.curchunks;
- if (config_prof)
- gdump = true;
- } else if (config_prof)
- gdump = false;
- malloc_mutex_unlock(&chunks_mtx);
- if (config_prof && opt_prof && opt_prof_gdump && gdump)
- prof_gdump();
- }
- if (config_valgrind)
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- }
- assert(CHUNK_ADDR2BASE(ret) == ret);
- return (ret);
-}
-
-static void
-chunk_record(extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, void *chunk,
- size_t size)
-{
- bool unzeroed;
- extent_node_t *xnode, *node, *prev, *xprev, key;
-
- unzeroed = pages_purge(chunk, size);
- VALGRIND_MAKE_MEM_NOACCESS(chunk, size);
-
- /*
- * Allocate a node before acquiring chunks_mtx even though it might not
- * be needed, because base_node_alloc() may cause a new base chunk to
- * be allocated, which could cause deadlock if chunks_mtx were already
- * held.
- */
- xnode = base_node_alloc();
- /* Use xprev to implement conditional deferred deallocation of prev. */
- xprev = NULL;
-
- malloc_mutex_lock(&chunks_mtx);
- key.addr = (void *)((uintptr_t)chunk + size);
- node = extent_tree_ad_nsearch(chunks_ad, &key);
- /* Try to coalesce forward. */
- if (node != NULL && node->addr == key.addr) {
- /*
- * Coalesce chunk with the following address range. This does
- * not change the position within chunks_ad, so only
- * remove/insert from/into chunks_szad.
- */
- extent_tree_szad_remove(chunks_szad, node);
- node->addr = chunk;
- node->size += size;
- node->zeroed = (node->zeroed && (unzeroed == false));
- extent_tree_szad_insert(chunks_szad, node);
- } else {
- /* Coalescing forward failed, so insert a new node. */
- if (xnode == NULL) {
- /*
- * base_node_alloc() failed, which is an exceedingly
- * unlikely failure. Leak chunk; its pages have
- * already been purged, so this is only a virtual
- * memory leak.
- */
- goto label_return;
- }
- node = xnode;
- xnode = NULL; /* Prevent deallocation below. */
- node->addr = chunk;
- node->size = size;
- node->zeroed = (unzeroed == false);
- extent_tree_ad_insert(chunks_ad, node);
- extent_tree_szad_insert(chunks_szad, node);
- }
-
- /* Try to coalesce backward. */
- prev = extent_tree_ad_prev(chunks_ad, node);
- if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
- chunk) {
- /*
- * Coalesce chunk with the previous address range. This does
- * not change the position within chunks_ad, so only
- * remove/insert node from/into chunks_szad.
- */
- extent_tree_szad_remove(chunks_szad, prev);
- extent_tree_ad_remove(chunks_ad, prev);
-
- extent_tree_szad_remove(chunks_szad, node);
- node->addr = prev->addr;
- node->size += prev->size;
- node->zeroed = (node->zeroed && prev->zeroed);
- extent_tree_szad_insert(chunks_szad, node);
-
- xprev = prev;
- }
-
-label_return:
- malloc_mutex_unlock(&chunks_mtx);
- /*
- * Deallocate xnode and/or xprev after unlocking chunks_mtx in order to
- * avoid potential deadlock.
- */
- if (xnode != NULL)
- base_node_dealloc(xnode);
- if (xprev != NULL)
- base_node_dealloc(xprev);
-}
-
-void
-chunk_unmap(void *chunk, size_t size)
-{
- assert(chunk != NULL);
- assert(CHUNK_ADDR2BASE(chunk) == chunk);
- assert(size != 0);
- assert((size & chunksize_mask) == 0);
-
- if (config_dss && chunk_in_dss(chunk))
- chunk_record(&chunks_szad_dss, &chunks_ad_dss, chunk, size);
- else if (chunk_dealloc_mmap(chunk, size))
- chunk_record(&chunks_szad_mmap, &chunks_ad_mmap, chunk, size);
-}
-
-void
-chunk_dealloc(void *chunk, size_t size, bool unmap)
-{
-
- assert(chunk != NULL);
- assert(CHUNK_ADDR2BASE(chunk) == chunk);
- assert(size != 0);
- assert((size & chunksize_mask) == 0);
-
- if (config_ivsalloc)
- rtree_set(chunks_rtree, (uintptr_t)chunk, 0);
- if (config_stats || config_prof) {
- malloc_mutex_lock(&chunks_mtx);
- assert(stats_chunks.curchunks >= (size / chunksize));
- stats_chunks.curchunks -= (size / chunksize);
- malloc_mutex_unlock(&chunks_mtx);
- }
-
- if (unmap)
- chunk_unmap(chunk, size);
-}
-
-bool
-chunk_boot(void)
-{
-
- /* Set variables according to the value of opt_lg_chunk. */
- chunksize = (ZU(1) << opt_lg_chunk);
- assert(chunksize >= PAGE);
- chunksize_mask = chunksize - 1;
- chunk_npages = (chunksize >> LG_PAGE);
-
- if (config_stats || config_prof) {
- if (malloc_mutex_init(&chunks_mtx))
- return (true);
- memset(&stats_chunks, 0, sizeof(chunk_stats_t));
- }
- if (config_dss && chunk_dss_boot())
- return (true);
- extent_tree_szad_new(&chunks_szad_mmap);
- extent_tree_ad_new(&chunks_ad_mmap);
- extent_tree_szad_new(&chunks_szad_dss);
- extent_tree_ad_new(&chunks_ad_dss);
- if (config_ivsalloc) {
- chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
- opt_lg_chunk, base_alloc, NULL);
- if (chunks_rtree == NULL)
- return (true);
- }
-
- return (false);
-}
-
-void
-chunk_prefork(void)
-{
-
- malloc_mutex_prefork(&chunks_mtx);
- if (config_ivsalloc)
- rtree_prefork(chunks_rtree);
- chunk_dss_prefork();
-}
-
-void
-chunk_postfork_parent(void)
-{
-
- chunk_dss_postfork_parent();
- if (config_ivsalloc)
- rtree_postfork_parent(chunks_rtree);
- malloc_mutex_postfork_parent(&chunks_mtx);
-}
-
-void
-chunk_postfork_child(void)
-{
-
- chunk_dss_postfork_child();
- if (config_ivsalloc)
- rtree_postfork_child(chunks_rtree);
- malloc_mutex_postfork_child(&chunks_mtx);
-}
diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c
deleted file mode 100644
index 510bb8bee..000000000
--- a/deps/jemalloc/src/chunk_dss.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#define JEMALLOC_CHUNK_DSS_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-/******************************************************************************/
-/* Data. */
-
-const char *dss_prec_names[] = {
- "disabled",
- "primary",
- "secondary",
- "N/A"
-};
-
-/* Current dss precedence default, used when creating new arenas. */
-static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT;
-
-/*
- * Protects sbrk() calls. This avoids malloc races among threads, though it
- * does not protect against races with threads that call sbrk() directly.
- */
-static malloc_mutex_t dss_mtx;
-
-/* Base address of the DSS. */
-static void *dss_base;
-/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
-static void *dss_prev;
-/* Current upper limit on DSS addresses. */
-static void *dss_max;
-
-/******************************************************************************/
-
-static void *
-chunk_dss_sbrk(intptr_t increment)
-{
-
-#ifdef JEMALLOC_HAVE_SBRK
- return (sbrk(increment));
-#else
- not_implemented();
- return (NULL);
-#endif
-}
-
-dss_prec_t
-chunk_dss_prec_get(void)
-{
- dss_prec_t ret;
-
- if (config_dss == false)
- return (dss_prec_disabled);
- malloc_mutex_lock(&dss_mtx);
- ret = dss_prec_default;
- malloc_mutex_unlock(&dss_mtx);
- return (ret);
-}
-
-bool
-chunk_dss_prec_set(dss_prec_t dss_prec)
-{
-
- if (config_dss == false)
- return (true);
- malloc_mutex_lock(&dss_mtx);
- dss_prec_default = dss_prec;
- malloc_mutex_unlock(&dss_mtx);
- return (false);
-}
-
-void *
-chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
-{
- void *ret;
-
- cassert(config_dss);
- assert(size > 0 && (size & chunksize_mask) == 0);
- assert(alignment > 0 && (alignment & chunksize_mask) == 0);
-
- /*
- * sbrk() uses a signed increment argument, so take care not to
- * interpret a huge allocation request as a negative increment.
- */
- if ((intptr_t)size < 0)
- return (NULL);
-
- malloc_mutex_lock(&dss_mtx);
- if (dss_prev != (void *)-1) {
- size_t gap_size, cpad_size;
- void *cpad, *dss_next;
- intptr_t incr;
-
- /*
- * The loop is necessary to recover from races with other
- * threads that are using the DSS for something other than
- * malloc.
- */
- do {
- /* Get the current end of the DSS. */
- dss_max = chunk_dss_sbrk(0);
- /*
- * Calculate how much padding is necessary to
- * chunk-align the end of the DSS.
- */
- gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) &
- chunksize_mask;
- /*
- * Compute how much chunk-aligned pad space (if any) is
- * necessary to satisfy alignment. This space can be
- * recycled for later use.
- */
- cpad = (void *)((uintptr_t)dss_max + gap_size);
- ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max,
- alignment);
- cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
- dss_next = (void *)((uintptr_t)ret + size);
- if ((uintptr_t)ret < (uintptr_t)dss_max ||
- (uintptr_t)dss_next < (uintptr_t)dss_max) {
- /* Wrap-around. */
- malloc_mutex_unlock(&dss_mtx);
- return (NULL);
- }
- incr = gap_size + cpad_size + size;
- dss_prev = chunk_dss_sbrk(incr);
- if (dss_prev == dss_max) {
- /* Success. */
- dss_max = dss_next;
- malloc_mutex_unlock(&dss_mtx);
- if (cpad_size != 0)
- chunk_unmap(cpad, cpad_size);
- if (*zero) {
- VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
- memset(ret, 0, size);
- }
- return (ret);
- }
- } while (dss_prev != (void *)-1);
- }
- malloc_mutex_unlock(&dss_mtx);
-
- return (NULL);
-}
-
-bool
-chunk_in_dss(void *chunk)
-{
- bool ret;
-
- cassert(config_dss);
-
- malloc_mutex_lock(&dss_mtx);
- if ((uintptr_t)chunk >= (uintptr_t)dss_base
- && (uintptr_t)chunk < (uintptr_t)dss_max)
- ret = true;
- else
- ret = false;
- malloc_mutex_unlock(&dss_mtx);
-
- return (ret);
-}
-
-bool
-chunk_dss_boot(void)
-{
-
- cassert(config_dss);
-
- if (malloc_mutex_init(&dss_mtx))
- return (true);
- dss_base = chunk_dss_sbrk(0);
- dss_prev = dss_base;
- dss_max = dss_base;
-
- return (false);
-}
-
-void
-chunk_dss_prefork(void)
-{
-
- if (config_dss)
- malloc_mutex_prefork(&dss_mtx);
-}
-
-void
-chunk_dss_postfork_parent(void)
-{
-
- if (config_dss)
- malloc_mutex_postfork_parent(&dss_mtx);
-}
-
-void
-chunk_dss_postfork_child(void)
-{
-
- if (config_dss)
- malloc_mutex_postfork_child(&dss_mtx);
-}
-
-/******************************************************************************/
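
The removed chunk_alloc_dss() extends the break with a single sbrk() of
gap_size + cpad_size + size: gap_size chunk-aligns the current break, and cpad
is the chunk-aligned padding (recycled via chunk_unmap()) needed to reach the
requested alignment. A standalone sketch of that layout arithmetic, assuming
power-of-two chunksize and alignment:

    #include <stdint.h>
    #include <stddef.h>

    static uintptr_t
    dss_layout(uintptr_t dss_max, size_t size, size_t alignment,
        size_t chunksize, size_t *gap, size_t *cpad, intptr_t *incr) {
        /* Pad to the next chunk boundary. */
        *gap = (chunksize - (dss_max & (chunksize - 1))) & (chunksize - 1);
        /* Align the returned address itself. */
        uintptr_t ret = (dss_max + (alignment - 1)) &
            ~(uintptr_t)(alignment - 1);
        *cpad = ret - (dss_max + *gap);  /* recyclable chunk-aligned pad */
        *incr = (intptr_t)(*gap + *cpad + size);
        return ret;
    }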
diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c
deleted file mode 100644
index 2056d793f..000000000
--- a/deps/jemalloc/src/chunk_mmap.c
+++ /dev/null
@@ -1,210 +0,0 @@
-#define JEMALLOC_CHUNK_MMAP_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void *pages_map(void *addr, size_t size);
-static void pages_unmap(void *addr, size_t size);
-static void *chunk_alloc_mmap_slow(size_t size, size_t alignment,
- bool *zero);
-
-/******************************************************************************/
-
-static void *
-pages_map(void *addr, size_t size)
-{
- void *ret;
-
- assert(size != 0);
-
-#ifdef _WIN32
- /*
- * If VirtualAlloc can't allocate at the given address when one is
- * given, it fails and returns NULL.
- */
- ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
- PAGE_READWRITE);
-#else
- /*
- * We don't use MAP_FIXED here, because it can cause the *replacement*
- * of existing mappings, and we only want to create new mappings.
- */
- ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
- -1, 0);
- assert(ret != NULL);
-
- if (ret == MAP_FAILED)
- ret = NULL;
- else if (addr != NULL && ret != addr) {
- /*
- * We succeeded in mapping memory, but not in the right place.
- */
- if (munmap(ret, size) == -1) {
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc: Error in munmap(): %s\n",
- buf);
- if (opt_abort)
- abort();
- }
- ret = NULL;
- }
-#endif
- assert(ret == NULL || (addr == NULL && ret != addr)
- || (addr != NULL && ret == addr));
- return (ret);
-}
-
-static void
-pages_unmap(void *addr, size_t size)
-{
-
-#ifdef _WIN32
- if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
-#else
- if (munmap(addr, size) == -1)
-#endif
- {
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc>: Error in "
-#ifdef _WIN32
- "VirtualFree"
-#else
- "munmap"
-#endif
- "(): %s\n", buf);
- if (opt_abort)
- abort();
- }
-}
-
-static void *
-pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
-{
- void *ret = (void *)((uintptr_t)addr + leadsize);
-
- assert(alloc_size >= leadsize + size);
-#ifdef _WIN32
- {
- void *new_addr;
-
- pages_unmap(addr, alloc_size);
- new_addr = pages_map(ret, size);
- if (new_addr == ret)
- return (ret);
- if (new_addr)
- pages_unmap(new_addr, size);
- return (NULL);
- }
-#else
- {
- size_t trailsize = alloc_size - leadsize - size;
-
- if (leadsize != 0)
- pages_unmap(addr, leadsize);
- if (trailsize != 0)
- pages_unmap((void *)((uintptr_t)ret + size), trailsize);
- return (ret);
- }
-#endif
-}
-
-bool
-pages_purge(void *addr, size_t length)
-{
- bool unzeroed;
-
-#ifdef _WIN32
- VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE);
- unzeroed = true;
-#else
-# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
-# define JEMALLOC_MADV_PURGE MADV_DONTNEED
-# define JEMALLOC_MADV_ZEROS true
-# elif defined(JEMALLOC_PURGE_MADVISE_FREE)
-# define JEMALLOC_MADV_PURGE MADV_FREE
-# define JEMALLOC_MADV_ZEROS false
-# else
-# error "No method defined for purging unused dirty pages."
-# endif
- int err = madvise(addr, length, JEMALLOC_MADV_PURGE);
- unzeroed = (JEMALLOC_MADV_ZEROS == false || err != 0);
-# undef JEMALLOC_MADV_PURGE
-# undef JEMALLOC_MADV_ZEROS
-#endif
- return (unzeroed);
-}
-
-static void *
-chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero)
-{
- void *ret, *pages;
- size_t alloc_size, leadsize;
-
- alloc_size = size + alignment - PAGE;
- /* Beware size_t wrap-around. */
- if (alloc_size < size)
- return (NULL);
- do {
- pages = pages_map(NULL, alloc_size);
- if (pages == NULL)
- return (NULL);
- leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) -
- (uintptr_t)pages;
- ret = pages_trim(pages, alloc_size, leadsize, size);
- } while (ret == NULL);
-
- assert(ret != NULL);
- *zero = true;
- return (ret);
-}
-
-void *
-chunk_alloc_mmap(size_t size, size_t alignment, bool *zero)
-{
- void *ret;
- size_t offset;
-
- /*
- * Ideally, there would be a way to specify alignment to mmap() (like
- * NetBSD has), but in the absence of such a feature, we have to work
- * hard to efficiently create aligned mappings. The reliable, but
- * slow method is to create a mapping that is over-sized, then trim the
- * excess. However, that always results in one or two calls to
- * pages_unmap().
- *
- * Optimistically try mapping precisely the right amount before falling
- * back to the slow method, with the expectation that the optimistic
- * approach works most of the time.
- */
-
- assert(alignment != 0);
- assert((alignment & chunksize_mask) == 0);
-
- ret = pages_map(NULL, size);
- if (ret == NULL)
- return (NULL);
- offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
- if (offset != 0) {
- pages_unmap(ret, size);
- return (chunk_alloc_mmap_slow(size, alignment, zero));
- }
-
- assert(ret != NULL);
- *zero = true;
- return (ret);
-}
-
-bool
-chunk_dealloc_mmap(void *chunk, size_t size)
-{
-
- if (config_munmap)
- pages_unmap(chunk, size);
-
- return (config_munmap == false);
-}
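
The removed slow path over-maps by alignment - PAGE bytes and trims the
misaligned head and tail with munmap(). A POSIX-only sketch of the same
strategy (the removed Windows branch instead unmaps everything and re-maps at
the aligned address, since VirtualFree cannot split a region):

    #include <sys/mman.h>
    #include <stdint.h>
    #include <stddef.h>

    static void *
    map_aligned_slow(size_t size, size_t alignment, size_t page) {
        size_t alloc_size = size + alignment - page;
        if (alloc_size < size) {
            return NULL;  /* size_t overflow */
        }
        void *pages = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, 0);
        if (pages == MAP_FAILED) {
            return NULL;
        }
        uintptr_t base = (uintptr_t)pages;
        uintptr_t ret  = (base + (alignment - 1)) &
            ~(uintptr_t)(alignment - 1);
        size_t lead  = ret - base;
        size_t trail = alloc_size - lead - size;
        if (lead != 0) {
            munmap(pages, lead);                    /* trim head */
        }
        if (trail != 0) {
            munmap((void *)(ret + size), trail);    /* trim tail */
        }
        return (void *)ret;
    }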
diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c
index 04c529661..e95e0a3ed 100644
--- a/deps/jemalloc/src/ckh.c
+++ b/deps/jemalloc/src/ckh.c
@@ -34,14 +34,24 @@
* respectively.
*
******************************************************************************/
-#define JEMALLOC_CKH_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_CKH_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/ckh.h"
+
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/util.h"
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-static bool ckh_grow(ckh_t *ckh);
-static void ckh_shrink(ckh_t *ckh);
+static bool ckh_grow(tsd_t *tsd, ckh_t *ckh);
+static void ckh_shrink(tsd_t *tsd, ckh_t *ckh);
/******************************************************************************/
@@ -49,27 +59,26 @@ static void ckh_shrink(ckh_t *ckh);
* Search bucket for key and return the cell number if found; SIZE_T_MAX
* otherwise.
*/
-JEMALLOC_INLINE_C size_t
-ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
-{
+static size_t
+ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) {
ckhc_t *cell;
unsigned i;
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
- if (cell->key != NULL && ckh->keycomp(key, cell->key))
- return ((bucket << LG_CKH_BUCKET_CELLS) + i);
+ if (cell->key != NULL && ckh->keycomp(key, cell->key)) {
+ return (bucket << LG_CKH_BUCKET_CELLS) + i;
+ }
}
- return (SIZE_T_MAX);
+ return SIZE_T_MAX;
}
/*
* Search table for key and return cell number if found; SIZE_T_MAX otherwise.
*/
-JEMALLOC_INLINE_C size_t
-ckh_isearch(ckh_t *ckh, const void *key)
-{
+static size_t
+ckh_isearch(ckh_t *ckh, const void *key) {
size_t hashes[2], bucket, cell;
assert(ckh != NULL);
@@ -79,19 +88,19 @@ ckh_isearch(ckh_t *ckh, const void *key)
/* Search primary bucket. */
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
- if (cell != SIZE_T_MAX)
- return (cell);
+ if (cell != SIZE_T_MAX) {
+ return cell;
+ }
/* Search secondary bucket. */
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
cell = ckh_bucket_search(ckh, bucket, key);
- return (cell);
+ return cell;
}
-JEMALLOC_INLINE_C bool
+static bool
ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
- const void *data)
-{
+ const void *data) {
ckhc_t *cell;
unsigned offset, i;
@@ -99,7 +108,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
- prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+ offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+ LG_CKH_BUCKET_CELLS);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -107,11 +117,11 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
cell->key = key;
cell->data = data;
ckh->count++;
- return (false);
+ return false;
}
}
- return (true);
+ return true;
}
/*
@@ -120,10 +130,9 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* eviction/relocation procedure until either success or detection of an
* eviction/relocation bucket cycle.
*/
-JEMALLOC_INLINE_C bool
+static bool
ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
- void const **argdata)
-{
+ void const **argdata) {
const void *key, *data, *tkey, *tdata;
ckhc_t *cell;
size_t hashes[2], bucket, tbucket;
@@ -141,7 +150,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
* were an item for which both hashes indicated the same
* bucket.
*/
- prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+ i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+ LG_CKH_BUCKET_CELLS);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
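
For context: ckh implements (2+,2) cuckoo hashing, so every key has two
candidate buckets of 2^LG_CKH_BUCKET_CELLS cells each, and a lookup probes at
most two buckets. A minimal sketch of that lookup with hypothetical types and
an assumed 4-cell bucket:

    #include <stddef.h>

    #define CELLS_PER_BUCKET 4  /* 1 << LG_CKH_BUCKET_CELLS, assumed */

    typedef struct { const void *key, *data; } cell_t;

    static size_t
    cuckoo_search(const cell_t *tab, size_t nbuckets_mask,
        const size_t hashes[2], const void *key,
        int (*eq)(const void *, const void *)) {
        for (int h = 0; h < 2; h++) {
            size_t bucket = hashes[h] & nbuckets_mask;
            for (size_t i = 0; i < CELLS_PER_BUCKET; i++) {
                size_t cell = bucket * CELLS_PER_BUCKET + i;
                if (tab[cell].key != NULL && eq(key, tab[cell].key)) {
                    return cell;
                }
            }
        }
        return (size_t)-1;  /* not found (SIZE_T_MAX) */
    }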
@@ -181,18 +191,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
if (tbucket == argbucket) {
*argkey = key;
*argdata = data;
- return (true);
+ return true;
}
bucket = tbucket;
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
- return (false);
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+ return false;
+ }
}
}
-JEMALLOC_INLINE_C bool
-ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
-{
+static bool
+ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) {
size_t hashes[2], bucket;
const void *key = *argkey;
const void *data = *argdata;
@@ -201,27 +211,28 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
/* Try to insert in primary bucket. */
bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
- return (false);
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+ return false;
+ }
/* Try to insert in secondary bucket. */
bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
- if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
- return (false);
+ if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+ return false;
+ }
/*
* Try to find a place for this item via iterative eviction/relocation.
*/
- return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
+ return ckh_evict_reloc_insert(ckh, bucket, argkey, argdata);
}
/*
* Try to rebuild the hash table from scratch by inserting all items from the
* old table into the new.
*/
-JEMALLOC_INLINE_C bool
-ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
-{
+static bool
+ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) {
size_t count, i, nins;
const void *key, *data;
@@ -233,22 +244,20 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
data = aTab[i].data;
if (ckh_try_insert(ckh, &key, &data)) {
ckh->count = count;
- return (true);
+ return true;
}
nins++;
}
}
- return (false);
+ return false;
}
static bool
-ckh_grow(ckh_t *ckh)
-{
+ckh_grow(tsd_t *tsd, ckh_t *ckh) {
bool ret;
ckhc_t *tab, *ttab;
- size_t lg_curcells;
- unsigned lg_prevbuckets;
+ unsigned lg_prevbuckets, lg_curcells;
#ifdef CKH_COUNT
ckh->ngrows++;
@@ -265,12 +274,13 @@ ckh_grow(ckh_t *ckh)
size_t usize;
lg_curcells++;
- usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
- if (usize == 0) {
+ usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
ret = true;
goto label_return;
}
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE,
+ true, NULL, true, arena_ichoose(tsd, NULL));
if (tab == NULL) {
ret = true;
goto label_return;
@@ -281,28 +291,27 @@ ckh_grow(ckh_t *ckh)
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
- if (ckh_rebuild(ckh, tab) == false) {
- idalloc(tab);
+ if (!ckh_rebuild(ckh, tab)) {
+ idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloc(ckh->tab);
+ idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
ret = false;
label_return:
- return (ret);
+ return ret;
}
static void
-ckh_shrink(ckh_t *ckh)
-{
+ckh_shrink(tsd_t *tsd, ckh_t *ckh) {
ckhc_t *tab, *ttab;
- size_t lg_curcells, usize;
- unsigned lg_prevbuckets;
+ size_t usize;
+ unsigned lg_prevbuckets, lg_curcells;
/*
* It is possible (though unlikely, given well behaved hashes) that the
@@ -310,10 +319,12 @@ ckh_shrink(ckh_t *ckh)
*/
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
- usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
- if (usize == 0)
+ usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
return;
- tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ }
+ tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
+ true, arena_ichoose(tsd, NULL));
if (tab == NULL) {
/*
* An OOM error isn't worth propagating, since it doesn't
@@ -327,8 +338,8 @@ ckh_shrink(ckh_t *ckh)
tab = ttab;
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
- if (ckh_rebuild(ckh, tab) == false) {
- idalloc(tab);
+ if (!ckh_rebuild(ckh, tab)) {
+ idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
@@ -336,7 +347,7 @@ ckh_shrink(ckh_t *ckh)
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloc(ckh->tab);
+ idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
@@ -345,8 +356,8 @@ ckh_shrink(ckh_t *ckh)
}
bool
-ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
-{
+ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+ ckh_keycomp_t *keycomp) {
bool ret;
size_t mincells, usize;
unsigned lg_mincells;
@@ -366,29 +377,31 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ckh->count = 0;
/*
- * Find the minimum power of 2 that is large enough to fit aBaseCount
+ * Find the minimum power of 2 that is large enough to fit minitems
* entries. We are using (2+,2) cuckoo hashing, which has an expected
* maximum load factor of at least ~0.86, so 0.75 is a conservative load
- * factor that will typically allow 2^aLgMinItems to fit without ever
+ * factor that will typically allow mincells items to fit without ever
* growing the table.
*/
assert(LG_CKH_BUCKET_CELLS > 0);
mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
for (lg_mincells = LG_CKH_BUCKET_CELLS;
(ZU(1) << lg_mincells) < mincells;
- lg_mincells++)
- ; /* Do nothing. */
+ lg_mincells++) {
+ /* Do nothing. */
+ }
ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
ckh->hash = hash;
ckh->keycomp = keycomp;
- usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
- if (usize == 0) {
+ usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
ret = true;
goto label_return;
}
- ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+ ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true,
+ NULL, true, arena_ichoose(tsd, NULL));
if (ckh->tab == NULL) {
ret = true;
goto label_return;
@@ -396,20 +409,18 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
ret = false;
label_return:
- return (ret);
+ return ret;
}
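
The sizing arithmetic in ckh_new() targets a ~0.75 load factor before rounding the cell count up to a power of two. Worked through for minitems = 16, assuming LG_CKH_BUCKET_CELLS is 2 (its usual value on 64-bit builds): mincells = ((16 + 2) / 3) << 2 = 24; the smallest power of two >= 24 is 32, so lg_mincells = 5 and lg_curbuckets = 3, i.e. 8 buckets of 4 cells and an initial load of 16/32 = 0.5. The same computation as runnable C:

#include <assert.h>
#include <stddef.h>

int
main(void) {
    size_t minitems = 16;
    size_t mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
    unsigned lg_mincells;

    assert(mincells == 24); /* ~minitems / 0.75. */
    for (lg_mincells = 2; ((size_t)1 << lg_mincells) < mincells;
        lg_mincells++) {
        /* Do nothing. */
    }
    assert(lg_mincells == 5); /* 32 cells: 8 buckets of 4 cells. */
    return 0;
}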
void
-ckh_delete(ckh_t *ckh)
-{
-
+ckh_delete(tsd_t *tsd, ckh_t *ckh) {
assert(ckh != NULL);
#ifdef CKH_VERBOSE
malloc_printf(
- "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
- " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
- " nrelocs: %"PRIu64"\n", __func__, ckh,
+ "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
+ " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
+ " nrelocs: %"FMTu64"\n", __func__, ckh,
(unsigned long long)ckh->ngrows,
(unsigned long long)ckh->nshrinks,
(unsigned long long)ckh->nshrinkfails,
@@ -417,43 +428,42 @@ ckh_delete(ckh_t *ckh)
(unsigned long long)ckh->nrelocs);
#endif
- idalloc(ckh->tab);
- if (config_debug)
- memset(ckh, 0x5a, sizeof(ckh_t));
+ idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
+ if (config_debug) {
+ memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
+ }
}
size_t
-ckh_count(ckh_t *ckh)
-{
-
+ckh_count(ckh_t *ckh) {
assert(ckh != NULL);
- return (ckh->count);
+ return ckh->count;
}
bool
-ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
-{
+ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) {
size_t i, ncells;
for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
if (ckh->tab[i].key != NULL) {
- if (key != NULL)
+ if (key != NULL) {
*key = (void *)ckh->tab[i].key;
- if (data != NULL)
+ }
+ if (data != NULL) {
*data = (void *)ckh->tab[i].data;
+ }
*tabind = i + 1;
- return (false);
+ return false;
}
}
- return (true);
+ return true;
}
bool
-ckh_insert(ckh_t *ckh, const void *key, const void *data)
-{
+ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) {
bool ret;
assert(ckh != NULL);
@@ -464,7 +474,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
#endif
while (ckh_try_insert(ckh, &key, &data)) {
- if (ckh_grow(ckh)) {
+ if (ckh_grow(tsd, ckh)) {
ret = true;
goto label_return;
}
@@ -472,22 +482,24 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
ret = false;
label_return:
- return (ret);
+ return ret;
}
bool
-ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
-{
+ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
+ void **data) {
size_t cell;
assert(ckh != NULL);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
- if (key != NULL)
+ if (key != NULL) {
*key = (void *)ckh->tab[cell].key;
- if (data != NULL)
+ }
+ if (data != NULL) {
*data = (void *)ckh->tab[cell].data;
+ }
ckh->tab[cell].key = NULL;
ckh->tab[cell].data = NULL; /* Not necessary. */
@@ -497,54 +509,50 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
+ LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
> ckh->lg_minbuckets) {
/* Ignore error due to OOM. */
- ckh_shrink(ckh);
+ ckh_shrink(tsd, ckh);
}
- return (false);
+ return false;
}
- return (true);
+ return true;
}
bool
-ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
-{
+ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) {
size_t cell;
assert(ckh != NULL);
cell = ckh_isearch(ckh, searchkey);
if (cell != SIZE_T_MAX) {
- if (key != NULL)
+ if (key != NULL) {
*key = (void *)ckh->tab[cell].key;
- if (data != NULL)
+ }
+ if (data != NULL) {
*data = (void *)ckh->tab[cell].data;
- return (false);
+ }
+ return false;
}
- return (true);
+ return true;
}
void
-ckh_string_hash(const void *key, size_t r_hash[2])
-{
-
+ckh_string_hash(const void *key, size_t r_hash[2]) {
hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
}
bool
-ckh_string_keycomp(const void *k1, const void *k2)
-{
-
- assert(k1 != NULL);
- assert(k2 != NULL);
+ckh_string_keycomp(const void *k1, const void *k2) {
+ assert(k1 != NULL);
+ assert(k2 != NULL);
- return (strcmp((char *)k1, (char *)k2) ? false : true);
+ return !strcmp((char *)k1, (char *)k2);
}
void
-ckh_pointer_hash(const void *key, size_t r_hash[2])
-{
+ckh_pointer_hash(const void *key, size_t r_hash[2]) {
union {
const void *v;
size_t i;
@@ -556,8 +564,6 @@ ckh_pointer_hash(const void *key, size_t r_hash[2])
}
bool
-ckh_pointer_keycomp(const void *k1, const void *k2)
-{
-
- return ((k1 == k2) ? true : false);
+ckh_pointer_keycomp(const void *k1, const void *k2) {
+ return (k1 == k2);
}
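
Taken together, the ckh.c changes thread tsd_t through every path that can allocate (ckh_new(), and ckh_insert() via ckh_grow()) or deallocate (ckh_delete(), and ckh_remove() via ckh_shrink()). A minimal jemalloc-internal sketch of the resulting API, assuming the usual tsd_fetch() accessor; note that these functions keep jemalloc's false-on-success convention:

static void
ckh_example(void) {
    tsd_t *tsd = tsd_fetch();
    ckh_t ckh;
    void *k, *v;

    if (ckh_new(tsd, &ckh, 16, ckh_string_hash, ckh_string_keycomp)) {
        return; /* OOM. */
    }
    if (!ckh_insert(tsd, &ckh, "key", "value")) {
        if (!ckh_search(&ckh, "key", &k, &v)) {
            /* Found: k points at "key", v at "value". */
        }
        ckh_remove(tsd, &ckh, "key", NULL, NULL);
    }
    assert(ckh_count(&ckh) == 0);
    ckh_delete(tsd, &ckh);
}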
diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c
index cc2c5aef5..1e713a3d1 100644
--- a/deps/jemalloc/src/ctl.c
+++ b/deps/jemalloc/src/ctl.c
@@ -1,146 +1,152 @@
-#define JEMALLOC_CTL_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_CTL_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/util.h"
/******************************************************************************/
/* Data. */
/*
* ctl_mtx protects the following:
- * - ctl_stats.*
- * - opt_prof_active
+ * - ctl_stats->*
*/
static malloc_mutex_t ctl_mtx;
static bool ctl_initialized;
-static uint64_t ctl_epoch;
-static ctl_stats_t ctl_stats;
+static ctl_stats_t *ctl_stats;
+static ctl_arenas_t *ctl_arenas;
/******************************************************************************/
/* Helpers for named and indexed nodes. */
-static inline const ctl_named_node_t *
-ctl_named_node(const ctl_node_t *node)
-{
-
+static const ctl_named_node_t *
+ctl_named_node(const ctl_node_t *node) {
return ((node->named) ? (const ctl_named_node_t *)node : NULL);
}
-static inline const ctl_named_node_t *
-ctl_named_children(const ctl_named_node_t *node, int index)
-{
+static const ctl_named_node_t *
+ctl_named_children(const ctl_named_node_t *node, size_t index) {
const ctl_named_node_t *children = ctl_named_node(node->children);
return (children ? &children[index] : NULL);
}
-static inline const ctl_indexed_node_t *
-ctl_indexed_node(const ctl_node_t *node)
-{
-
- return ((node->named == false) ? (const ctl_indexed_node_t *)node :
- NULL);
+static const ctl_indexed_node_t *
+ctl_indexed_node(const ctl_node_t *node) {
+ return (!node->named ? (const ctl_indexed_node_t *)node : NULL);
}
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-#define CTL_PROTO(n) \
-static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \
- size_t *oldlenp, void *newp, size_t newlen);
-
-#define INDEX_PROTO(n) \
-static const ctl_named_node_t *n##_index(const size_t *mib, \
- size_t miblen, size_t i);
-
-static bool ctl_arena_init(ctl_arena_stats_t *astats);
-static void ctl_arena_clear(ctl_arena_stats_t *astats);
-static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
- arena_t *arena);
-static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
- ctl_arena_stats_t *astats);
-static void ctl_arena_refresh(arena_t *arena, unsigned i);
-static bool ctl_grow(void);
-static void ctl_refresh(void);
-static bool ctl_init(void);
-static int ctl_lookup(const char *name, ctl_node_t const **nodesp,
- size_t *mibp, size_t *depthp);
+#define CTL_PROTO(n) \
+static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen);
+
+#define INDEX_PROTO(n) \
+static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \
+ const size_t *mib, size_t miblen, size_t i);
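
For reference, a hand-expanded instance of the two rewritten prototype macros (a sketch against jemalloc's internal types, not standalone code). CTL_PROTO(version) and INDEX_PROTO(arena_i) now produce:

static int version_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
    void *oldp, size_t *oldlenp, void *newp, size_t newlen);

static const ctl_named_node_t *arena_i_index(tsdn_t *tsdn,
    const size_t *mib, size_t miblen, size_t i);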
CTL_PROTO(version)
CTL_PROTO(epoch)
+CTL_PROTO(background_thread)
+CTL_PROTO(max_background_threads)
CTL_PROTO(thread_tcache_enabled)
CTL_PROTO(thread_tcache_flush)
+CTL_PROTO(thread_prof_name)
+CTL_PROTO(thread_prof_active)
CTL_PROTO(thread_arena)
CTL_PROTO(thread_allocated)
CTL_PROTO(thread_allocatedp)
CTL_PROTO(thread_deallocated)
CTL_PROTO(thread_deallocatedp)
+CTL_PROTO(config_cache_oblivious)
CTL_PROTO(config_debug)
-CTL_PROTO(config_dss)
CTL_PROTO(config_fill)
CTL_PROTO(config_lazy_lock)
-CTL_PROTO(config_mremap)
-CTL_PROTO(config_munmap)
+CTL_PROTO(config_malloc_conf)
CTL_PROTO(config_prof)
CTL_PROTO(config_prof_libgcc)
CTL_PROTO(config_prof_libunwind)
CTL_PROTO(config_stats)
-CTL_PROTO(config_tcache)
-CTL_PROTO(config_tls)
CTL_PROTO(config_utrace)
-CTL_PROTO(config_valgrind)
CTL_PROTO(config_xmalloc)
CTL_PROTO(opt_abort)
+CTL_PROTO(opt_abort_conf)
+CTL_PROTO(opt_metadata_thp)
+CTL_PROTO(opt_retain)
CTL_PROTO(opt_dss)
-CTL_PROTO(opt_lg_chunk)
CTL_PROTO(opt_narenas)
-CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_percpu_arena)
+CTL_PROTO(opt_background_thread)
+CTL_PROTO(opt_max_background_threads)
+CTL_PROTO(opt_dirty_decay_ms)
+CTL_PROTO(opt_muzzy_decay_ms)
CTL_PROTO(opt_stats_print)
+CTL_PROTO(opt_stats_print_opts)
CTL_PROTO(opt_junk)
CTL_PROTO(opt_zero)
-CTL_PROTO(opt_quarantine)
-CTL_PROTO(opt_redzone)
CTL_PROTO(opt_utrace)
-CTL_PROTO(opt_valgrind)
CTL_PROTO(opt_xmalloc)
CTL_PROTO(opt_tcache)
+CTL_PROTO(opt_thp)
+CTL_PROTO(opt_lg_extent_max_active_fit)
CTL_PROTO(opt_lg_tcache_max)
CTL_PROTO(opt_prof)
CTL_PROTO(opt_prof_prefix)
CTL_PROTO(opt_prof_active)
+CTL_PROTO(opt_prof_thread_active_init)
CTL_PROTO(opt_lg_prof_sample)
CTL_PROTO(opt_lg_prof_interval)
CTL_PROTO(opt_prof_gdump)
CTL_PROTO(opt_prof_final)
CTL_PROTO(opt_prof_leak)
CTL_PROTO(opt_prof_accum)
+CTL_PROTO(tcache_create)
+CTL_PROTO(tcache_flush)
+CTL_PROTO(tcache_destroy)
+CTL_PROTO(arena_i_initialized)
+CTL_PROTO(arena_i_decay)
CTL_PROTO(arena_i_purge)
-static void arena_purge(unsigned arena_ind);
+CTL_PROTO(arena_i_reset)
+CTL_PROTO(arena_i_destroy)
CTL_PROTO(arena_i_dss)
+CTL_PROTO(arena_i_dirty_decay_ms)
+CTL_PROTO(arena_i_muzzy_decay_ms)
+CTL_PROTO(arena_i_extent_hooks)
+CTL_PROTO(arena_i_retain_grow_limit)
INDEX_PROTO(arena_i)
CTL_PROTO(arenas_bin_i_size)
CTL_PROTO(arenas_bin_i_nregs)
-CTL_PROTO(arenas_bin_i_run_size)
+CTL_PROTO(arenas_bin_i_slab_size)
INDEX_PROTO(arenas_bin_i)
-CTL_PROTO(arenas_lrun_i_size)
-INDEX_PROTO(arenas_lrun_i)
+CTL_PROTO(arenas_lextent_i_size)
+INDEX_PROTO(arenas_lextent_i)
CTL_PROTO(arenas_narenas)
-CTL_PROTO(arenas_initialized)
+CTL_PROTO(arenas_dirty_decay_ms)
+CTL_PROTO(arenas_muzzy_decay_ms)
CTL_PROTO(arenas_quantum)
CTL_PROTO(arenas_page)
CTL_PROTO(arenas_tcache_max)
CTL_PROTO(arenas_nbins)
CTL_PROTO(arenas_nhbins)
-CTL_PROTO(arenas_nlruns)
-CTL_PROTO(arenas_purge)
-CTL_PROTO(arenas_extend)
+CTL_PROTO(arenas_nlextents)
+CTL_PROTO(arenas_create)
+CTL_PROTO(arenas_lookup)
+CTL_PROTO(prof_thread_active_init)
CTL_PROTO(prof_active)
CTL_PROTO(prof_dump)
+CTL_PROTO(prof_gdump)
+CTL_PROTO(prof_reset)
CTL_PROTO(prof_interval)
-CTL_PROTO(stats_chunks_current)
-CTL_PROTO(stats_chunks_total)
-CTL_PROTO(stats_chunks_high)
-CTL_PROTO(stats_huge_allocated)
-CTL_PROTO(stats_huge_nmalloc)
-CTL_PROTO(stats_huge_ndalloc)
+CTL_PROTO(lg_prof_sample)
CTL_PROTO(stats_arenas_i_small_allocated)
CTL_PROTO(stats_arenas_i_small_nmalloc)
CTL_PROTO(stats_arenas_i_small_ndalloc)
@@ -149,119 +155,183 @@ CTL_PROTO(stats_arenas_i_large_allocated)
CTL_PROTO(stats_arenas_i_large_nmalloc)
CTL_PROTO(stats_arenas_i_large_ndalloc)
CTL_PROTO(stats_arenas_i_large_nrequests)
-CTL_PROTO(stats_arenas_i_bins_j_allocated)
CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
CTL_PROTO(stats_arenas_i_bins_j_nrequests)
+CTL_PROTO(stats_arenas_i_bins_j_curregs)
CTL_PROTO(stats_arenas_i_bins_j_nfills)
CTL_PROTO(stats_arenas_i_bins_j_nflushes)
-CTL_PROTO(stats_arenas_i_bins_j_nruns)
-CTL_PROTO(stats_arenas_i_bins_j_nreruns)
-CTL_PROTO(stats_arenas_i_bins_j_curruns)
+CTL_PROTO(stats_arenas_i_bins_j_nslabs)
+CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
+CTL_PROTO(stats_arenas_i_bins_j_curslabs)
INDEX_PROTO(stats_arenas_i_bins_j)
-CTL_PROTO(stats_arenas_i_lruns_j_nmalloc)
-CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
-CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
-CTL_PROTO(stats_arenas_i_lruns_j_curruns)
-INDEX_PROTO(stats_arenas_i_lruns_j)
+CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
+CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
+CTL_PROTO(stats_arenas_i_lextents_j_nrequests)
+CTL_PROTO(stats_arenas_i_lextents_j_curlextents)
+INDEX_PROTO(stats_arenas_i_lextents_j)
CTL_PROTO(stats_arenas_i_nthreads)
+CTL_PROTO(stats_arenas_i_uptime)
CTL_PROTO(stats_arenas_i_dss)
+CTL_PROTO(stats_arenas_i_dirty_decay_ms)
+CTL_PROTO(stats_arenas_i_muzzy_decay_ms)
CTL_PROTO(stats_arenas_i_pactive)
CTL_PROTO(stats_arenas_i_pdirty)
+CTL_PROTO(stats_arenas_i_pmuzzy)
CTL_PROTO(stats_arenas_i_mapped)
-CTL_PROTO(stats_arenas_i_npurge)
-CTL_PROTO(stats_arenas_i_nmadvise)
-CTL_PROTO(stats_arenas_i_purged)
+CTL_PROTO(stats_arenas_i_retained)
+CTL_PROTO(stats_arenas_i_dirty_npurge)
+CTL_PROTO(stats_arenas_i_dirty_nmadvise)
+CTL_PROTO(stats_arenas_i_dirty_purged)
+CTL_PROTO(stats_arenas_i_muzzy_npurge)
+CTL_PROTO(stats_arenas_i_muzzy_nmadvise)
+CTL_PROTO(stats_arenas_i_muzzy_purged)
+CTL_PROTO(stats_arenas_i_base)
+CTL_PROTO(stats_arenas_i_internal)
+CTL_PROTO(stats_arenas_i_metadata_thp)
+CTL_PROTO(stats_arenas_i_tcache_bytes)
+CTL_PROTO(stats_arenas_i_resident)
INDEX_PROTO(stats_arenas_i)
-CTL_PROTO(stats_cactive)
CTL_PROTO(stats_allocated)
CTL_PROTO(stats_active)
+CTL_PROTO(stats_background_thread_num_threads)
+CTL_PROTO(stats_background_thread_num_runs)
+CTL_PROTO(stats_background_thread_run_interval)
+CTL_PROTO(stats_metadata)
+CTL_PROTO(stats_metadata_thp)
+CTL_PROTO(stats_resident)
CTL_PROTO(stats_mapped)
+CTL_PROTO(stats_retained)
+
+#define MUTEX_STATS_CTL_PROTO_GEN(n) \
+CTL_PROTO(stats_##n##_num_ops) \
+CTL_PROTO(stats_##n##_num_wait) \
+CTL_PROTO(stats_##n##_num_spin_acq) \
+CTL_PROTO(stats_##n##_num_owner_switch) \
+CTL_PROTO(stats_##n##_total_wait_time) \
+CTL_PROTO(stats_##n##_max_wait_time) \
+CTL_PROTO(stats_##n##_max_num_thds)
+
+/* Global mutexes. */
+#define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx)
+MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+
+/* Per arena mutexes. */
+#define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(arenas_i_mutexes_##mtx)
+MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+
+/* Arena bin mutexes. */
+MUTEX_STATS_CTL_PROTO_GEN(arenas_i_bins_j_mutex)
+#undef MUTEX_STATS_CTL_PROTO_GEN
+
+CTL_PROTO(stats_mutexes_reset)
/******************************************************************************/
/* mallctl tree. */
-/* Maximum tree depth. */
-#define CTL_MAX_DEPTH 6
-
-#define NAME(n) {true}, n
-#define CHILD(t, c) \
+#define NAME(n) {true}, n
+#define CHILD(t, c) \
sizeof(c##_node) / sizeof(ctl_##t##_node_t), \
(ctl_node_t *)c##_node, \
NULL
-#define CTL(c) 0, NULL, c##_ctl
+#define CTL(c) 0, NULL, c##_ctl
/*
* Only handles internal indexed nodes, since there are currently no external
* ones.
*/
-#define INDEX(i) {false}, i##_index
+#define INDEX(i) {false}, i##_index
-static const ctl_named_node_t tcache_node[] = {
+static const ctl_named_node_t thread_tcache_node[] = {
{NAME("enabled"), CTL(thread_tcache_enabled)},
{NAME("flush"), CTL(thread_tcache_flush)}
};
+static const ctl_named_node_t thread_prof_node[] = {
+ {NAME("name"), CTL(thread_prof_name)},
+ {NAME("active"), CTL(thread_prof_active)}
+};
+
static const ctl_named_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)},
{NAME("allocated"), CTL(thread_allocated)},
{NAME("allocatedp"), CTL(thread_allocatedp)},
{NAME("deallocated"), CTL(thread_deallocated)},
{NAME("deallocatedp"), CTL(thread_deallocatedp)},
- {NAME("tcache"), CHILD(named, tcache)}
+ {NAME("tcache"), CHILD(named, thread_tcache)},
+ {NAME("prof"), CHILD(named, thread_prof)}
};
static const ctl_named_node_t config_node[] = {
- {NAME("debug"), CTL(config_debug)},
- {NAME("dss"), CTL(config_dss)},
- {NAME("fill"), CTL(config_fill)},
- {NAME("lazy_lock"), CTL(config_lazy_lock)},
- {NAME("mremap"), CTL(config_mremap)},
- {NAME("munmap"), CTL(config_munmap)},
- {NAME("prof"), CTL(config_prof)},
- {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
- {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
- {NAME("stats"), CTL(config_stats)},
- {NAME("tcache"), CTL(config_tcache)},
- {NAME("tls"), CTL(config_tls)},
- {NAME("utrace"), CTL(config_utrace)},
- {NAME("valgrind"), CTL(config_valgrind)},
- {NAME("xmalloc"), CTL(config_xmalloc)}
+ {NAME("cache_oblivious"), CTL(config_cache_oblivious)},
+ {NAME("debug"), CTL(config_debug)},
+ {NAME("fill"), CTL(config_fill)},
+ {NAME("lazy_lock"), CTL(config_lazy_lock)},
+ {NAME("malloc_conf"), CTL(config_malloc_conf)},
+ {NAME("prof"), CTL(config_prof)},
+ {NAME("prof_libgcc"), CTL(config_prof_libgcc)},
+ {NAME("prof_libunwind"), CTL(config_prof_libunwind)},
+ {NAME("stats"), CTL(config_stats)},
+ {NAME("utrace"), CTL(config_utrace)},
+ {NAME("xmalloc"), CTL(config_xmalloc)}
};
static const ctl_named_node_t opt_node[] = {
- {NAME("abort"), CTL(opt_abort)},
- {NAME("dss"), CTL(opt_dss)},
- {NAME("lg_chunk"), CTL(opt_lg_chunk)},
- {NAME("narenas"), CTL(opt_narenas)},
- {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
- {NAME("stats_print"), CTL(opt_stats_print)},
- {NAME("junk"), CTL(opt_junk)},
- {NAME("zero"), CTL(opt_zero)},
- {NAME("quarantine"), CTL(opt_quarantine)},
- {NAME("redzone"), CTL(opt_redzone)},
- {NAME("utrace"), CTL(opt_utrace)},
- {NAME("valgrind"), CTL(opt_valgrind)},
- {NAME("xmalloc"), CTL(opt_xmalloc)},
- {NAME("tcache"), CTL(opt_tcache)},
- {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
- {NAME("prof"), CTL(opt_prof)},
- {NAME("prof_prefix"), CTL(opt_prof_prefix)},
- {NAME("prof_active"), CTL(opt_prof_active)},
- {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
- {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
- {NAME("prof_gdump"), CTL(opt_prof_gdump)},
- {NAME("prof_final"), CTL(opt_prof_final)},
- {NAME("prof_leak"), CTL(opt_prof_leak)},
- {NAME("prof_accum"), CTL(opt_prof_accum)}
+ {NAME("abort"), CTL(opt_abort)},
+ {NAME("abort_conf"), CTL(opt_abort_conf)},
+ {NAME("metadata_thp"), CTL(opt_metadata_thp)},
+ {NAME("retain"), CTL(opt_retain)},
+ {NAME("dss"), CTL(opt_dss)},
+ {NAME("narenas"), CTL(opt_narenas)},
+ {NAME("percpu_arena"), CTL(opt_percpu_arena)},
+ {NAME("background_thread"), CTL(opt_background_thread)},
+ {NAME("max_background_threads"), CTL(opt_max_background_threads)},
+ {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)},
+ {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)},
+ {NAME("stats_print"), CTL(opt_stats_print)},
+ {NAME("stats_print_opts"), CTL(opt_stats_print_opts)},
+ {NAME("junk"), CTL(opt_junk)},
+ {NAME("zero"), CTL(opt_zero)},
+ {NAME("utrace"), CTL(opt_utrace)},
+ {NAME("xmalloc"), CTL(opt_xmalloc)},
+ {NAME("tcache"), CTL(opt_tcache)},
+ {NAME("thp"), CTL(opt_thp)},
+ {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)},
+ {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)},
+ {NAME("prof"), CTL(opt_prof)},
+ {NAME("prof_prefix"), CTL(opt_prof_prefix)},
+ {NAME("prof_active"), CTL(opt_prof_active)},
+ {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)},
+ {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
+ {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
+ {NAME("prof_gdump"), CTL(opt_prof_gdump)},
+ {NAME("prof_final"), CTL(opt_prof_final)},
+ {NAME("prof_leak"), CTL(opt_prof_leak)},
+ {NAME("prof_accum"), CTL(opt_prof_accum)}
+};
+
+static const ctl_named_node_t tcache_node[] = {
+ {NAME("create"), CTL(tcache_create)},
+ {NAME("flush"), CTL(tcache_flush)},
+ {NAME("destroy"), CTL(tcache_destroy)}
};
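
The new top-level tcache node replaces the thread-local-only controls with explicit tcache management. A minimal consumer-side sketch using the documented public mallctl() API (names as defined in the node above):

#include <jemalloc/jemalloc.h>
#include <stddef.h>
#include <stdio.h>

int
main(void) {
    unsigned tc;
    size_t sz = sizeof(tc);

    if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0) {
        return 1;
    }
    /* ... allocate with mallocx(size, MALLOCX_TCACHE(tc)) ... */
    mallctl("tcache.flush", NULL, NULL, &tc, sizeof(tc));
    mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
    printf("tcache %u created and destroyed\n", tc);
    return 0;
}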
static const ctl_named_node_t arena_i_node[] = {
- {NAME("purge"), CTL(arena_i_purge)},
- {NAME("dss"), CTL(arena_i_dss)}
+ {NAME("initialized"), CTL(arena_i_initialized)},
+ {NAME("decay"), CTL(arena_i_decay)},
+ {NAME("purge"), CTL(arena_i_purge)},
+ {NAME("reset"), CTL(arena_i_reset)},
+ {NAME("destroy"), CTL(arena_i_destroy)},
+ {NAME("dss"), CTL(arena_i_dss)},
+ {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)},
+ {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)},
+ {NAME("extent_hooks"), CTL(arena_i_extent_hooks)},
+ {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}
};
static const ctl_named_node_t super_arena_i_node[] = {
- {NAME(""), CHILD(named, arena_i)}
+ {NAME(""), CHILD(named, arena_i)}
};
static const ctl_indexed_node_t arena_node[] = {
@@ -269,147 +339,212 @@ static const ctl_indexed_node_t arena_node[] = {
};
static const ctl_named_node_t arenas_bin_i_node[] = {
- {NAME("size"), CTL(arenas_bin_i_size)},
- {NAME("nregs"), CTL(arenas_bin_i_nregs)},
- {NAME("run_size"), CTL(arenas_bin_i_run_size)}
+ {NAME("size"), CTL(arenas_bin_i_size)},
+ {NAME("nregs"), CTL(arenas_bin_i_nregs)},
+ {NAME("slab_size"), CTL(arenas_bin_i_slab_size)}
};
static const ctl_named_node_t super_arenas_bin_i_node[] = {
- {NAME(""), CHILD(named, arenas_bin_i)}
+ {NAME(""), CHILD(named, arenas_bin_i)}
};
static const ctl_indexed_node_t arenas_bin_node[] = {
{INDEX(arenas_bin_i)}
};
-static const ctl_named_node_t arenas_lrun_i_node[] = {
- {NAME("size"), CTL(arenas_lrun_i_size)}
+static const ctl_named_node_t arenas_lextent_i_node[] = {
+ {NAME("size"), CTL(arenas_lextent_i_size)}
};
-static const ctl_named_node_t super_arenas_lrun_i_node[] = {
- {NAME(""), CHILD(named, arenas_lrun_i)}
+static const ctl_named_node_t super_arenas_lextent_i_node[] = {
+ {NAME(""), CHILD(named, arenas_lextent_i)}
};
-static const ctl_indexed_node_t arenas_lrun_node[] = {
- {INDEX(arenas_lrun_i)}
+static const ctl_indexed_node_t arenas_lextent_node[] = {
+ {INDEX(arenas_lextent_i)}
};
static const ctl_named_node_t arenas_node[] = {
- {NAME("narenas"), CTL(arenas_narenas)},
- {NAME("initialized"), CTL(arenas_initialized)},
- {NAME("quantum"), CTL(arenas_quantum)},
- {NAME("page"), CTL(arenas_page)},
- {NAME("tcache_max"), CTL(arenas_tcache_max)},
- {NAME("nbins"), CTL(arenas_nbins)},
- {NAME("nhbins"), CTL(arenas_nhbins)},
- {NAME("bin"), CHILD(indexed, arenas_bin)},
- {NAME("nlruns"), CTL(arenas_nlruns)},
- {NAME("lrun"), CHILD(indexed, arenas_lrun)},
- {NAME("purge"), CTL(arenas_purge)},
- {NAME("extend"), CTL(arenas_extend)}
+ {NAME("narenas"), CTL(arenas_narenas)},
+ {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)},
+ {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)},
+ {NAME("quantum"), CTL(arenas_quantum)},
+ {NAME("page"), CTL(arenas_page)},
+ {NAME("tcache_max"), CTL(arenas_tcache_max)},
+ {NAME("nbins"), CTL(arenas_nbins)},
+ {NAME("nhbins"), CTL(arenas_nhbins)},
+ {NAME("bin"), CHILD(indexed, arenas_bin)},
+ {NAME("nlextents"), CTL(arenas_nlextents)},
+ {NAME("lextent"), CHILD(indexed, arenas_lextent)},
+ {NAME("create"), CTL(arenas_create)},
+ {NAME("lookup"), CTL(arenas_lookup)}
};
static const ctl_named_node_t prof_node[] = {
+ {NAME("thread_active_init"), CTL(prof_thread_active_init)},
{NAME("active"), CTL(prof_active)},
{NAME("dump"), CTL(prof_dump)},
- {NAME("interval"), CTL(prof_interval)}
+ {NAME("gdump"), CTL(prof_gdump)},
+ {NAME("reset"), CTL(prof_reset)},
+ {NAME("interval"), CTL(prof_interval)},
+ {NAME("lg_sample"), CTL(lg_prof_sample)}
};
-static const ctl_named_node_t stats_chunks_node[] = {
- {NAME("current"), CTL(stats_chunks_current)},
- {NAME("total"), CTL(stats_chunks_total)},
- {NAME("high"), CTL(stats_chunks_high)}
+static const ctl_named_node_t stats_arenas_i_small_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
};
-static const ctl_named_node_t stats_huge_node[] = {
- {NAME("allocated"), CTL(stats_huge_allocated)},
- {NAME("nmalloc"), CTL(stats_huge_nmalloc)},
- {NAME("ndalloc"), CTL(stats_huge_ndalloc)}
+static const ctl_named_node_t stats_arenas_i_large_node[] = {
+ {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
+ {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
};
-static const ctl_named_node_t stats_arenas_i_small_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_small_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
+#define MUTEX_PROF_DATA_NODE(prefix) \
+static const ctl_named_node_t stats_##prefix##_node[] = { \
+ {NAME("num_ops"), \
+ CTL(stats_##prefix##_num_ops)}, \
+ {NAME("num_wait"), \
+ CTL(stats_##prefix##_num_wait)}, \
+ {NAME("num_spin_acq"), \
+ CTL(stats_##prefix##_num_spin_acq)}, \
+ {NAME("num_owner_switch"), \
+ CTL(stats_##prefix##_num_owner_switch)}, \
+ {NAME("total_wait_time"), \
+ CTL(stats_##prefix##_total_wait_time)}, \
+ {NAME("max_wait_time"), \
+ CTL(stats_##prefix##_max_wait_time)}, \
+ {NAME("max_num_thds"), \
+ CTL(stats_##prefix##_max_num_thds)} \
+ /* Note that # of current waiting thread not provided. */ \
};
-static const ctl_named_node_t stats_arenas_i_large_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
-};
+MUTEX_PROF_DATA_NODE(arenas_i_bins_j_mutex)
static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
- {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
- {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
- {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)},
- {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)},
- {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)}
+ {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)},
+ {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)},
+ {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)},
+ {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
+ {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)},
+ {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
+ {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
+ {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}
};
+
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i_bins_j)}
+ {NAME(""), CHILD(named, stats_arenas_i_bins_j)}
};
static const ctl_indexed_node_t stats_arenas_i_bins_node[] = {
{INDEX(stats_arenas_i_bins_j)}
};
-static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = {
- {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)},
- {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)}
+static const ctl_named_node_t stats_arenas_i_lextents_j_node[] = {
+ {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)},
+ {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)}
};
-static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i_lruns_j)}
+static const ctl_named_node_t super_stats_arenas_i_lextents_j_node[] = {
+ {NAME(""), CHILD(named, stats_arenas_i_lextents_j)}
+};
+
+static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = {
+ {INDEX(stats_arenas_i_lextents_j)}
};
-static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = {
- {INDEX(stats_arenas_i_lruns_j)}
+#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx)
+MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+
+static const ctl_named_node_t stats_arenas_i_mutexes_node[] = {
+#define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)},
+MUTEX_PROF_ARENA_MUTEXES
+#undef OP
};
static const ctl_named_node_t stats_arenas_i_node[] = {
- {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
- {NAME("dss"), CTL(stats_arenas_i_dss)},
- {NAME("pactive"), CTL(stats_arenas_i_pactive)},
- {NAME("pdirty"), CTL(stats_arenas_i_pdirty)},
- {NAME("mapped"), CTL(stats_arenas_i_mapped)},
- {NAME("npurge"), CTL(stats_arenas_i_npurge)},
- {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)},
- {NAME("purged"), CTL(stats_arenas_i_purged)},
- {NAME("small"), CHILD(named, stats_arenas_i_small)},
- {NAME("large"), CHILD(named, stats_arenas_i_large)},
- {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
- {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)}
+ {NAME("nthreads"), CTL(stats_arenas_i_nthreads)},
+ {NAME("uptime"), CTL(stats_arenas_i_uptime)},
+ {NAME("dss"), CTL(stats_arenas_i_dss)},
+ {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)},
+ {NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)},
+ {NAME("pactive"), CTL(stats_arenas_i_pactive)},
+ {NAME("pdirty"), CTL(stats_arenas_i_pdirty)},
+ {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)},
+ {NAME("mapped"), CTL(stats_arenas_i_mapped)},
+ {NAME("retained"), CTL(stats_arenas_i_retained)},
+ {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)},
+ {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)},
+ {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)},
+ {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)},
+ {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)},
+ {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)},
+ {NAME("base"), CTL(stats_arenas_i_base)},
+ {NAME("internal"), CTL(stats_arenas_i_internal)},
+ {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)},
+ {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)},
+ {NAME("resident"), CTL(stats_arenas_i_resident)},
+ {NAME("small"), CHILD(named, stats_arenas_i_small)},
+ {NAME("large"), CHILD(named, stats_arenas_i_large)},
+ {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)},
+ {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)},
+ {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}
};
static const ctl_named_node_t super_stats_arenas_i_node[] = {
- {NAME(""), CHILD(named, stats_arenas_i)}
+ {NAME(""), CHILD(named, stats_arenas_i)}
};
static const ctl_indexed_node_t stats_arenas_node[] = {
{INDEX(stats_arenas_i)}
};
+static const ctl_named_node_t stats_background_thread_node[] = {
+ {NAME("num_threads"), CTL(stats_background_thread_num_threads)},
+ {NAME("num_runs"), CTL(stats_background_thread_num_runs)},
+ {NAME("run_interval"), CTL(stats_background_thread_run_interval)}
+};
+
+#define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx)
+MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+
+static const ctl_named_node_t stats_mutexes_node[] = {
+#define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)},
+MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+ {NAME("reset"), CTL(stats_mutexes_reset)}
+};
+#undef MUTEX_PROF_DATA_NODE
+
static const ctl_named_node_t stats_node[] = {
- {NAME("cactive"), CTL(stats_cactive)},
- {NAME("allocated"), CTL(stats_allocated)},
- {NAME("active"), CTL(stats_active)},
- {NAME("mapped"), CTL(stats_mapped)},
- {NAME("chunks"), CHILD(named, stats_chunks)},
- {NAME("huge"), CHILD(named, stats_huge)},
- {NAME("arenas"), CHILD(indexed, stats_arenas)}
+ {NAME("allocated"), CTL(stats_allocated)},
+ {NAME("active"), CTL(stats_active)},
+ {NAME("metadata"), CTL(stats_metadata)},
+ {NAME("metadata_thp"), CTL(stats_metadata_thp)},
+ {NAME("resident"), CTL(stats_resident)},
+ {NAME("mapped"), CTL(stats_mapped)},
+ {NAME("retained"), CTL(stats_retained)},
+ {NAME("background_thread"),
+ CHILD(named, stats_background_thread)},
+ {NAME("mutexes"), CHILD(named, stats_mutexes)},
+ {NAME("arenas"), CHILD(indexed, stats_arenas)}
};
static const ctl_named_node_t root_node[] = {
{NAME("version"), CTL(version)},
{NAME("epoch"), CTL(epoch)},
+ {NAME("background_thread"), CTL(background_thread)},
+ {NAME("max_background_threads"), CTL(max_background_threads)},
{NAME("thread"), CHILD(named, thread)},
{NAME("config"), CHILD(named, config)},
{NAME("opt"), CHILD(named, opt)},
+ {NAME("tcache"), CHILD(named, tcache)},
{NAME("arena"), CHILD(indexed, arena)},
{NAME("arenas"), CHILD(named, arenas)},
{NAME("prof"), CHILD(named, prof)},
@@ -426,303 +561,519 @@ static const ctl_named_node_t super_root_node[] = {
/******************************************************************************/
-static bool
-ctl_arena_init(ctl_arena_stats_t *astats)
-{
+/*
+ * Sets *dst + *src non-atomically. This is safe, since everything is
+ * synchronized by the ctl mutex.
+ */
+static void
+ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) {
+#ifdef JEMALLOC_ATOMIC_U64
+ uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
+ uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED);
+ atomic_store_u64(dst, cur_dst + cur_src, ATOMIC_RELAXED);
+#else
+ *dst += *src;
+#endif
+}
+
+/* Likewise: with ctl mutex synchronization, reading is simple. */
+static uint64_t
+ctl_arena_stats_read_u64(arena_stats_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_u64(p, ATOMIC_RELAXED);
+#else
+ return *p;
+#endif
+}
- if (astats->lstats == NULL) {
- astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
- sizeof(malloc_large_stats_t));
- if (astats->lstats == NULL)
- return (true);
+static void
+accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) {
+ size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+ size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED);
+ atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED);
+}
+
+/******************************************************************************/
+
+static unsigned
+arenas_i2a_impl(size_t i, bool compat, bool validate) {
+ unsigned a;
+
+ switch (i) {
+ case MALLCTL_ARENAS_ALL:
+ a = 0;
+ break;
+ case MALLCTL_ARENAS_DESTROYED:
+ a = 1;
+ break;
+ default:
+ if (compat && i == ctl_arenas->narenas) {
+ /*
+ * Provide deprecated backward compatibility for
+ * accessing the merged stats at index narenas rather
+ * than via MALLCTL_ARENAS_ALL. This is scheduled for
+ * removal in 6.0.0.
+ */
+ a = 0;
+ } else if (validate && i >= ctl_arenas->narenas) {
+ a = UINT_MAX;
+ } else {
+ /*
+ * This function should never be called for an index
+ * more than one past the range of indices that have
+ * initialized ctl data.
+ */
+ assert(i < ctl_arenas->narenas || (!validate && i ==
+ ctl_arenas->narenas));
+ a = (unsigned)i + 2;
+ }
+ break;
}
- return (false);
+ return a;
}
-static void
-ctl_arena_clear(ctl_arena_stats_t *astats)
-{
+static unsigned
+arenas_i2a(size_t i) {
+ return arenas_i2a_impl(i, true, false);
+}
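
arenas_i2a_impl() packs the two pseudo-arenas into the first two slots of ctl_arenas->arenas[] and shifts real arenas up by two. A stand-alone mimic of that layout (the real MALLCTL_ARENAS_ALL/MALLCTL_ARENAS_DESTROYED sentinels are reserved magic values in jemalloc's public header; the two constants below are illustrative stand-ins):

#include <assert.h>
#include <limits.h>
#include <stddef.h>

#define ARENAS_ALL ((size_t)UINT_MAX - 1)
#define ARENAS_DESTROYED ((size_t)UINT_MAX)

static unsigned
i2a(size_t i, unsigned narenas) {
    if (i == ARENAS_ALL) return 0;          /* Merged stats. */
    if (i == ARENAS_DESTROYED) return 1;    /* Destroyed-arena stats. */
    if (i == narenas) return 0;             /* Deprecated alias for ALL. */
    return (unsigned)i + 2;                 /* Real arenas start at slot 2. */
}

int
main(void) {
    assert(i2a(ARENAS_ALL, 4) == 0);
    assert(i2a(ARENAS_DESTROYED, 4) == 1);
    assert(i2a(0, 4) == 2);
    assert(i2a(3, 4) == 5);
    assert(i2a(4, 4) == 0); /* i == narenas: backward compatibility. */
    return 0;
}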
+
+static ctl_arena_t *
+arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) {
+ ctl_arena_t *ret;
+
+ assert(!compat || !init);
+
+ ret = ctl_arenas->arenas[arenas_i2a_impl(i, compat, false)];
+ if (init && ret == NULL) {
+ if (config_stats) {
+ struct container_s {
+ ctl_arena_t ctl_arena;
+ ctl_arena_stats_t astats;
+ };
+ struct container_s *cont =
+ (struct container_s *)base_alloc(tsd_tsdn(tsd),
+ b0get(), sizeof(struct container_s), QUANTUM);
+ if (cont == NULL) {
+ return NULL;
+ }
+ ret = &cont->ctl_arena;
+ ret->astats = &cont->astats;
+ } else {
+ ret = (ctl_arena_t *)base_alloc(tsd_tsdn(tsd), b0get(),
+ sizeof(ctl_arena_t), QUANTUM);
+ if (ret == NULL) {
+ return NULL;
+ }
+ }
+ ret->arena_ind = (unsigned)i;
+ ctl_arenas->arenas[arenas_i2a_impl(i, compat, false)] = ret;
+ }
+
+ assert(ret == NULL || arenas_i2a(ret->arena_ind) == arenas_i2a(i));
+ return ret;
+}
+
+static ctl_arena_t *
+arenas_i(size_t i) {
+ ctl_arena_t *ret = arenas_i_impl(tsd_fetch(), i, true, false);
+ assert(ret != NULL);
+ return ret;
+}
- astats->dss = dss_prec_names[dss_prec_limit];
- astats->pactive = 0;
- astats->pdirty = 0;
+static void
+ctl_arena_clear(ctl_arena_t *ctl_arena) {
+ ctl_arena->nthreads = 0;
+ ctl_arena->dss = dss_prec_names[dss_prec_limit];
+ ctl_arena->dirty_decay_ms = -1;
+ ctl_arena->muzzy_decay_ms = -1;
+ ctl_arena->pactive = 0;
+ ctl_arena->pdirty = 0;
+ ctl_arena->pmuzzy = 0;
if (config_stats) {
- memset(&astats->astats, 0, sizeof(arena_stats_t));
- astats->allocated_small = 0;
- astats->nmalloc_small = 0;
- astats->ndalloc_small = 0;
- astats->nrequests_small = 0;
- memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t));
- memset(astats->lstats, 0, nlclasses *
- sizeof(malloc_large_stats_t));
+ memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t));
+ ctl_arena->astats->allocated_small = 0;
+ ctl_arena->astats->nmalloc_small = 0;
+ ctl_arena->astats->ndalloc_small = 0;
+ ctl_arena->astats->nrequests_small = 0;
+ memset(ctl_arena->astats->bstats, 0, NBINS *
+ sizeof(bin_stats_t));
+ memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) *
+ sizeof(arena_stats_large_t));
}
}
static void
-ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
-{
+ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) {
unsigned i;
- arena_stats_merge(arena, &cstats->dss, &cstats->pactive,
- &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats);
-
- for (i = 0; i < NBINS; i++) {
- cstats->allocated_small += cstats->bstats[i].allocated;
- cstats->nmalloc_small += cstats->bstats[i].nmalloc;
- cstats->ndalloc_small += cstats->bstats[i].ndalloc;
- cstats->nrequests_small += cstats->bstats[i].nrequests;
+ if (config_stats) {
+ arena_stats_merge(tsdn, arena, &ctl_arena->nthreads,
+ &ctl_arena->dss, &ctl_arena->dirty_decay_ms,
+ &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive,
+ &ctl_arena->pdirty, &ctl_arena->pmuzzy,
+ &ctl_arena->astats->astats, ctl_arena->astats->bstats,
+ ctl_arena->astats->lstats);
+
+ for (i = 0; i < NBINS; i++) {
+ ctl_arena->astats->allocated_small +=
+ ctl_arena->astats->bstats[i].curregs *
+ sz_index2size(i);
+ ctl_arena->astats->nmalloc_small +=
+ ctl_arena->astats->bstats[i].nmalloc;
+ ctl_arena->astats->ndalloc_small +=
+ ctl_arena->astats->bstats[i].ndalloc;
+ ctl_arena->astats->nrequests_small +=
+ ctl_arena->astats->bstats[i].nrequests;
+ }
+ } else {
+ arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads,
+ &ctl_arena->dss, &ctl_arena->dirty_decay_ms,
+ &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive,
+ &ctl_arena->pdirty, &ctl_arena->pmuzzy);
}
}
static void
-ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
-{
+ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
+ bool destroyed) {
unsigned i;
- sstats->pactive += astats->pactive;
- sstats->pdirty += astats->pdirty;
-
- sstats->astats.mapped += astats->astats.mapped;
- sstats->astats.npurge += astats->astats.npurge;
- sstats->astats.nmadvise += astats->astats.nmadvise;
- sstats->astats.purged += astats->astats.purged;
-
- sstats->allocated_small += astats->allocated_small;
- sstats->nmalloc_small += astats->nmalloc_small;
- sstats->ndalloc_small += astats->ndalloc_small;
- sstats->nrequests_small += astats->nrequests_small;
-
- sstats->astats.allocated_large += astats->astats.allocated_large;
- sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
- sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
- sstats->astats.nrequests_large += astats->astats.nrequests_large;
-
- for (i = 0; i < nlclasses; i++) {
- sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
- sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
- sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
- sstats->lstats[i].curruns += astats->lstats[i].curruns;
- }
-
- for (i = 0; i < NBINS; i++) {
- sstats->bstats[i].allocated += astats->bstats[i].allocated;
- sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
- sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
- sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
- if (config_tcache) {
- sstats->bstats[i].nfills += astats->bstats[i].nfills;
- sstats->bstats[i].nflushes +=
+ if (!destroyed) {
+ ctl_sdarena->nthreads += ctl_arena->nthreads;
+ ctl_sdarena->pactive += ctl_arena->pactive;
+ ctl_sdarena->pdirty += ctl_arena->pdirty;
+ ctl_sdarena->pmuzzy += ctl_arena->pmuzzy;
+ } else {
+ assert(ctl_arena->nthreads == 0);
+ assert(ctl_arena->pactive == 0);
+ assert(ctl_arena->pdirty == 0);
+ assert(ctl_arena->pmuzzy == 0);
+ }
+
+ if (config_stats) {
+ ctl_arena_stats_t *sdstats = ctl_sdarena->astats;
+ ctl_arena_stats_t *astats = ctl_arena->astats;
+
+ if (!destroyed) {
+ accum_atomic_zu(&sdstats->astats.mapped,
+ &astats->astats.mapped);
+ accum_atomic_zu(&sdstats->astats.retained,
+ &astats->astats.retained);
+ }
+
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge,
+ &astats->astats.decay_dirty.npurge);
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise,
+ &astats->astats.decay_dirty.nmadvise);
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged,
+ &astats->astats.decay_dirty.purged);
+
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge,
+ &astats->astats.decay_muzzy.npurge);
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise,
+ &astats->astats.decay_muzzy.nmadvise);
+ ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged,
+ &astats->astats.decay_muzzy.purged);
+
+#define OP(mtx) malloc_mutex_prof_merge( \
+ &(sdstats->astats.mutex_prof_data[ \
+ arena_prof_mutex_##mtx]), \
+ &(astats->astats.mutex_prof_data[ \
+ arena_prof_mutex_##mtx]));
+MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+ if (!destroyed) {
+ accum_atomic_zu(&sdstats->astats.base,
+ &astats->astats.base);
+ accum_atomic_zu(&sdstats->astats.internal,
+ &astats->astats.internal);
+ accum_atomic_zu(&sdstats->astats.resident,
+ &astats->astats.resident);
+ accum_atomic_zu(&sdstats->astats.metadata_thp,
+ &astats->astats.metadata_thp);
+ } else {
+ assert(atomic_load_zu(
+ &astats->astats.internal, ATOMIC_RELAXED) == 0);
+ }
+
+ if (!destroyed) {
+ sdstats->allocated_small += astats->allocated_small;
+ } else {
+ assert(astats->allocated_small == 0);
+ }
+ sdstats->nmalloc_small += astats->nmalloc_small;
+ sdstats->ndalloc_small += astats->ndalloc_small;
+ sdstats->nrequests_small += astats->nrequests_small;
+
+ if (!destroyed) {
+ accum_atomic_zu(&sdstats->astats.allocated_large,
+ &astats->astats.allocated_large);
+ } else {
+ assert(atomic_load_zu(&astats->astats.allocated_large,
+ ATOMIC_RELAXED) == 0);
+ }
+ ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large,
+ &astats->astats.nmalloc_large);
+ ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large,
+ &astats->astats.ndalloc_large);
+ ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large,
+ &astats->astats.nrequests_large);
+
+ accum_atomic_zu(&sdstats->astats.tcache_bytes,
+ &astats->astats.tcache_bytes);
+
+ if (ctl_arena->arena_ind == 0) {
+ sdstats->astats.uptime = astats->astats.uptime;
+ }
+
+ for (i = 0; i < NBINS; i++) {
+ sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
+ sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
+ sdstats->bstats[i].nrequests +=
+ astats->bstats[i].nrequests;
+ if (!destroyed) {
+ sdstats->bstats[i].curregs +=
+ astats->bstats[i].curregs;
+ } else {
+ assert(astats->bstats[i].curregs == 0);
+ }
+ sdstats->bstats[i].nfills += astats->bstats[i].nfills;
+ sdstats->bstats[i].nflushes +=
astats->bstats[i].nflushes;
+ sdstats->bstats[i].nslabs += astats->bstats[i].nslabs;
+ sdstats->bstats[i].reslabs += astats->bstats[i].reslabs;
+ if (!destroyed) {
+ sdstats->bstats[i].curslabs +=
+ astats->bstats[i].curslabs;
+ } else {
+ assert(astats->bstats[i].curslabs == 0);
+ }
+ malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data,
+ &astats->bstats[i].mutex_data);
+ }
+
+ for (i = 0; i < NSIZES - NBINS; i++) {
+ ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc,
+ &astats->lstats[i].nmalloc);
+ ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc,
+ &astats->lstats[i].ndalloc);
+ ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests,
+ &astats->lstats[i].nrequests);
+ if (!destroyed) {
+ sdstats->lstats[i].curlextents +=
+ astats->lstats[i].curlextents;
+ } else {
+ assert(astats->lstats[i].curlextents == 0);
+ }
}
- sstats->bstats[i].nruns += astats->bstats[i].nruns;
- sstats->bstats[i].reruns += astats->bstats[i].reruns;
- sstats->bstats[i].curruns += astats->bstats[i].curruns;
}
}
static void
-ctl_arena_refresh(arena_t *arena, unsigned i)
-{
- ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
- ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas];
+ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena,
+ unsigned i, bool destroyed) {
+ ctl_arena_t *ctl_arena = arenas_i(i);
+
+ ctl_arena_clear(ctl_arena);
+ ctl_arena_stats_amerge(tsdn, ctl_arena, arena);
+ /* Merge into sum stats as well. */
+ ctl_arena_stats_sdmerge(ctl_sdarena, ctl_arena, destroyed);
+}
- ctl_arena_clear(astats);
+static unsigned
+ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) {
+ unsigned arena_ind;
+ ctl_arena_t *ctl_arena;
- sstats->nthreads += astats->nthreads;
- if (config_stats) {
- ctl_arena_stats_amerge(astats, arena);
- /* Merge into sum stats as well. */
- ctl_arena_stats_smerge(sstats, astats);
+ if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) !=
+ NULL) {
+ ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link);
+ arena_ind = ctl_arena->arena_ind;
} else {
- astats->pactive += arena->nactive;
- astats->pdirty += arena->ndirty;
- /* Merge into sum stats as well. */
- sstats->pactive += arena->nactive;
- sstats->pdirty += arena->ndirty;
+ arena_ind = ctl_arenas->narenas;
}
-}
-static bool
-ctl_grow(void)
-{
- ctl_arena_stats_t *astats;
- arena_t **tarenas;
-
- /* Allocate extended arena stats and arenas arrays. */
- astats = (ctl_arena_stats_t *)imalloc((ctl_stats.narenas + 2) *
- sizeof(ctl_arena_stats_t));
- if (astats == NULL)
- return (true);
- tarenas = (arena_t **)imalloc((ctl_stats.narenas + 1) *
- sizeof(arena_t *));
- if (tarenas == NULL) {
- idalloc(astats);
- return (true);
- }
-
- /* Initialize the new astats element. */
- memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) *
- sizeof(ctl_arena_stats_t));
- memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t));
- if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) {
- idalloc(tarenas);
- idalloc(astats);
- return (true);
- }
- /* Swap merged stats to their new location. */
- {
- ctl_arena_stats_t tstats;
- memcpy(&tstats, &astats[ctl_stats.narenas],
- sizeof(ctl_arena_stats_t));
- memcpy(&astats[ctl_stats.narenas],
- &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t));
- memcpy(&astats[ctl_stats.narenas + 1], &tstats,
- sizeof(ctl_arena_stats_t));
- }
- /* Initialize the new arenas element. */
- tarenas[ctl_stats.narenas] = NULL;
- {
- arena_t **arenas_old = arenas;
- /*
- * Swap extended arenas array into place. Although ctl_mtx
- * protects this function from other threads extending the
- * array, it does not protect from other threads mutating it
- * (i.e. initializing arenas and setting array elements to
- * point to them). Therefore, array copying must happen under
- * the protection of arenas_lock.
- */
- malloc_mutex_lock(&arenas_lock);
- arenas = tarenas;
- memcpy(arenas, arenas_old, ctl_stats.narenas *
- sizeof(arena_t *));
- narenas_total++;
- arenas_extend(narenas_total - 1);
- malloc_mutex_unlock(&arenas_lock);
- /*
- * Deallocate arenas_old only if it came from imalloc() (not
- * base_alloc()).
- */
- if (ctl_stats.narenas != narenas_auto)
- idalloc(arenas_old);
+ /* Trigger stats allocation. */
+ if (arenas_i_impl(tsd, arena_ind, false, true) == NULL) {
+ return UINT_MAX;
}
- ctl_stats.arenas = astats;
- ctl_stats.narenas++;
- return (false);
-}
+ /* Initialize new arena. */
+ if (arena_init(tsd_tsdn(tsd), arena_ind, extent_hooks) == NULL) {
+ return UINT_MAX;
+ }
-static void
-ctl_refresh(void)
-{
- unsigned i;
- VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
+ if (arena_ind == ctl_arenas->narenas) {
+ ctl_arenas->narenas++;
+ }
- if (config_stats) {
- malloc_mutex_lock(&chunks_mtx);
- ctl_stats.chunks.current = stats_chunks.curchunks;
- ctl_stats.chunks.total = stats_chunks.nchunks;
- ctl_stats.chunks.high = stats_chunks.highchunks;
- malloc_mutex_unlock(&chunks_mtx);
+ return arena_ind;
+}
- malloc_mutex_lock(&huge_mtx);
- ctl_stats.huge.allocated = huge_allocated;
- ctl_stats.huge.nmalloc = huge_nmalloc;
- ctl_stats.huge.ndalloc = huge_ndalloc;
- malloc_mutex_unlock(&huge_mtx);
+static void
+ctl_background_thread_stats_read(tsdn_t *tsdn) {
+ background_thread_stats_t *stats = &ctl_stats->background_thread;
+ if (!have_background_thread ||
+ background_thread_stats_read(tsdn, stats)) {
+ memset(stats, 0, sizeof(background_thread_stats_t));
+ nstime_init(&stats->run_interval, 0);
}
+}
+
+static void
+ctl_refresh(tsdn_t *tsdn) {
+ unsigned i;
+ ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL);
+ VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas);
/*
* Clear sum stats, since they will be merged into by
* ctl_arena_refresh().
*/
- ctl_stats.arenas[ctl_stats.narenas].nthreads = 0;
- ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]);
-
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
- for (i = 0; i < ctl_stats.narenas; i++) {
- if (arenas[i] != NULL)
- ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
- else
- ctl_stats.arenas[i].nthreads = 0;
- }
- malloc_mutex_unlock(&arenas_lock);
- for (i = 0; i < ctl_stats.narenas; i++) {
+ ctl_arena_clear(ctl_sarena);
+
+ for (i = 0; i < ctl_arenas->narenas; i++) {
+ tarenas[i] = arena_get(tsdn, i, false);
+ }
+
+ for (i = 0; i < ctl_arenas->narenas; i++) {
+ ctl_arena_t *ctl_arena = arenas_i(i);
bool initialized = (tarenas[i] != NULL);
- ctl_stats.arenas[i].initialized = initialized;
- if (initialized)
- ctl_arena_refresh(tarenas[i], i);
+ ctl_arena->initialized = initialized;
+ if (initialized) {
+ ctl_arena_refresh(tsdn, tarenas[i], ctl_sarena, i,
+ false);
+ }
}
if (config_stats) {
- ctl_stats.allocated =
- ctl_stats.arenas[ctl_stats.narenas].allocated_small
- + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large
- + ctl_stats.huge.allocated;
- ctl_stats.active =
- (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE)
- + ctl_stats.huge.allocated;
- ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+ ctl_stats->allocated = ctl_sarena->astats->allocated_small +
+ atomic_load_zu(&ctl_sarena->astats->astats.allocated_large,
+ ATOMIC_RELAXED);
+ ctl_stats->active = (ctl_sarena->pactive << LG_PAGE);
+ ctl_stats->metadata = atomic_load_zu(
+ &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) +
+ atomic_load_zu(&ctl_sarena->astats->astats.internal,
+ ATOMIC_RELAXED);
+ ctl_stats->metadata_thp = atomic_load_zu(
+ &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED);
+ ctl_stats->resident = atomic_load_zu(
+ &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED);
+ ctl_stats->mapped = atomic_load_zu(
+ &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED);
+ ctl_stats->retained = atomic_load_zu(
+ &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED);
+
+ ctl_background_thread_stats_read(tsdn);
+
+#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \
+ malloc_mutex_lock(tsdn, &mtx); \
+ malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \
+ malloc_mutex_unlock(tsdn, &mtx);
+
+ if (config_prof && opt_prof) {
+ READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof,
+ bt2gctx_mtx);
+ }
+ if (have_background_thread) {
+ READ_GLOBAL_MUTEX_PROF_DATA(
+ global_prof_mutex_background_thread,
+ background_thread_lock);
+ } else {
+ memset(&ctl_stats->mutex_prof_data[
+ global_prof_mutex_background_thread], 0,
+ sizeof(mutex_prof_data_t));
+ }
+ /* We own ctl mutex already. */
+ malloc_mutex_prof_read(tsdn,
+ &ctl_stats->mutex_prof_data[global_prof_mutex_ctl],
+ &ctl_mtx);
+#undef READ_GLOBAL_MUTEX_PROF_DATA
}
-
- ctl_epoch++;
+ ctl_arenas->epoch++;
}
static bool
-ctl_init(void)
-{
+ctl_init(tsd_t *tsd) {
bool ret;
+ tsdn_t *tsdn = tsd_tsdn(tsd);
+
+ malloc_mutex_lock(tsdn, &ctl_mtx);
+ if (!ctl_initialized) {
+ ctl_arena_t *ctl_sarena, *ctl_darena;
+ unsigned i;
+
+ /*
+ * Allocate demand-zeroed space for pointers to the full
+ * range of supported arena indices.
+ */
+ if (ctl_arenas == NULL) {
+ ctl_arenas = (ctl_arenas_t *)base_alloc(tsdn,
+ b0get(), sizeof(ctl_arenas_t), QUANTUM);
+ if (ctl_arenas == NULL) {
+ ret = true;
+ goto label_return;
+ }
+ }
+
+ if (config_stats && ctl_stats == NULL) {
+ ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(),
+ sizeof(ctl_stats_t), QUANTUM);
+ if (ctl_stats == NULL) {
+ ret = true;
+ goto label_return;
+ }
+ }
- malloc_mutex_lock(&ctl_mtx);
- if (ctl_initialized == false) {
/*
- * Allocate space for one extra arena stats element, which
- * contains summed stats across all arenas.
+ * Allocate space for the current full range of arenas
+ * here rather than doing it lazily elsewhere, in order
+ * to limit when OOM-caused errors can occur.
*/
- assert(narenas_auto == narenas_total_get());
- ctl_stats.narenas = narenas_auto;
- ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc(
- (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t));
- if (ctl_stats.arenas == NULL) {
+ if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false,
+ true)) == NULL) {
ret = true;
goto label_return;
}
- memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) *
- sizeof(ctl_arena_stats_t));
+ ctl_sarena->initialized = true;
+ if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED,
+ false, true)) == NULL) {
+ ret = true;
+ goto label_return;
+ }
+ ctl_arena_clear(ctl_darena);
/*
- * Initialize all stats structures, regardless of whether they
- * ever get used. Lazy initialization would allow errors to
- * cause inconsistent state to be viewable by the application.
+ * Don't toggle ctl_darena to initialized until an arena is
+ * actually destroyed, so that arena.<i>.initialized can be used
+ * to query whether the stats are relevant.
*/
- if (config_stats) {
- unsigned i;
- for (i = 0; i <= ctl_stats.narenas; i++) {
- if (ctl_arena_init(&ctl_stats.arenas[i])) {
- ret = true;
- goto label_return;
- }
+
+ ctl_arenas->narenas = narenas_total_get();
+ for (i = 0; i < ctl_arenas->narenas; i++) {
+ if (arenas_i_impl(tsd, i, false, true) == NULL) {
+ ret = true;
+ goto label_return;
}
}
- ctl_stats.arenas[ctl_stats.narenas].initialized = true;
- ctl_epoch = 0;
- ctl_refresh();
+ ql_new(&ctl_arenas->destroyed);
+ ctl_refresh(tsdn);
+
ctl_initialized = true;
}
ret = false;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+ return ret;
}
static int
-ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
- size_t *depthp)
-{
+ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp,
+ size_t *mibp, size_t *depthp) {
int ret;
const char *elm, *tdot, *dot;
size_t elen, i, j;
@@ -750,9 +1101,10 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
if (strlen(child->name) == elen &&
strncmp(elm, child->name, elen) == 0) {
node = child;
- if (nodesp != NULL)
+ if (nodesp != NULL) {
nodesp[i] =
(const ctl_node_t *)node;
+ }
mibp[i] = j;
break;
}
@@ -773,14 +1125,15 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
}
inode = ctl_indexed_node(node->children);
- node = inode->index(mibp, *depthp, (size_t)index);
+ node = inode->index(tsdn, mibp, *depthp, (size_t)index);
if (node == NULL) {
ret = ENOENT;
goto label_return;
}
- if (nodesp != NULL)
+ if (nodesp != NULL) {
nodesp[i] = (const ctl_node_t *)node;
+ }
mibp[i] = (size_t)index;
}
@@ -813,33 +1166,33 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
ret = 0;
label_return:
- return (ret);
+ return ret;
}
int
-ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen)
-{
+ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen) {
int ret;
size_t depth;
ctl_node_t const *nodes[CTL_MAX_DEPTH];
size_t mib[CTL_MAX_DEPTH];
const ctl_named_node_t *node;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init(tsd)) {
ret = EAGAIN;
goto label_return;
}
depth = CTL_MAX_DEPTH;
- ret = ctl_lookup(name, nodes, mib, &depth);
- if (ret != 0)
+ ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth);
+ if (ret != 0) {
goto label_return;
+ }
node = ctl_named_node(nodes[depth-1]);
- if (node != NULL && node->ctl)
- ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen);
- else {
+ if (node != NULL && node->ctl) {
+ ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen);
+ } else {
/* The name refers to a partial path through the ctl tree. */
ret = ENOENT;
}
@@ -849,29 +1202,27 @@ label_return:
}
int
-ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
-{
+ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) {
int ret;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init(tsd)) {
ret = EAGAIN;
goto label_return;
}
- ret = ctl_lookup(name, NULL, mibp, miblenp);
+ ret = ctl_lookup(tsd_tsdn(tsd), name, NULL, mibp, miblenp);
label_return:
return(ret);
}
int
-ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
const ctl_named_node_t *node;
size_t i;
- if (ctl_initialized == false && ctl_init()) {
+ if (!ctl_initialized && ctl_init(tsd)) {
ret = EAGAIN;
goto label_return;
}
@@ -893,7 +1244,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
/* Indexed element. */
inode = ctl_indexed_node(node->children);
- node = inode->index(mib, miblen, mib[i]);
+ node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]);
if (node == NULL) {
ret = ENOENT;
goto label_return;
@@ -902,9 +1253,9 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
}
/* Call the ctl function. */
- if (node && node->ctl)
- ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
- else {
+ if (node && node->ctl) {
+ ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
+ } else {
/* Partial MIB. */
ret = ENOENT;
}
@@ -914,56 +1265,58 @@ label_return:
}
bool
-ctl_boot(void)
-{
-
- if (malloc_mutex_init(&ctl_mtx))
- return (true);
+ctl_boot(void) {
+ if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
ctl_initialized = false;
- return (false);
+ return false;
}
void
-ctl_prefork(void)
-{
-
- malloc_mutex_prefork(&ctl_mtx);
+ctl_prefork(tsdn_t *tsdn) {
+ malloc_mutex_prefork(tsdn, &ctl_mtx);
}
void
-ctl_postfork_parent(void)
-{
-
- malloc_mutex_postfork_parent(&ctl_mtx);
+ctl_postfork_parent(tsdn_t *tsdn) {
+ malloc_mutex_postfork_parent(tsdn, &ctl_mtx);
}
void
-ctl_postfork_child(void)
-{
-
- malloc_mutex_postfork_child(&ctl_mtx);
+ctl_postfork_child(tsdn_t *tsdn) {
+ malloc_mutex_postfork_child(tsdn, &ctl_mtx);
}
/******************************************************************************/
/* *_ctl() functions. */
-#define READONLY() do { \
+#define READONLY() do { \
if (newp != NULL || newlen != 0) { \
ret = EPERM; \
goto label_return; \
} \
} while (0)
-#define WRITEONLY() do { \
+#define WRITEONLY() do { \
if (oldp != NULL || oldlenp != NULL) { \
ret = EPERM; \
goto label_return; \
} \
} while (0)
-#define READ(v, t) do { \
+#define READ_XOR_WRITE() do { \
+ if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \
+ newlen != 0)) { \
+ ret = EPERM; \
+ goto label_return; \
+ } \
+} while (0)
+
+#define READ(v, t) do { \
if (oldp != NULL && oldlenp != NULL) { \
if (*oldlenp != sizeof(t)) { \
size_t copylen = (sizeof(t) <= *oldlenp) \
@@ -971,12 +1324,12 @@ ctl_postfork_child(void)
memcpy(oldp, (void *)&(v), copylen); \
ret = EINVAL; \
goto label_return; \
- } else \
- *(t *)oldp = (v); \
+ } \
+ *(t *)oldp = (v); \
} \
} while (0)
-#define WRITE(v, t) do { \
+#define WRITE(v, t) do { \
if (newp != NULL) { \
if (newlen != sizeof(t)) { \
ret = EINVAL; \
@@ -986,101 +1339,109 @@ ctl_postfork_child(void)
} \
} while (0)
+#define MIB_UNSIGNED(v, i) do { \
+ if (mib[i] > UINT_MAX) { \
+ ret = EFAULT; \
+ goto label_return; \
+ } \
+ v = (unsigned)mib[i]; \
+} while (0)
+
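
The macros above encode the mallctl old/new protocol: oldp/oldlenp return the previous value, newp/newlen supply a replacement, and size mismatches map to EINVAL. A hand-expanded sketch for a hypothetical boolean knob (demo_flag and demo_ctl are illustrative names only, and the simplified READ branch omits the partial-copy-before-EINVAL behavior of the real macro):

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>

    static bool demo_flag;  /* hypothetical knob, not a real ctl */

    static int
    demo_ctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
        int ret;
        bool oldval = demo_flag;

        if (newp != NULL) {                       /* WRITE(demo_flag, bool) */
            if (newlen != sizeof(bool)) {
                ret = EINVAL;
                goto label_return;
            }
            demo_flag = *(bool *)newp;
        }
        if (oldp != NULL && oldlenp != NULL) {    /* READ(oldval, bool) */
            if (*oldlenp != sizeof(bool)) {
                ret = EINVAL;  /* the real READ first copies a prefix */
                goto label_return;
            }
            *(bool *)oldp = oldval;
        }

        ret = 0;
    label_return:
        return ret;
    }
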
/*
* There's a lot of code duplication in the following macros due to limitations
* in how nested cpp macros are expanded.
*/
-#define CTL_RO_CLGEN(c, l, n, v, t) \
+#define CTL_RO_CLGEN(c, l, n, v, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
t oldval; \
\
- if ((c) == false) \
- return (ENOENT); \
- if (l) \
- malloc_mutex_lock(&ctl_mtx); \
+ if (!(c)) { \
+ return ENOENT; \
+ } \
+ if (l) { \
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \
+ } \
READONLY(); \
oldval = (v); \
READ(oldval, t); \
\
ret = 0; \
label_return: \
- if (l) \
- malloc_mutex_unlock(&ctl_mtx); \
- return (ret); \
+ if (l) { \
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \
+ } \
+ return ret; \
}
-#define CTL_RO_CGEN(c, n, v, t) \
+#define CTL_RO_CGEN(c, n, v, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
t oldval; \
\
- if ((c) == false) \
- return (ENOENT); \
- malloc_mutex_lock(&ctl_mtx); \
+ if (!(c)) { \
+ return ENOENT; \
+ } \
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \
READONLY(); \
oldval = (v); \
READ(oldval, t); \
\
ret = 0; \
label_return: \
- malloc_mutex_unlock(&ctl_mtx); \
- return (ret); \
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \
+ return ret; \
}
-#define CTL_RO_GEN(n, v, t) \
+#define CTL_RO_GEN(n, v, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
t oldval; \
\
- malloc_mutex_lock(&ctl_mtx); \
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \
READONLY(); \
oldval = (v); \
READ(oldval, t); \
\
ret = 0; \
label_return: \
- malloc_mutex_unlock(&ctl_mtx); \
- return (ret); \
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \
+ return ret; \
}
/*
* ctl_mtx is not acquired, under the assumption that no pertinent data will
* mutate during the call.
*/
-#define CTL_RO_NL_CGEN(c, n, v, t) \
+#define CTL_RO_NL_CGEN(c, n, v, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
t oldval; \
\
- if ((c) == false) \
- return (ENOENT); \
+ if (!(c)) { \
+ return ENOENT; \
+ } \
READONLY(); \
oldval = (v); \
READ(oldval, t); \
\
ret = 0; \
label_return: \
- return (ret); \
+ return ret; \
}
-#define CTL_RO_NL_GEN(n, v, t) \
+#define CTL_RO_NL_GEN(n, v, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
t oldval; \
\
@@ -1090,24 +1451,42 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
\
ret = 0; \
label_return: \
- return (ret); \
+ return ret; \
+}
+
+#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \
+static int \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
+ int ret; \
+ t oldval; \
+ \
+ if (!(c)) { \
+ return ENOENT; \
+ } \
+ READONLY(); \
+ oldval = (m(tsd)); \
+ READ(oldval, t); \
+ \
+ ret = 0; \
+label_return: \
+ return ret; \
}
-#define CTL_RO_BOOL_CONFIG_GEN(n) \
+#define CTL_RO_CONFIG_GEN(n, t) \
static int \
-n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
- void *newp, size_t newlen) \
-{ \
+n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \
+ size_t *oldlenp, void *newp, size_t newlen) { \
int ret; \
- bool oldval; \
+ t oldval; \
\
READONLY(); \
oldval = n; \
- READ(oldval, bool); \
+ READ(oldval, t); \
\
ret = 0; \
label_return: \
- return (ret); \
+ return ret; \
}
/******************************************************************************/
@@ -1115,62 +1494,192 @@ label_return: \
CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
static int
-epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
UNUSED uint64_t newval;
- malloc_mutex_lock(&ctl_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
WRITE(newval, uint64_t);
- if (newp != NULL)
- ctl_refresh();
- READ(ctl_epoch, uint64_t);
+ if (newp != NULL) {
+ ctl_refresh(tsd_tsdn(tsd));
+ }
+ READ(ctl_arenas->epoch, uint64_t);
+
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
+}
+
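
Note that epoch_ctl() ignores the written value: any write triggers ctl_refresh() and advances the counter, while a plain read just reports it. A sketch of that contract (single-threaded; public mallctl API assumed):

    #include <assert.h>
    #include <stdint.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        uint64_t before = 0, after = 0, ignored = 42;
        size_t sz = sizeof(uint64_t);

        /* A plain read reports the counter without refreshing. */
        mallctl("epoch", &before, &sz, NULL, 0);
        /* Any write (the value itself is ignored) refreshes and bumps. */
        mallctl("epoch", &after, &sz, &ignored, sizeof(ignored));
        assert(after == before + 1);
        return 0;
    }
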
+static int
+background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ bool oldval;
+
+ if (!have_background_thread) {
+ return ENOENT;
+ }
+ background_thread_ctl_init(tsd_tsdn(tsd));
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+ if (newp == NULL) {
+ oldval = background_thread_enabled();
+ READ(oldval, bool);
+ } else {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = background_thread_enabled();
+ READ(oldval, bool);
+
+ bool newval = *(bool *)newp;
+ if (newval == oldval) {
+ ret = 0;
+ goto label_return;
+ }
+
+ background_thread_enabled_set(tsd_tsdn(tsd), newval);
+ if (newval) {
+ if (!can_enable_background_thread) {
+ malloc_printf("<jemalloc>: Error in dlsym("
+ "RTLD_NEXT, \"pthread_create\"). Cannot "
+ "enable background_thread\n");
+ ret = EFAULT;
+ goto label_return;
+ }
+ if (background_threads_enable(tsd)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ } else {
+ if (background_threads_disable(tsd)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ }
+ }
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+
+ return ret;
+}
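
Callers flip this knob through the "background_thread" mallctl; ENOENT means the build lacks background-thread support, and EFAULT covers the dlsym/pthread_create failures handled above. A minimal sketch:

    #include <stdbool.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        bool enable = true, was;
        size_t sz = sizeof(was);

        int err = mallctl("background_thread", &was, &sz, &enable,
            sizeof(enable));
        if (err != 0) {         /* ENOENT: built without the feature */
            fprintf(stderr, "background_thread: %d\n", err);
            return 1;
        }
        printf("background threads: %s -> on\n", was ? "on" : "off");
        return 0;
    }
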
+static int
+max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ size_t oldval;
+
+ if (!have_background_thread) {
+ return ENOENT;
+ }
+ background_thread_ctl_init(tsd_tsdn(tsd));
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+ if (newp == NULL) {
+ oldval = max_background_threads;
+ READ(oldval, size_t);
+ } else {
+ if (newlen != sizeof(size_t)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = max_background_threads;
+ READ(oldval, size_t);
+
+ size_t newval = *(size_t *)newp;
+ if (newval == oldval) {
+ ret = 0;
+ goto label_return;
+ }
+ if (newval > opt_max_background_threads) {
+ ret = EINVAL;
+ goto label_return;
+ }
+
+ if (background_thread_enabled()) {
+ if (!can_enable_background_thread) {
+ malloc_printf("<jemalloc>: Error in dlsym("
+ "RTLD_NEXT, \"pthread_create\"). Cannot "
+ "enable background_thread\n");
+ ret = EFAULT;
+ goto label_return;
+ }
+ background_thread_enabled_set(tsd_tsdn(tsd), false);
+ if (background_threads_disable(tsd)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ max_background_threads = newval;
+ background_thread_enabled_set(tsd_tsdn(tsd), true);
+ if (background_threads_enable(tsd)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ } else {
+ max_background_threads = newval;
+ }
+ }
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+
+ return ret;
}
/******************************************************************************/
-CTL_RO_BOOL_CONFIG_GEN(config_debug)
-CTL_RO_BOOL_CONFIG_GEN(config_dss)
-CTL_RO_BOOL_CONFIG_GEN(config_fill)
-CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock)
-CTL_RO_BOOL_CONFIG_GEN(config_mremap)
-CTL_RO_BOOL_CONFIG_GEN(config_munmap)
-CTL_RO_BOOL_CONFIG_GEN(config_prof)
-CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc)
-CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
-CTL_RO_BOOL_CONFIG_GEN(config_stats)
-CTL_RO_BOOL_CONFIG_GEN(config_tcache)
-CTL_RO_BOOL_CONFIG_GEN(config_tls)
-CTL_RO_BOOL_CONFIG_GEN(config_utrace)
-CTL_RO_BOOL_CONFIG_GEN(config_valgrind)
-CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
+CTL_RO_CONFIG_GEN(config_cache_oblivious, bool)
+CTL_RO_CONFIG_GEN(config_debug, bool)
+CTL_RO_CONFIG_GEN(config_fill, bool)
+CTL_RO_CONFIG_GEN(config_lazy_lock, bool)
+CTL_RO_CONFIG_GEN(config_malloc_conf, const char *)
+CTL_RO_CONFIG_GEN(config_prof, bool)
+CTL_RO_CONFIG_GEN(config_prof_libgcc, bool)
+CTL_RO_CONFIG_GEN(config_prof_libunwind, bool)
+CTL_RO_CONFIG_GEN(config_stats, bool)
+CTL_RO_CONFIG_GEN(config_utrace, bool)
+CTL_RO_CONFIG_GEN(config_xmalloc, bool)
/******************************************************************************/
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
+CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
+CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
+ const char *)
+CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
-CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
-CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
-CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
+CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena],
+ const char *)
+CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool)
+CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t)
+CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t)
+CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t)
CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
-CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool)
-CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t)
-CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool)
+CTL_RO_NL_GEN(opt_stats_print_opts, opt_stats_print_opts, const char *)
+CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *)
CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool)
CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
-CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool)
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
-CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
-CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
+CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
+CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *)
+CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit,
+ size_t)
+CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
-CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */
+CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init,
+ opt_prof_thread_active_init, bool)
CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t)
CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool)
CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
@@ -1181,504 +1690,1194 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
/******************************************************************************/
static int
-thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
+ arena_t *oldarena;
unsigned newind, oldind;
- malloc_mutex_lock(&ctl_mtx);
- newind = oldind = choose_arena(NULL)->ind;
+ oldarena = arena_choose(tsd, NULL);
+ if (oldarena == NULL) {
+ return EAGAIN;
+ }
+ newind = oldind = arena_ind_get(oldarena);
WRITE(newind, unsigned);
READ(oldind, unsigned);
+
if (newind != oldind) {
- arena_t *arena;
+ arena_t *newarena;
- if (newind >= ctl_stats.narenas) {
+ if (newind >= narenas_total_get()) {
/* New arena index is out of range. */
ret = EFAULT;
goto label_return;
}
+ if (have_percpu_arena &&
+ PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
+ if (newind < percpu_arena_ind_limit(opt_percpu_arena)) {
+ /*
+ * If perCPU arena is enabled, thread_arena
+ * control is not allowed for the auto arena
+ * range.
+ */
+ ret = EPERM;
+ goto label_return;
+ }
+ }
+
/* Initialize arena if necessary. */
- malloc_mutex_lock(&arenas_lock);
- if ((arena = arenas[newind]) == NULL && (arena =
- arenas_extend(newind)) == NULL) {
- malloc_mutex_unlock(&arenas_lock);
+ newarena = arena_get(tsd_tsdn(tsd), newind, true);
+ if (newarena == NULL) {
ret = EAGAIN;
goto label_return;
}
- assert(arena == arenas[newind]);
- arenas[oldind]->nthreads--;
- arenas[newind]->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
-
- /* Set new arena association. */
- if (config_tcache) {
- tcache_t *tcache;
- if ((uintptr_t)(tcache = *tcache_tsd_get()) >
- (uintptr_t)TCACHE_STATE_MAX) {
- tcache_arena_dissociate(tcache);
- tcache_arena_associate(tcache, arena);
- }
+ /* Set new arena/tcache associations. */
+ arena_migrate(tsd, oldind, newind);
+ if (tcache_available(tsd)) {
+ tcache_arena_reassociate(tsd_tsdn(tsd),
+ tsd_tcachep_get(tsd), newarena);
}
- arenas_tsd_set(&arena);
}
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ return ret;
}
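
From the application side this is the "thread.arena" mallctl: reading returns the calling thread's arena index, writing migrates the thread, and EPERM is returned for the per-CPU auto range as above. A sketch that creates a fresh manual arena and moves the current thread onto it:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        unsigned old_ind, new_ind;
        size_t sz = sizeof(new_ind);

        /* Manual arenas sit past the auto range, so EPERM cannot hit. */
        if (mallctl("arenas.create", &new_ind, &sz, NULL, 0) != 0) {
            return 1;
        }
        sz = sizeof(old_ind);
        if (mallctl("thread.arena", &old_ind, &sz, &new_ind,
            sizeof(new_ind)) != 0) {
            return 1;
        }
        printf("thread moved from arena %u to %u\n", old_ind, new_ind);
        return 0;
    }
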
-CTL_RO_NL_CGEN(config_stats, thread_allocated,
- thread_allocated_tsd_get()->allocated, uint64_t)
-CTL_RO_NL_CGEN(config_stats, thread_allocatedp,
- &thread_allocated_tsd_get()->allocated, uint64_t *)
-CTL_RO_NL_CGEN(config_stats, thread_deallocated,
- thread_allocated_tsd_get()->deallocated, uint64_t)
-CTL_RO_NL_CGEN(config_stats, thread_deallocatedp,
- &thread_allocated_tsd_get()->deallocated, uint64_t *)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get,
+ uint64_t)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get,
+ uint64_t *)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get,
+ uint64_t)
+CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp,
+ tsd_thread_deallocatedp_get, uint64_t *)
static int
-thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen)
-{
+thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
int ret;
bool oldval;
- if (config_tcache == false)
- return (ENOENT);
-
- oldval = tcache_enabled_get();
+ oldval = tcache_enabled_get(tsd);
if (newp != NULL) {
if (newlen != sizeof(bool)) {
ret = EINVAL;
goto label_return;
}
- tcache_enabled_set(*(bool *)newp);
+ tcache_enabled_set(tsd, *(bool *)newp);
}
READ(oldval, bool);
ret = 0;
label_return:
- return (ret);
+ return ret;
+}
+
+static int
+thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+
+ if (!tcache_available(tsd)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+
+ READONLY();
+ WRITEONLY();
+
+ tcache_flush(tsd);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static int
+thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+
+ if (!config_prof) {
+ return ENOENT;
+ }
+
+ READ_XOR_WRITE();
+
+ if (newp != NULL) {
+ if (newlen != sizeof(const char *)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+
+ if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) !=
+ 0) {
+ goto label_return;
+ }
+ } else {
+ const char *oldname = prof_thread_name_get(tsd);
+ READ(oldname, const char *);
+ }
+
+ ret = 0;
+label_return:
+ return ret;
}
static int
-thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen)
-{
+thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
+ bool oldval;
- if (config_tcache == false)
- return (ENOENT);
+ if (!config_prof) {
+ return ENOENT;
+ }
+
+ oldval = prof_thread_active_get(tsd);
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (prof_thread_active_set(tsd, *(bool *)newp)) {
+ ret = EAGAIN;
+ goto label_return;
+ }
+ }
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+/******************************************************************************/
+
+static int
+tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned tcache_ind;
READONLY();
+ if (tcaches_create(tsd, &tcache_ind)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ READ(tcache_ind, unsigned);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static int
+tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned tcache_ind;
+
WRITEONLY();
+ tcache_ind = UINT_MAX;
+ WRITE(tcache_ind, unsigned);
+ if (tcache_ind == UINT_MAX) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ tcaches_flush(tsd, tcache_ind);
+
+ ret = 0;
+label_return:
+ return ret;
+}
- tcache_flush();
+static int
+tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned tcache_ind;
+
+ WRITEONLY();
+ tcache_ind = UINT_MAX;
+ WRITE(tcache_ind, unsigned);
+ if (tcache_ind == UINT_MAX) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ tcaches_destroy(tsd, tcache_ind);
ret = 0;
label_return:
- return (ret);
+ return ret;
}
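
These three handlers back the "tcache.create", "tcache.flush", and "tcache.destroy" mallctls for explicit caches used via MALLOCX_TCACHE(). A sketch of the full lifecycle (public jemalloc 5.x API; error handling abbreviated):

    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        unsigned tc;
        size_t sz = sizeof(tc);

        if (mallctl("tcache.create", &tc, &sz, NULL, 0) != 0) {
            return 1;   /* EFAULT: tcaches_create() failed */
        }

        /* Route allocations through the explicit cache... */
        void *p = mallocx(64, MALLOCX_TCACHE(tc));
        if (p != NULL) {
            dallocx(p, MALLOCX_TCACHE(tc));
        }

        /* ...drain it, then recycle its slot. */
        mallctl("tcache.flush", NULL, NULL, &tc, sizeof(tc));
        mallctl("tcache.destroy", NULL, NULL, &tc, sizeof(tc));
        return 0;
    }
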
/******************************************************************************/
-/* ctl_mutex must be held during execution of this function. */
+static int
+arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ tsdn_t *tsdn = tsd_tsdn(tsd);
+ unsigned arena_ind;
+ bool initialized;
+
+ READONLY();
+ MIB_UNSIGNED(arena_ind, 1);
+
+ malloc_mutex_lock(tsdn, &ctl_mtx);
+ initialized = arenas_i(arena_ind)->initialized;
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+
+ READ(initialized, bool);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
static void
-arena_purge(unsigned arena_ind)
-{
- VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas);
+arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) {
+ malloc_mutex_lock(tsdn, &ctl_mtx);
+ {
+ unsigned narenas = ctl_arenas->narenas;
- malloc_mutex_lock(&arenas_lock);
- memcpy(tarenas, arenas, sizeof(arena_t *) * ctl_stats.narenas);
- malloc_mutex_unlock(&arenas_lock);
+ /*
+ * Access via index narenas is deprecated, and scheduled for
+ * removal in 6.0.0.
+ */
+ if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == narenas) {
+ unsigned i;
+ VARIABLE_ARRAY(arena_t *, tarenas, narenas);
- if (arena_ind == ctl_stats.narenas) {
- unsigned i;
- for (i = 0; i < ctl_stats.narenas; i++) {
- if (tarenas[i] != NULL)
- arena_purge_all(tarenas[i]);
+ for (i = 0; i < narenas; i++) {
+ tarenas[i] = arena_get(tsdn, i, false);
+ }
+
+ /*
+ * No further need to hold ctl_mtx, since narenas and
+ * tarenas contain everything needed below.
+ */
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+
+ for (i = 0; i < narenas; i++) {
+ if (tarenas[i] != NULL) {
+ arena_decay(tsdn, tarenas[i], false,
+ all);
+ }
+ }
+ } else {
+ arena_t *tarena;
+
+ assert(arena_ind < narenas);
+
+ tarena = arena_get(tsdn, arena_ind, false);
+
+ /* No further need to hold ctl_mtx. */
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+
+ if (tarena != NULL) {
+ arena_decay(tsdn, tarena, false, all);
+ }
}
- } else {
- assert(arena_ind < ctl_stats.narenas);
- if (tarenas[arena_ind] != NULL)
- arena_purge_all(tarenas[arena_ind]);
}
}
static int
-arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned arena_ind;
+
+ READONLY();
+ WRITEONLY();
+ MIB_UNSIGNED(arena_ind, 1);
+ arena_i_decay(tsd_tsdn(tsd), arena_ind, false);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static int
+arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
+ unsigned arena_ind;
READONLY();
WRITEONLY();
- malloc_mutex_lock(&ctl_mtx);
- arena_purge(mib[1]);
- malloc_mutex_unlock(&ctl_mtx);
+ MIB_UNSIGNED(arena_ind, 1);
+ arena_i_decay(tsd_tsdn(tsd), arena_ind, true);
ret = 0;
label_return:
- return (ret);
+ return ret;
}
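
"arena.<i>.purge" decays everything immediately, while "arena.<i>.decay" only applies the configured decay curves; both accept MALLCTL_ARENAS_ALL. A sketch of a full purge across all arenas using the MIB interface (purge_all is a hypothetical helper name):

    #include <jemalloc/jemalloc.h>

    int
    purge_all(void) {
        size_t mib[3];
        size_t miblen = sizeof(mib) / sizeof(size_t);

        if (mallctlnametomib("arena.0.purge", mib, &miblen) != 0) {
            return -1;
        }
        mib[1] = MALLCTL_ARENAS_ALL;    /* patch in the arena index */
        return mallctlbymib(mib, miblen, NULL, NULL, NULL, 0);
    }
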
static int
-arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
- int ret, i;
- bool match, err;
- const char *dss;
- unsigned arena_ind = mib[1];
+arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen, unsigned *arena_ind,
+ arena_t **arena) {
+ int ret;
+
+ READONLY();
+ WRITEONLY();
+ MIB_UNSIGNED(*arena_ind, 1);
+
+ *arena = arena_get(tsd_tsdn(tsd), *arena_ind, false);
+ if (*arena == NULL || arena_is_auto(*arena)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static void
+arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) {
+ /* Temporarily disable the background thread during arena reset. */
+ if (have_background_thread) {
+ malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+ if (background_thread_enabled()) {
+ unsigned ind = arena_ind % ncpus;
+ background_thread_info_t *info =
+ &background_thread_info[ind];
+ assert(info->state == background_thread_started);
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ info->state = background_thread_paused;
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ }
+ }
+}
+
+static void
+arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) {
+ if (have_background_thread) {
+ if (background_thread_enabled()) {
+ unsigned ind = arena_ind % ncpus;
+ background_thread_info_t *info =
+ &background_thread_info[ind];
+ assert(info->state == background_thread_paused);
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ info->state = background_thread_started;
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+ }
+}
+
+static int
+arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned arena_ind;
+ arena_t *arena;
+
+ ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp,
+ newp, newlen, &arena_ind, &arena);
+ if (ret != 0) {
+ return ret;
+ }
+
+ arena_reset_prepare_background_thread(tsd, arena_ind);
+ arena_reset(tsd, arena);
+ arena_reset_finish_background_thread(tsd, arena_ind);
+
+ return ret;
+}
+
+static int
+arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned arena_ind;
+ arena_t *arena;
+ ctl_arena_t *ctl_darena, *ctl_arena;
+
+ ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp,
+ newp, newlen, &arena_ind, &arena);
+ if (ret != 0) {
+ goto label_return;
+ }
+
+ if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena,
+ true) != 0) {
+ ret = EFAULT;
+ goto label_return;
+ }
+
+ arena_reset_prepare_background_thread(tsd, arena_ind);
+ /* Merge stats after resetting and purging arena. */
+ arena_reset(tsd, arena);
+ arena_decay(tsd_tsdn(tsd), arena, false, true);
+ ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED);
+ ctl_darena->initialized = true;
+ ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true);
+ /* Destroy arena. */
+ arena_destroy(tsd, arena);
+ ctl_arena = arenas_i(arena_ind);
+ ctl_arena->initialized = false;
+ /* Record arena index for later recycling via arenas.create. */
+ ql_elm_new(ctl_arena, destroyed_link);
+ ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link);
+ arena_reset_finish_background_thread(tsd, arena_ind);
+
+ assert(ret == 0);
+label_return:
+ return ret;
+}
+
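
Reset and destroy are only allowed for manual arenas with no attached threads (EFAULT otherwise). A sketch of the whole lifecycle — create a manual arena, allocate from it with the tcache bypassed, then destroy it so its index can be recycled by arenas.create:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        unsigned ind;
        size_t sz = sizeof(ind);
        char name[64];

        if (mallctl("arenas.create", &ind, &sz, NULL, 0) != 0) {
            return 1;
        }

        /* Allocate with the tcache bypassed so the arena owns the
         * region outright... */
        void *p = mallocx(4096, MALLOCX_ARENA(ind) | MALLOCX_TCACHE_NONE);
        if (p != NULL) {
            dallocx(p, MALLOCX_TCACHE_NONE);
        }

        /* ...then destroy; EFAULT while any thread is attached. */
        snprintf(name, sizeof(name), "arena.%u.destroy", ind);
        return mallctl(name, NULL, NULL, NULL, 0);
    }
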
+static int
+arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ const char *dss = NULL;
+ unsigned arena_ind;
dss_prec_t dss_prec_old = dss_prec_limit;
dss_prec_t dss_prec = dss_prec_limit;
- malloc_mutex_lock(&ctl_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
WRITE(dss, const char *);
- match = false;
- for (i = 0; i < dss_prec_limit; i++) {
- if (strcmp(dss_prec_names[i], dss) == 0) {
- dss_prec = i;
- match = true;
- break;
+ MIB_UNSIGNED(arena_ind, 1);
+ if (dss != NULL) {
+ int i;
+ bool match = false;
+
+ for (i = 0; i < dss_prec_limit; i++) {
+ if (strcmp(dss_prec_names[i], dss) == 0) {
+ dss_prec = i;
+ match = true;
+ break;
+ }
+ }
+
+ if (!match) {
+ ret = EINVAL;
+ goto label_return;
}
- }
- if (match == false) {
- ret = EINVAL;
- goto label_return;
}
- if (arena_ind < ctl_stats.narenas) {
- arena_t *arena = arenas[arena_ind];
- if (arena != NULL) {
- dss_prec_old = arena_dss_prec_get(arena);
- arena_dss_prec_set(arena, dss_prec);
- err = false;
- } else
- err = true;
+ /*
+ * Access via index narenas is deprecated, and scheduled for removal in
+ * 6.0.0.
+ */
+ if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind ==
+ ctl_arenas->narenas) {
+ if (dss_prec != dss_prec_limit &&
+ extent_dss_prec_set(dss_prec)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ dss_prec_old = extent_dss_prec_get();
} else {
- dss_prec_old = chunk_dss_prec_get();
- err = chunk_dss_prec_set(dss_prec);
+ arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+ if (arena == NULL || (dss_prec != dss_prec_limit &&
+ arena_dss_prec_set(arena, dss_prec))) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ dss_prec_old = arena_dss_prec_get(arena);
}
+
dss = dss_prec_names[dss_prec_old];
READ(dss, const char *);
- if (err) {
+
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
+}
+
+static int
+arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) {
+ int ret;
+ unsigned arena_ind;
+ arena_t *arena;
+
+ MIB_UNSIGNED(arena_ind, 1);
+ arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+ if (arena == NULL) {
ret = EFAULT;
goto label_return;
}
+ if (oldp != NULL && oldlenp != NULL) {
+ size_t oldval = dirty ? arena_dirty_decay_ms_get(arena) :
+ arena_muzzy_decay_ms_get(arena);
+ READ(oldval, ssize_t);
+ }
+ if (newp != NULL) {
+ if (newlen != sizeof(ssize_t)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (dirty ? arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena,
+ *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd),
+ arena, *(ssize_t *)newp)) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ }
+
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ return ret;
}
-static const ctl_named_node_t *
-arena_i_index(const size_t *mib, size_t miblen, size_t i)
-{
- const ctl_named_node_t * ret;
+static int
+arena_i_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
+ newlen, true);
+}
- malloc_mutex_lock(&ctl_mtx);
- if (i > ctl_stats.narenas) {
- ret = NULL;
+static int
+arena_i_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
+ newlen, false);
+}
+
+static int
+arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned arena_ind;
+ arena_t *arena;
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+ MIB_UNSIGNED(arena_ind, 1);
+ if (arena_ind < narenas_total_get()) {
+ extent_hooks_t *old_extent_hooks;
+ arena = arena_get(tsd_tsdn(tsd), arena_ind, false);
+ if (arena == NULL) {
+ if (arena_ind >= narenas_auto) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ old_extent_hooks =
+ (extent_hooks_t *)&extent_hooks_default;
+ READ(old_extent_hooks, extent_hooks_t *);
+ if (newp != NULL) {
+ /* Initialize a new arena as a side effect. */
+ extent_hooks_t *new_extent_hooks
+ JEMALLOC_CC_SILENCE_INIT(NULL);
+ WRITE(new_extent_hooks, extent_hooks_t *);
+ arena = arena_init(tsd_tsdn(tsd), arena_ind,
+ new_extent_hooks);
+ if (arena == NULL) {
+ ret = EFAULT;
+ goto label_return;
+ }
+ }
+ } else {
+ if (newp != NULL) {
+ extent_hooks_t *new_extent_hooks
+ JEMALLOC_CC_SILENCE_INIT(NULL);
+ WRITE(new_extent_hooks, extent_hooks_t *);
+ old_extent_hooks = extent_hooks_set(tsd, arena,
+ new_extent_hooks);
+ READ(old_extent_hooks, extent_hooks_t *);
+ } else {
+ old_extent_hooks = extent_hooks_get(arena);
+ READ(old_extent_hooks, extent_hooks_t *);
+ }
+ }
+ } else {
+ ret = EFAULT;
goto label_return;
}
+ ret = 0;
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
+}
+
+static int
+arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ unsigned arena_ind;
+ arena_t *arena;
+
+ if (!opt_retain) {
+ /* Only relevant when retain is enabled. */
+ return ENOENT;
+ }
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+ MIB_UNSIGNED(arena_ind, 1);
+ if (arena_ind < narenas_total_get() && (arena =
+ arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) {
+ size_t old_limit, new_limit;
+ if (newp != NULL) {
+ WRITE(new_limit, size_t);
+ }
+ bool err = arena_retain_grow_limit_get_set(tsd, arena,
+ &old_limit, newp != NULL ? &new_limit : NULL);
+ if (!err) {
+ READ(old_limit, size_t);
+ ret = 0;
+ } else {
+ ret = EFAULT;
+ }
+ } else {
+ ret = EFAULT;
+ }
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
+}
+
+static const ctl_named_node_t *
+arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
+ const ctl_named_node_t *ret;
+
+ malloc_mutex_lock(tsdn, &ctl_mtx);
+ switch (i) {
+ case MALLCTL_ARENAS_ALL:
+ case MALLCTL_ARENAS_DESTROYED:
+ break;
+ default:
+ if (i > ctl_arenas->narenas) {
+ ret = NULL;
+ goto label_return;
+ }
+ break;
+ }
ret = super_arena_i_node;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+ return ret;
}
/******************************************************************************/
static int
-arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen)
-{
+arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
unsigned narenas;
- malloc_mutex_lock(&ctl_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
READONLY();
if (*oldlenp != sizeof(unsigned)) {
ret = EINVAL;
goto label_return;
}
- narenas = ctl_stats.narenas;
+ narenas = ctl_arenas->narenas;
READ(narenas, unsigned);
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
}
static int
-arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
- size_t *oldlenp, void *newp, size_t newlen)
-{
+arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) {
int ret;
- unsigned nread, i;
- malloc_mutex_lock(&ctl_mtx);
- READONLY();
- if (*oldlenp != ctl_stats.narenas * sizeof(bool)) {
- ret = EINVAL;
- nread = (*oldlenp < ctl_stats.narenas * sizeof(bool))
- ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas;
- } else {
- ret = 0;
- nread = ctl_stats.narenas;
+ if (oldp != NULL && oldlenp != NULL) {
+ size_t oldval = (dirty ? arena_dirty_decay_ms_default_get() :
+ arena_muzzy_decay_ms_default_get());
+ READ(oldval, ssize_t);
+ }
+ if (newp != NULL) {
+ if (newlen != sizeof(ssize_t)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp)
+ : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) {
+ ret = EFAULT;
+ goto label_return;
+ }
}
- for (i = 0; i < nread; i++)
- ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
-
+ ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ return ret;
+}
+
+static int
+arenas_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
+ newlen, true);
+}
+
+static int
+arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp,
+ newlen, false);
}
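
These two handlers set the dirty/muzzy decay defaults inherited by arenas created afterwards; per-arena values go through "arena.<i>.dirty_decay_ms" and "arena.<i>.muzzy_decay_ms" instead. A sketch (tune_decay_defaults is a hypothetical helper; -1 disables decay):

    #include <sys/types.h>
    #include <jemalloc/jemalloc.h>

    int
    tune_decay_defaults(void) {
        ssize_t dirty_ms = 1000;        /* purge dirty pages after ~1s */
        ssize_t muzzy_ms = -1;          /* -1: never decay muzzy pages */

        if (mallctl("arenas.dirty_decay_ms", NULL, NULL, &dirty_ms,
            sizeof(dirty_ms)) != 0) {
            return -1;
        }
        return mallctl("arenas.muzzy_decay_ms", NULL, NULL, &muzzy_ms,
            sizeof(muzzy_ms));
    }
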
CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
CTL_RO_NL_GEN(arenas_page, PAGE, size_t)
-CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t)
+CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t)
CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned)
-CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned)
-CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
-CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
-CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
+CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned)
+CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t)
+CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t)
static const ctl_named_node_t *
-arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
-{
-
- if (i > NBINS)
- return (NULL);
- return (super_arenas_bin_i_node);
+arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
+ if (i > NBINS) {
+ return NULL;
+ }
+ return super_arenas_bin_i_node;
}
-CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t)
-CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t)
+CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned)
+CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(NBINS+(szind_t)mib[2]),
+ size_t)
static const ctl_named_node_t *
-arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
-{
-
- if (i > nlclasses)
- return (NULL);
- return (super_arenas_lrun_i_node);
+arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+ size_t i) {
+ if (i > NSIZES - NBINS) {
+ return NULL;
+ }
+ return super_arenas_lextent_i_node;
}
static int
-arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
+ extent_hooks_t *extent_hooks;
unsigned arena_ind;
- malloc_mutex_lock(&ctl_mtx);
- WRITEONLY();
- arena_ind = UINT_MAX;
- WRITE(arena_ind, unsigned);
- if (newp != NULL && arena_ind >= ctl_stats.narenas)
- ret = EFAULT;
- else {
- if (arena_ind == UINT_MAX)
- arena_ind = ctl_stats.narenas;
- arena_purge(arena_ind);
- ret = 0;
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+
+ extent_hooks = (extent_hooks_t *)&extent_hooks_default;
+ WRITE(extent_hooks, extent_hooks_t *);
+ if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) {
+ ret = EAGAIN;
+ goto label_return;
}
+ READ(arena_ind, unsigned);
+ ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
}
static int
-arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
- unsigned narenas;
+ unsigned arena_ind;
+ void *ptr;
+ extent_t *extent;
+ arena_t *arena;
+
+ ptr = NULL;
+ ret = EINVAL;
+ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx);
+ WRITE(ptr, void *);
+ extent = iealloc(tsd_tsdn(tsd), ptr);
+ if (extent == NULL) {
+ goto label_return;
+ }
- malloc_mutex_lock(&ctl_mtx);
- READONLY();
- if (ctl_grow()) {
- ret = EAGAIN;
+ arena = extent_arena_get(extent);
+ if (arena == NULL) {
+ goto label_return;
+ }
- }
- narenas = ctl_stats.narenas - 1;
- READ(narenas, unsigned);
+
+ arena_ind = arena_ind_get(arena);
+ READ(arena_ind, unsigned);
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx);
+ return ret;
}
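
"arenas.lookup" is written with a pointer and read back as the owning arena's index, resolved through iealloc() above. A sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        void *p = malloc(128);
        unsigned ind;
        size_t sz = sizeof(ind);

        /* The pointer goes in through newp; the index comes back. */
        if (p == NULL || mallctl("arenas.lookup", &ind, &sz, &p,
            sizeof(p)) != 0) {
            free(p);
            return 1;
        }
        printf("%p lives in arena %u\n", p, ind);
        free(p);
        return 0;
    }
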
/******************************************************************************/
static int
-prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
int ret;
bool oldval;
- if (config_prof == false)
- return (ENOENT);
+ if (!config_prof) {
+ return ENOENT;
+ }
- malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */
- oldval = opt_prof_active;
if (newp != NULL) {
- /*
- * The memory barriers will tend to make opt_prof_active
- * propagate faster on systems with weak memory ordering.
- */
- mb_write();
- WRITE(opt_prof_active, bool);
- mb_write();
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_thread_active_init_set(tsd_tsdn(tsd),
+ *(bool *)newp);
+ } else {
+ oldval = prof_thread_active_init_get(tsd_tsdn(tsd));
}
READ(oldval, bool);
ret = 0;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ return ret;
}
static int
-prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ bool oldval;
+
+ if (!config_prof) {
+ return ENOENT;
+ }
+
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp);
+ } else {
+ oldval = prof_active_get(tsd_tsdn(tsd));
+ }
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static int
+prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
int ret;
const char *filename = NULL;
- if (config_prof == false)
- return (ENOENT);
+ if (!config_prof) {
+ return ENOENT;
+ }
WRITEONLY();
WRITE(filename, const char *);
- if (prof_mdump(filename)) {
+ if (prof_mdump(tsd, filename)) {
ret = EFAULT;
goto label_return;
}
ret = 0;
label_return:
- return (ret);
+ return ret;
+}
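
The corresponding "prof.dump" mallctl writes a profile to the given path, or to an opt.prof_prefix-derived name when the filename is NULL. A sketch (dump_heap_profile is a hypothetical helper; requires an --enable-prof build running with prof:true, otherwise the call fails as in prof_dump_ctl() above):

    #include <jemalloc/jemalloc.h>

    int
    dump_heap_profile(const char *path) {
        /* path == NULL falls back to an opt.prof_prefix-derived name. */
        return mallctl("prof.dump", NULL, NULL, &path, sizeof(path));
    }
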
+
+static int
+prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ bool oldval;
+
+ if (!config_prof) {
+ return ENOENT;
+ }
+
+ if (newp != NULL) {
+ if (newlen != sizeof(bool)) {
+ ret = EINVAL;
+ goto label_return;
+ }
+ oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp);
+ } else {
+ oldval = prof_gdump_get(tsd_tsdn(tsd));
+ }
+ READ(oldval, bool);
+
+ ret = 0;
+label_return:
+ return ret;
+}
+
+static int
+prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
+ size_t *oldlenp, void *newp, size_t newlen) {
+ int ret;
+ size_t lg_sample = lg_prof_sample;
+
+ if (!config_prof) {
+ return ENOENT;
+ }
+
+ WRITEONLY();
+ WRITE(lg_sample, size_t);
+ if (lg_sample >= (sizeof(uint64_t) << 3)) {
+ lg_sample = (sizeof(uint64_t) << 3) - 1;
+ }
+
+ prof_reset(tsd, lg_sample);
+
+ ret = 0;
+label_return:
+ return ret;
}
CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t)
+CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t)
/******************************************************************************/
-CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *)
-CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t)
-CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t)
-CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t)
+CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t)
+CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t)
+CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t)
+CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t)
+CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t)
+
+CTL_RO_CGEN(config_stats, stats_background_thread_num_threads,
+ ctl_stats->background_thread.num_threads, size_t)
+CTL_RO_CGEN(config_stats, stats_background_thread_num_runs,
+ ctl_stats->background_thread.num_runs, uint64_t)
+CTL_RO_CGEN(config_stats, stats_background_thread_run_interval,
+ nstime_ns(&ctl_stats->background_thread.run_interval), uint64_t)
+
+CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *)
+CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms,
+ ssize_t)
+CTL_RO_GEN(stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms,
+ ssize_t)
+CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned)
+CTL_RO_GEN(stats_arenas_i_uptime,
+ nstime_ns(&arenas_i(mib[2])->astats->astats.uptime), uint64_t)
+CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t)
+CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t)
+CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED),
+ size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_retained,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED),
+ size_t)
-CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current,
+CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t)
+
+CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t)
+
+CTL_RO_CGEN(config_stats, stats_arenas_i_base,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED),
+ size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED),
+ size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp,
+ ATOMIC_RELAXED), size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes,
+ ATOMIC_RELAXED), size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_resident,
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED),
size_t)
-CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t)
-CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t)
-CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t)
-CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t)
-CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t)
-
-CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *)
-CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
-CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
-CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
- ctl_stats.arenas[mib[2]].astats.mapped, size_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_npurge,
- ctl_stats.arenas[mib[2]].astats.npurge, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
- ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_purged,
- ctl_stats.arenas[mib[2]].astats.purged, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
- ctl_stats.arenas[mib[2]].allocated_small, size_t)
+ arenas_i(mib[2])->astats->allocated_small, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,
- ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t)
+ arenas_i(mib[2])->astats->nmalloc_small, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc,
- ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
+ arenas_i(mib[2])->astats->ndalloc_small, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests,
- ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
+ arenas_i(mib[2])->astats->nrequests_small, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
- ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
+ atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large,
+ ATOMIC_RELAXED), size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
- ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
- ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t)
+/*
+ * Note: "nmalloc" is read here instead of "nrequests"; this is intentional.
+ */
CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
- ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */
+
+/* Lock profiling related APIs below. */
+#define RO_MUTEX_CTL_GEN(n, l) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \
+ l.n_lock_ops, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \
+ l.n_wait_times, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \
+ l.n_spin_acquired, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \
+ l.n_owner_switches, uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \
+ nstime_ns(&l.tot_wait_time), uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \
+ nstime_ns(&l.max_wait_time), uint64_t) \
+CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \
+ l.max_n_thds, uint32_t)
+
+/* Global mutexes. */
+#define OP(mtx) \
+ RO_MUTEX_CTL_GEN(mutexes_##mtx, \
+ ctl_stats->mutex_prof_data[global_prof_mutex_##mtx])
+MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+
+/* Per arena mutexes */
+#define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \
+ arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx])
+MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+
+/* tcache bin mutex */
+RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex,
+ arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data)
+#undef RO_MUTEX_CTL_GEN
+
+/* Resets all mutex stats, including global, arena and bin mutexes. */
+static int
+stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen,
+ void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
+ if (!config_stats) {
+ return ENOENT;
+ }
+
+ tsdn_t *tsdn = tsd_tsdn(tsd);
+
+#define MUTEX_PROF_RESET(mtx) \
+ malloc_mutex_lock(tsdn, &mtx); \
+ malloc_mutex_prof_data_reset(tsdn, &mtx); \
+ malloc_mutex_unlock(tsdn, &mtx);
+
+ /* Global mutexes: ctl and prof. */
+ MUTEX_PROF_RESET(ctl_mtx);
+ if (have_background_thread) {
+ MUTEX_PROF_RESET(background_thread_lock);
+ }
+ if (config_prof && opt_prof) {
+ MUTEX_PROF_RESET(bt2gctx_mtx);
+ }
+
+ /* Per arena mutexes. */
+ unsigned n = narenas_total_get();
+
+ for (unsigned i = 0; i < n; i++) {
+ arena_t *arena = arena_get(tsdn, i, false);
+ if (!arena) {
+ continue;
+ }
+ MUTEX_PROF_RESET(arena->large_mtx);
+ MUTEX_PROF_RESET(arena->extent_avail_mtx);
+ MUTEX_PROF_RESET(arena->extents_dirty.mtx);
+ MUTEX_PROF_RESET(arena->extents_muzzy.mtx);
+ MUTEX_PROF_RESET(arena->extents_retained.mtx);
+ MUTEX_PROF_RESET(arena->decay_dirty.mtx);
+ MUTEX_PROF_RESET(arena->decay_muzzy.mtx);
+ MUTEX_PROF_RESET(arena->tcache_ql_mtx);
+ MUTEX_PROF_RESET(arena->base->mtx);
+
+ for (szind_t i = 0; i < NBINS; i++) {
+ bin_t *bin = &arena->bins[i];
+ MUTEX_PROF_RESET(bin->lock);
+ }
+ }
+#undef MUTEX_PROF_RESET
+ return 0;
+}
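
Since the exported mutex counters come from the snapshot filled in at refresh time, a reset is typically followed by an epoch bump before re-reading them. A sketch (stats-enabled build assumed):

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        uint64_t epoch = 1, ops;
        size_t sz = sizeof(uint64_t);

        if (mallctl("stats.mutexes.reset", NULL, NULL, NULL, 0) != 0) {
            return 1;   /* ENOENT without --enable-stats */
        }
        /* Counters are snapshotted at refresh, so bump the epoch. */
        mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch));
        mallctl("stats.mutexes.ctl.num_ops", &ops, &sz, NULL, 0);
        printf("ctl mutex ops since reset: %llu\n",
            (unsigned long long)ops);
        return 0;
    }
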
-CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
+ arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
+ arenas_i(mib[2])->astats->bstats[mib[4]].ndalloc, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
-CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
-CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns,
- ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
+ arenas_i(mib[2])->astats->bstats[mib[4]].nrequests, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs,
+ arenas_i(mib[2])->astats->bstats[mib[4]].curregs, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nfills,
+ arenas_i(mib[2])->astats->bstats[mib[4]].nfills, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nflushes,
+ arenas_i(mib[2])->astats->bstats[mib[4]].nflushes, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs,
+ arenas_i(mib[2])->astats->bstats[mib[4]].nslabs, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs,
+ arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs,
+ arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t)
static const ctl_named_node_t *
-stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
-{
-
- if (j > NBINS)
- return (NULL);
- return (super_stats_arenas_i_bins_j_node);
+stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+ size_t j) {
+ if (j > NBINS) {
+ return NULL;
+ }
+ return super_stats_arenas_i_bins_j_node;
}
-CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc,
- ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc,
- ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests,
- ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
-CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns,
- ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests,
+ ctl_arena_stats_read_u64(
+ &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents,
+ arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t)
static const ctl_named_node_t *
-stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
-{
-
- if (j > nlclasses)
- return (NULL);
- return (super_stats_arenas_i_lruns_j_node);
+stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
+ size_t j) {
+ if (j > NSIZES - NBINS) {
+ return NULL;
+ }
+ return super_stats_arenas_i_lextents_j_node;
}
static const ctl_named_node_t *
-stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
-{
- const ctl_named_node_t * ret;
+stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) {
+ const ctl_named_node_t *ret;
+ size_t a;
- malloc_mutex_lock(&ctl_mtx);
- if (i > ctl_stats.narenas || ctl_stats.arenas[i].initialized == false) {
+ malloc_mutex_lock(tsdn, &ctl_mtx);
+ a = arenas_i2a_impl(i, true, true);
+ if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) {
ret = NULL;
goto label_return;
}
ret = super_stats_arenas_i_node;
label_return:
- malloc_mutex_unlock(&ctl_mtx);
- return (ret);
+ malloc_mutex_unlock(tsdn, &ctl_mtx);
+ return ret;
}
diff --git a/deps/jemalloc/src/div.c b/deps/jemalloc/src/div.c
new file mode 100644
index 000000000..808892a13
--- /dev/null
+++ b/deps/jemalloc/src/div.c
@@ -0,0 +1,55 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/div.h"
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
+ *
+ * For any k, we have (here, all division is exact; not C-style rounding):
+ * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
+ * r = (-2^k) mod d.
+ *
+ * Expanding this out:
+ * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
+ * = floor(n / d + (r / d) * (n / 2^k)).
+ *
+ * The fractional part of n / d is 0 (because of the assumption that d divides n
+ * exactly), so we have:
+ * ... = n / d + floor((r / d) * (n / 2^k))
+ *
+ * So our initial expression equals the quantity we seek, so long as
+ * (r / d) * (n / 2^k) < 1.
+ *
+ * r is a remainder mod d, so r < d and r / d < 1 always. We can make
+ * n / 2^k < 1 by setting k = 32, since the n we divide here fits in 32
+ * bits. This gets us a value of magic that works.
+ */
+
+void
+div_init(div_info_t *div_info, size_t d) {
+ /* Nonsensical. */
+ assert(d != 0);
+ /*
+ * This would make the value of magic too high to fit into a uint32_t
+ * (we would want magic = 2^32 exactly). This would mess with code gen
+ * on 32-bit machines.
+ */
+ assert(d != 1);
+
+ uint64_t two_to_k = ((uint64_t)1 << 32);
+ uint32_t magic = (uint32_t)(two_to_k / d);
+
+ /*
+ * We want magic = ceil(2^k / d), but C gives us floor. We have to
+ * increment it unless the result was exact (i.e. unless d is a power of
+ * two).
+ */
+ if (two_to_k % d != 0) {
+ magic++;
+ }
+ div_info->magic = magic;
+#ifdef JEMALLOC_DEBUG
+ div_info->d = d;
+#endif
+}
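+
+/*
+ * A minimal consumption sketch (hypothetical name; the real consumer is
+ * declared in div.h, included above): with magic as computed here, the
+ * exact division n / d becomes a 64-bit multiply and a 32-bit shift.
+ * E.g. for d = 3, magic = ceil(2^32 / 3) = 0x55555556, and n = 9 gives
+ * (9 * 0x55555556) >> 32 == 3.
+ */
+static inline size_t
+div_compute_sketch(const div_info_t *div_info, size_t n) {
+ /* The k = 32 derivation above assumes n fits in 32 bits. */
+ assert(n <= (uint32_t)-1);
+ size_t q = (size_t)(((uint64_t)n * div_info->magic) >> 32);
+#ifdef JEMALLOC_DEBUG
+ /* Exact division was a precondition; check it when we can. */
+ assert(q * div_info->d == n);
+#endif
+ return q;
+}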
diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c
index 8c09b486e..09d6d7718 100644
--- a/deps/jemalloc/src/extent.c
+++ b/deps/jemalloc/src/extent.c
@@ -1,39 +1,2177 @@
-#define JEMALLOC_EXTENT_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_EXTENT_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
/******************************************************************************/
+/* Data. */
+
+rtree_t extents_rtree;
+/* Keyed by the address of the extent_t being protected. */
+mutex_pool_t extent_mutex_pool;
+
+size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT;
+
+static const bitmap_info_t extents_bitmap_info =
+ BITMAP_INFO_INITIALIZER(NPSIZES+1);
+
+static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool *commit,
+ unsigned arena_ind);
+static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, bool committed, unsigned arena_ind);
+static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, bool committed, unsigned arena_ind);
+static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained);
+static bool extent_decommit_default(extent_hooks_t *extent_hooks,
+ void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind);
+#ifdef PAGES_CAN_PURGE_LAZY
+static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind);
+#endif
+static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained);
+#ifdef PAGES_CAN_PURGE_FORCED
+static bool extent_purge_forced_default(extent_hooks_t *extent_hooks,
+ void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind);
+#endif
+static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained);
+#ifdef JEMALLOC_MAPS_COALESCE
+static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t size_a, size_t size_b, bool committed,
+ unsigned arena_ind);
+#endif
+static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
+ szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b,
+ bool growing_retained);
+#ifdef JEMALLOC_MAPS_COALESCE
+static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a,
+ size_t size_a, void *addr_b, size_t size_b, bool committed,
+ unsigned arena_ind);
+#endif
+static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b,
+ bool growing_retained);
+
+const extent_hooks_t extent_hooks_default = {
+ extent_alloc_default,
+ extent_dalloc_default,
+ extent_destroy_default,
+ extent_commit_default,
+ extent_decommit_default
+#ifdef PAGES_CAN_PURGE_LAZY
+ ,
+ extent_purge_lazy_default
+#else
+ ,
+ NULL
+#endif
+#ifdef PAGES_CAN_PURGE_FORCED
+ ,
+ extent_purge_forced_default
+#else
+ ,
+ NULL
+#endif
+#ifdef JEMALLOC_MAPS_COALESCE
+ ,
+ extent_split_default,
+ extent_merge_default
+#endif
+};
+
+/* Used exclusively for gdump triggering. */
+static atomic_zu_t curpages;
+static atomic_zu_t highpages;
+
+/******************************************************************************/
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
+
+static void extent_deregister(tsdn_t *tsdn, extent_t *extent);
+static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
+ size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind,
+ bool *zero, bool *commit, bool growing_retained);
+static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
+ extent_t *extent, bool *coalesced, bool growing_retained);
+static void extent_record(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent,
+ bool growing_retained);
+
+/******************************************************************************/
+
+ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link,
+ extent_esnead_comp)
+
+typedef enum {
+ lock_result_success,
+ lock_result_failure,
+ lock_result_no_extent
+} lock_result_t;
+
+static lock_result_t
+extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm,
+ extent_t **result) {
+ extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree,
+ elm, true);
+
+ if (extent1 == NULL) {
+ return lock_result_no_extent;
+ }
+ /*
+ * It's possible that the extent changed out from under us, and with it
+ * the leaf->extent mapping. We have to recheck while holding the lock.
+ */
+ extent_lock(tsdn, extent1);
+ extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn,
+ &extents_rtree, elm, true);
+
+ if (extent1 == extent2) {
+ *result = extent1;
+ return lock_result_success;
+ } else {
+ extent_unlock(tsdn, extent1);
+ return lock_result_failure;
+ }
+}
+
+/*
+ * Returns a pool-locked extent_t * if there's one associated with the given
+ * address, and NULL otherwise.
+ */
+static extent_t *
+extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) {
+ extent_t *ret = NULL;
+ rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree,
+ rtree_ctx, (uintptr_t)addr, false, false);
+ if (elm == NULL) {
+ return NULL;
+ }
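+ /*
+ * A failure result means the leaf's extent mapping changed while the
+ * per-extent lock was being acquired; retry until the recheck under
+ * the lock succeeds or the slot turns out to hold no extent.
+ */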
+ lock_result_t lock_result;
+ do {
+ lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret);
+ } while (lock_result == lock_result_failure);
+ return ret;
+}
+
+extent_t *
+extent_alloc(tsdn_t *tsdn, arena_t *arena) {
+ malloc_mutex_lock(tsdn, &arena->extent_avail_mtx);
+ extent_t *extent = extent_avail_first(&arena->extent_avail);
+ if (extent == NULL) {
+ malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx);
+ return base_alloc_extent(tsdn, arena->base);
+ }
+ extent_avail_remove(&arena->extent_avail, extent);
+ malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx);
+ return extent;
+}
+
+void
+extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
+ malloc_mutex_lock(tsdn, &arena->extent_avail_mtx);
+ extent_avail_insert(&arena->extent_avail, extent);
+ malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx);
+}
+
+extent_hooks_t *
+extent_hooks_get(arena_t *arena) {
+ return base_extent_hooks_get(arena->base);
+}
+
+extent_hooks_t *
+extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) {
+ background_thread_info_t *info;
+ if (have_background_thread) {
+ info = arena_background_thread_info_get(arena);
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ }
+ extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks);
+ if (have_background_thread) {
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ }
+
+ return ret;
+}
+
+static void
+extent_hooks_assure_initialized(arena_t *arena,
+ extent_hooks_t **r_extent_hooks) {
+ if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) {
+ *r_extent_hooks = extent_hooks_get(arena);
+ }
+}
+
+#ifndef JEMALLOC_JET
+static
+#endif
+size_t
+extent_size_quantize_floor(size_t size) {
+ size_t ret;
+ pszind_t pind;
+
+ assert(size > 0);
+ assert((size & PAGE_MASK) == 0);
+
+ pind = sz_psz2ind(size - sz_large_pad + 1);
+ if (pind == 0) {
+ /*
+ * Avoid underflow. This short-circuit would also do the right
+ * thing for all sizes in the range for which there are
+ * PAGE-spaced size classes, but it's simplest to just handle
+ * the one case that would cause erroneous results.
+ */
+ return size;
+ }
+ ret = sz_pind2sz(pind - 1) + sz_large_pad;
+ assert(ret <= size);
+ return ret;
+}
+
+#ifndef JEMALLOC_JET
+static
+#endif
+size_t
+extent_size_quantize_ceil(size_t size) {
+ size_t ret;
+
+ assert(size > 0);
+ assert(size - sz_large_pad <= LARGE_MAXCLASS);
+ assert((size & PAGE_MASK) == 0);
+
+ ret = extent_size_quantize_floor(size);
+ if (ret < size) {
+ /*
+ * Skip a quantization class even though it may contain an
+ * adequately large extent, because under-sized extents may be
+ * mixed in. This only
+ * happens when an unusual size is requested, i.e. for aligned
+ * allocation, and is just one of several places where linear
+ * search would potentially find sufficiently aligned available
+ * memory somewhere lower.
+ */
+ ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) +
+ sz_large_pad;
+ }
+ return ret;
+}
+
+/* Generate pairing heap functions. */
+ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp)
+
+bool
+extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
+ bool delay_coalesce) {
+ if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+ for (unsigned i = 0; i < NPSIZES+1; i++) {
+ extent_heap_new(&extents->heaps[i]);
+ }
+ bitmap_init(extents->bitmap, &extents_bitmap_info, true);
+ extent_list_init(&extents->lru);
+ atomic_store_zu(&extents->npages, 0, ATOMIC_RELAXED);
+ extents->state = state;
+ extents->delay_coalesce = delay_coalesce;
+ return false;
+}
+
+extent_state_t
+extents_state_get(const extents_t *extents) {
+ return extents->state;
+}
+
+size_t
+extents_npages_get(extents_t *extents) {
+ return atomic_load_zu(&extents->npages, ATOMIC_RELAXED);
+}
+
+static void
+extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) {
+ malloc_mutex_assert_owner(tsdn, &extents->mtx);
+ assert(extent_state_get(extent) == extents->state);
+
+ size_t size = extent_size_get(extent);
+ size_t psz = extent_size_quantize_floor(size);
+ pszind_t pind = sz_psz2ind(psz);
+ if (extent_heap_empty(&extents->heaps[pind])) {
+ bitmap_unset(extents->bitmap, &extents_bitmap_info,
+ (size_t)pind);
+ }
+ extent_heap_insert(&extents->heaps[pind], extent);
+ extent_list_append(&extents->lru, extent);
+ size_t npages = size >> LG_PAGE;
+ /*
+ * All modifications to npages hold the mutex (as asserted above), so we
+ * don't need an atomic fetch-add; we can get by with a load followed by
+ * a store.
+ */
+ size_t cur_extents_npages =
+ atomic_load_zu(&extents->npages, ATOMIC_RELAXED);
+ atomic_store_zu(&extents->npages, cur_extents_npages + npages,
+ ATOMIC_RELAXED);
+}
+
+static void
+extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) {
+ malloc_mutex_assert_owner(tsdn, &extents->mtx);
+ assert(extent_state_get(extent) == extents->state);
+
+ size_t size = extent_size_get(extent);
+ size_t psz = extent_size_quantize_floor(size);
+ pszind_t pind = sz_psz2ind(psz);
+ extent_heap_remove(&extents->heaps[pind], extent);
+ if (extent_heap_empty(&extents->heaps[pind])) {
+ bitmap_set(extents->bitmap, &extents_bitmap_info,
+ (size_t)pind);
+ }
+ extent_list_remove(&extents->lru, extent);
+ size_t npages = size >> LG_PAGE;
+ /*
+ * As in extents_insert_locked, we hold extents->mtx and so don't need
+ * atomic operations for updating extents->npages.
+ */
+ size_t cur_extents_npages =
+ atomic_load_zu(&extents->npages, ATOMIC_RELAXED);
+ assert(cur_extents_npages >= npages);
+ atomic_store_zu(&extents->npages,
+ cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED);
+}
+
+/*
+ * Find an extent with size [min_size, max_size) to satisfy the alignment
+ * requirement. For each size, try only the first extent in the heap.
+ */
+static extent_t *
+extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size,
+ size_t alignment) {
+ pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size));
+ pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size));
+
+ for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap,
+ &extents_bitmap_info, (size_t)pind); i < pind_max; i =
+ (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info,
+ (size_t)i+1)) {
+ assert(i < NPSIZES);
+ assert(!extent_heap_empty(&extents->heaps[i]));
+ extent_t *extent = extent_heap_first(&extents->heaps[i]);
+ uintptr_t base = (uintptr_t)extent_base_get(extent);
+ size_t candidate_size = extent_size_get(extent);
+ assert(candidate_size >= min_size);
+
+ uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base,
+ PAGE_CEILING(alignment));
+ if (base > next_align || base + candidate_size <= next_align) {
+ /* Overflow or not crossing the next alignment. */
+ continue;
+ }
+
+ size_t leadsize = next_align - base;
+ if (candidate_size - leadsize >= min_size) {
+ return extent;
+ }
+ }
+
+ return NULL;
+}
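+
+/*
+ * Worked example of the math above (numbers illustrative only): take an
+ * extent with base = 0x5000 and candidate_size = 0x4000, and alignment =
+ * 0x4000. Then next_align = 0x8000, which the extent crosses (it ends at
+ * 0x9000), so leadsize = 0x3000 and the 0x1000 bytes past the boundary
+ * can satisfy any min_size <= 0x1000.
+ */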
+
+/* Do any-best-fit selection, i.e. take any extent from the smallest size class that fits. */
+static extent_t *
+extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ size_t size) {
+ pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size));
+ pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info,
+ (size_t)pind);
+ if (i < NPSIZES+1) {
+ /*
+ * In order to reduce fragmentation, avoid reusing and splitting
+ * large extents for much smaller sizes.
+ */
+ if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) {
+ return NULL;
+ }
+ assert(!extent_heap_empty(&extents->heaps[i]));
+ extent_t *extent = extent_heap_first(&extents->heaps[i]);
+ assert(extent_size_get(extent) >= size);
+ return extent;
+ }
+
+ return NULL;
+}
+
+/*
+ * Do first-fit extent selection, i.e. select the oldest/lowest extent that is
+ * large enough.
+ */
+static extent_t *
+extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ size_t size) {
+ extent_t *ret = NULL;
+
+ pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size));
+ for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap,
+ &extents_bitmap_info, (size_t)pind); i < NPSIZES+1; i =
+ (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info,
+ (size_t)i+1)) {
+ assert(!extent_heap_empty(&extents->heaps[i]));
+ extent_t *extent = extent_heap_first(&extents->heaps[i]);
+ assert(extent_size_get(extent) >= size);
+ if (ret == NULL || extent_snad_comp(extent, ret) < 0) {
+ ret = extent;
+ }
+ if (i == NPSIZES) {
+ break;
+ }
+ assert(i < NPSIZES);
+ }
+
+ return ret;
+}
+
+/*
+ * Do {best,first}-fit extent selection, where the selection policy choice is
+ * based on extents->delay_coalesce. Best-fit selection requires less
+ * searching, but its layout policy is less stable and may cause higher virtual
+ * memory fragmentation as a side effect.
+ */
+static extent_t *
+extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ size_t esize, size_t alignment) {
+ malloc_mutex_assert_owner(tsdn, &extents->mtx);
+
+ size_t max_size = esize + PAGE_CEILING(alignment) - PAGE;
+ /* Beware size_t wrap-around. */
+ if (max_size < esize) {
+ return NULL;
+ }
+
+ extent_t *extent = extents->delay_coalesce ?
+ extents_best_fit_locked(tsdn, arena, extents, max_size) :
+ extents_first_fit_locked(tsdn, arena, extents, max_size);
+
+ if (alignment > PAGE && extent == NULL) {
+ /*
+ * max_size guarantees the alignment requirement but is rather
+ * pessimistic. Next we try to satisfy the aligned allocation
+ * with sizes in [esize, max_size).
+ */
+ extent = extents_fit_alignment(extents, esize, max_size,
+ alignment);
+ }
+
+ return extent;
+}
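+
+/*
+ * A sketch of why the max_size bound above suffices: extent bases are
+ * page-aligned, so at most PAGE_CEILING(alignment) - PAGE bytes can
+ * precede the first aligned address within an extent. An extent of size
+ * esize + PAGE_CEILING(alignment) - PAGE therefore always contains an
+ * aligned run of at least esize bytes.
+ */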
+
+static bool
+extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
+ extent_t *extent) {
+ extent_state_set(extent, extent_state_active);
+ bool coalesced;
+ extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx,
+ extents, extent, &coalesced, false);
+ extent_state_set(extent, extents_state_get(extents));
+
+ if (!coalesced) {
+ return true;
+ }
+ extents_insert_locked(tsdn, extents, extent);
+ return false;
+}
+
+extent_t *
+extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) {
+ assert(size + pad != 0);
+ assert(alignment != 0);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents,
+ new_addr, size, pad, alignment, slab, szind, zero, commit, false);
+ assert(extent == NULL || extent_dumpable_get(extent));
+ return extent;
+}
+
+void
+extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, extent_t *extent) {
+ assert(extent_base_get(extent) != NULL);
+ assert(extent_size_get(extent) != 0);
+ assert(extent_dumpable_get(extent));
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_addr_set(extent, extent_base_get(extent));
+ extent_zeroed_set(extent, false);
+
+ extent_record(tsdn, arena, r_extent_hooks, extents, extent, false);
+}
+
+extent_t *
+extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, size_t npages_min) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ malloc_mutex_lock(tsdn, &extents->mtx);
+
+ /*
+ * Get the LRU coalesced extent, if any. If coalescing was delayed,
+ * the loop will iterate until the LRU extent is fully coalesced.
+ */
+ extent_t *extent;
+ while (true) {
+ /* Get the LRU extent, if any. */
+ extent = extent_list_first(&extents->lru);
+ if (extent == NULL) {
+ goto label_return;
+ }
+ /* Check the eviction limit. */
+ size_t extents_npages = atomic_load_zu(&extents->npages,
+ ATOMIC_RELAXED);
+ if (extents_npages <= npages_min) {
+ extent = NULL;
+ goto label_return;
+ }
+ extents_remove_locked(tsdn, extents, extent);
+ if (!extents->delay_coalesce) {
+ break;
+ }
+ /* Try to coalesce. */
+ if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks,
+ rtree_ctx, extents, extent)) {
+ break;
+ }
+ /*
+ * The LRU extent was just coalesced and the result placed in
+ * the LRU at its neighbor's position. Start over.
+ */
+ }
+
+ /*
+ * Either mark the extent active or deregister it to protect against
+ * concurrent operations.
+ */
+ switch (extents_state_get(extents)) {
+ case extent_state_active:
+ not_reached();
+ case extent_state_dirty:
+ case extent_state_muzzy:
+ extent_state_set(extent, extent_state_active);
+ break;
+ case extent_state_retained:
+ extent_deregister(tsdn, extent);
+ break;
+ default:
+ not_reached();
+ }
+
+label_return:
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+ return extent;
+}
+
+static void
+extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, extent_t *extent, bool growing_retained) {
+ /*
+ * Leak extent after making sure its pages have already been purged, so
+ * that this is only a virtual memory leak.
+ */
+ if (extents_state_get(extents) == extent_state_dirty) {
+ if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks,
+ extent, 0, extent_size_get(extent), growing_retained)) {
+ extent_purge_forced_impl(tsdn, arena, r_extent_hooks,
+ extent, 0, extent_size_get(extent),
+ growing_retained);
+ }
+ }
+ extent_dalloc(tsdn, arena, extent);
+}
+
+void
+extents_prefork(tsdn_t *tsdn, extents_t *extents) {
+ malloc_mutex_prefork(tsdn, &extents->mtx);
+}
+
+void
+extents_postfork_parent(tsdn_t *tsdn, extents_t *extents) {
+ malloc_mutex_postfork_parent(tsdn, &extents->mtx);
+}
+
+void
+extents_postfork_child(tsdn_t *tsdn, extents_t *extents) {
+ malloc_mutex_postfork_child(tsdn, &extents->mtx);
+}
+
+static void
+extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ extent_t *extent) {
+ assert(extent_arena_get(extent) == arena);
+ assert(extent_state_get(extent) == extent_state_active);
+
+ extent_state_set(extent, extents_state_get(extents));
+ extents_insert_locked(tsdn, extents, extent);
+}
+
+static void
+extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ extent_t *extent) {
+ malloc_mutex_lock(tsdn, &extents->mtx);
+ extent_deactivate_locked(tsdn, arena, extents, extent);
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+}
+
+static void
+extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
+ extent_t *extent) {
+ assert(extent_arena_get(extent) == arena);
+ assert(extent_state_get(extent) == extents_state_get(extents));
+
+ extents_remove_locked(tsdn, extents, extent);
+ extent_state_set(extent, extent_state_active);
+}
+
+static bool
+extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
+ const extent_t *extent, bool dependent, bool init_missing,
+ rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) {
+ *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_base_get(extent), dependent, init_missing);
+ if (!dependent && *r_elm_a == NULL) {
+ return true;
+ }
+ assert(*r_elm_a != NULL);
+
+ *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_last_get(extent), dependent, init_missing);
+ if (!dependent && *r_elm_b == NULL) {
+ return true;
+ }
+ assert(*r_elm_b != NULL);
+
+ return false;
+}
+
+static void
+extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a,
+ rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) {
+ rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab);
+ if (elm_b != NULL) {
+ rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind,
+ slab);
+ }
+}
+
+static void
+extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent,
+ szind_t szind) {
+ assert(extent_slab_get(extent));
+
+ /* Register interior. */
+ for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) {
+ rtree_write(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_base_get(extent) + (uintptr_t)(i <<
+ LG_PAGE), extent, szind, true);
+ }
+}
+
+static void
+extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) {
+ cassert(config_prof);
+ /* prof_gdump() requirement. */
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ if (opt_prof && extent_state_get(extent) == extent_state_active) {
+ size_t nadd = extent_size_get(extent) >> LG_PAGE;
+ size_t cur = atomic_fetch_add_zu(&curpages, nadd,
+ ATOMIC_RELAXED) + nadd;
+ size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED);
+ while (cur > high && !atomic_compare_exchange_weak_zu(
+ &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) {
+ /*
+ * Don't refresh cur, because it may have decreased
+ * since this thread lost the highpages update race.
+ * Note that high is updated in case of CAS failure.
+ */
+ }
+ if (cur > high && prof_gdump_get_unlocked()) {
+ prof_gdump(tsdn);
+ }
+ }
+}
+
+static void
+extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) {
+ cassert(config_prof);
+
+ if (opt_prof && extent_state_get(extent) == extent_state_active) {
+ size_t nsub = extent_size_get(extent) >> LG_PAGE;
+ assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub);
+ atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED);
+ }
+}
+
+static bool
+extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ rtree_leaf_elm_t *elm_a, *elm_b;
+
+ /*
+ * We need to hold the lock to protect against a concurrent coalesce
+ * operation that sees us in a partial state.
+ */
+ extent_lock(tsdn, extent);
+
+ if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true,
+ &elm_a, &elm_b)) {
+ return true;
+ }
+
+ szind_t szind = extent_szind_get_maybe_invalid(extent);
+ bool slab = extent_slab_get(extent);
+ extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab);
+ if (slab) {
+ extent_interior_register(tsdn, rtree_ctx, extent, szind);
+ }
+
+ extent_unlock(tsdn, extent);
+
+ if (config_prof && gdump_add) {
+ extent_gdump_add(tsdn, extent);
+ }
+
+ return false;
+}
+
+static bool
+extent_register(tsdn_t *tsdn, extent_t *extent) {
+ return extent_register_impl(tsdn, extent, true);
+}
+
+static bool
+extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) {
+ return extent_register_impl(tsdn, extent, false);
+}
+
+static void
+extent_reregister(tsdn_t *tsdn, extent_t *extent) {
+ bool err = extent_register(tsdn, extent);
+ assert(!err);
+}
+
+/*
+ * Removes all pointers to the given extent from the global rtree indices for
+ * its interior. This is relevant for slab extents, for which we need to do
+ * metadata lookups at places other than the head of the extent. We therefore
+ * deregister the interior when an extent moves from being an active slab to an
+ * inactive state.
+ */
+static void
+extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
+ extent_t *extent) {
+ size_t i;
+
+ assert(extent_slab_get(extent));
+
+ for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) {
+ rtree_clear(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_base_get(extent) + (uintptr_t)(i <<
+ LG_PAGE));
+ }
+}
+
+/*
+ * Removes all pointers to the given extent from the global rtree.
+ */
+static void
+extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ rtree_leaf_elm_t *elm_a, *elm_b;
+ extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false,
+ &elm_a, &elm_b);
+
+ extent_lock(tsdn, extent);
+
+ extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, NSIZES, false);
+ if (extent_slab_get(extent)) {
+ extent_interior_deregister(tsdn, rtree_ctx, extent);
+ extent_slab_set(extent, false);
+ }
+
+ extent_unlock(tsdn, extent);
+
+ if (config_prof && gdump) {
+ extent_gdump_sub(tsdn, extent);
+ }
+}
+
+static void
+extent_deregister(tsdn_t *tsdn, extent_t *extent) {
+ extent_deregister_impl(tsdn, extent, true);
+}
+
+static void
+extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) {
+ extent_deregister_impl(tsdn, extent, false);
+}
+
+/*
+ * Tries to find and remove an extent from extents that can be used for the
+ * given allocation request.
+ */
+static extent_t *
+extent_recycle_extract(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
+ void *new_addr, size_t size, size_t pad, size_t alignment, bool slab,
+ bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+ assert(alignment > 0);
+ if (config_debug && new_addr != NULL) {
+ /*
+ * Non-NULL new_addr has two use cases:
+ *
+ * 1) Recycle a known-extant extent, e.g. during purging.
+ * 2) Perform in-place expanding reallocation.
+ *
+ * Regardless of use case, new_addr must either refer to a
+ * non-existing extent, or to the base of an extant extent,
+ * since only active slabs support interior lookups (which of
+ * course cannot be recycled).
+ */
+ assert(PAGE_ADDR2BASE(new_addr) == new_addr);
+ assert(pad == 0);
+ assert(alignment <= PAGE);
+ }
+
+ size_t esize = size + pad;
+ malloc_mutex_lock(tsdn, &extents->mtx);
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+ extent_t *extent;
+ if (new_addr != NULL) {
+ extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr);
+ if (extent != NULL) {
+ /*
+ * We might null-out extent to report an error, but we
+ * still need to unlock the associated mutex afterwards.
+ */
+ extent_t *unlock_extent = extent;
+ assert(extent_base_get(extent) == new_addr);
+ if (extent_arena_get(extent) != arena ||
+ extent_size_get(extent) < esize ||
+ extent_state_get(extent) !=
+ extents_state_get(extents)) {
+ extent = NULL;
+ }
+ extent_unlock(tsdn, unlock_extent);
+ }
+ } else {
+ extent = extents_fit_locked(tsdn, arena, extents, esize,
+ alignment);
+ }
+ if (extent == NULL) {
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+ return NULL;
+ }
+
+ extent_activate_locked(tsdn, arena, extents, extent);
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+
+ return extent;
+}
+
+/*
+ * Given an allocation request and an extent guaranteed to be able to satisfy
+ * it, this splits off lead and trail extents, leaving extent pointing to an
+ * extent satisfying the allocation.
+ * This function doesn't put lead or trail into any extents_t; it's the caller's
+ * job to ensure that they can be reused.
+ */
+typedef enum {
+ /*
+ * Split successfully. lead, extent, and trail are modified to extents
+ * describing the ranges before, in, and after the given allocation.
+ */
+ extent_split_interior_ok,
+ /*
+ * The extent can't satisfy the given allocation request. None of the
+ * input extent_t *s are touched.
+ */
+ extent_split_interior_cant_alloc,
+ /*
+ * In a potentially invalid state. Must leak (if *to_leak is non-NULL),
+ * and salvage what's still salvageable (if *to_salvage is non-NULL).
+ * None of lead, extent, or trail are valid.
+ */
+ extent_split_interior_error
+} extent_split_interior_result_t;
+
+static extent_split_interior_result_t
+extent_split_interior(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx,
+ /* The result of splitting, in case of success. */
+ extent_t **extent, extent_t **lead, extent_t **trail,
+ /* The mess to clean up, in case of error. */
+ extent_t **to_leak, extent_t **to_salvage,
+ void *new_addr, size_t size, size_t pad, size_t alignment, bool slab,
+ szind_t szind, bool growing_retained) {
+ size_t esize = size + pad;
+ size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent),
+ PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent);
+ assert(new_addr == NULL || leadsize == 0);
+ if (extent_size_get(*extent) < leadsize + esize) {
+ return extent_split_interior_cant_alloc;
+ }
+ size_t trailsize = extent_size_get(*extent) - leadsize - esize;
+
+ *lead = NULL;
+ *trail = NULL;
+ *to_leak = NULL;
+ *to_salvage = NULL;
+
+ /* Split the lead. */
+ if (leadsize != 0) {
+ *lead = *extent;
+ *extent = extent_split_impl(tsdn, arena, r_extent_hooks,
+ *lead, leadsize, NSIZES, false, esize + trailsize, szind,
+ slab, growing_retained);
+ if (*extent == NULL) {
+ *to_leak = *lead;
+ *lead = NULL;
+ return extent_split_interior_error;
+ }
+ }
+
+ /* Split the trail. */
+ if (trailsize != 0) {
+ *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent,
+ esize, szind, slab, trailsize, NSIZES, false,
+ growing_retained);
+ if (*trail == NULL) {
+ *to_leak = *extent;
+ *to_salvage = *lead;
+ *lead = NULL;
+ *extent = NULL;
+ return extent_split_interior_error;
+ }
+ }
+
+ if (leadsize == 0 && trailsize == 0) {
+ /*
+ * Splitting causes szind to be set as a side effect, but no
+ * splitting occurred.
+ */
+ extent_szind_set(*extent, szind);
+ if (szind != NSIZES) {
+ rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_addr_get(*extent), szind, slab);
+ if (slab && extent_size_get(*extent) > PAGE) {
+ rtree_szind_slab_update(tsdn, &extents_rtree,
+ rtree_ctx,
+ (uintptr_t)extent_past_get(*extent) -
+ (uintptr_t)PAGE, szind, slab);
+ }
+ }
+ }
+
+ return extent_split_interior_ok;
+}
+
+/*
+ * This fulfills the indicated allocation request out of the given extent (which
+ * the caller should have ensured was big enough). If there's any unused space
+ * before or after the resulting allocation, that space is given its own extent
+ * and put back into extents.
+ */
+static extent_t *
+extent_recycle_split(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
+ void *new_addr, size_t size, size_t pad, size_t alignment, bool slab,
+ szind_t szind, extent_t *extent, bool growing_retained) {
+ extent_t *lead;
+ extent_t *trail;
+ extent_t *to_leak;
+ extent_t *to_salvage;
+
+ extent_split_interior_result_t result = extent_split_interior(
+ tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail,
+ &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind,
+ growing_retained);
+
+ if (result == extent_split_interior_ok) {
+ if (lead != NULL) {
+ extent_deactivate(tsdn, arena, extents, lead);
+ }
+ if (trail != NULL) {
+ extent_deactivate(tsdn, arena, extents, trail);
+ }
+ return extent;
+ } else {
+ /*
+ * We should have picked an extent that was large enough to
+ * fulfill our allocation request.
+ */
+ assert(result == extent_split_interior_error);
+ if (to_salvage != NULL) {
+ extent_deregister(tsdn, to_salvage);
+ }
+ if (to_leak != NULL) {
+ void *leak = extent_base_get(to_leak);
+ extent_deregister_no_gdump_sub(tsdn, to_leak);
+ extents_leak(tsdn, arena, r_extent_hooks, extents,
+ to_leak, growing_retained);
+ assert(extent_lock_from_addr(tsdn, rtree_ctx, leak)
+ == NULL);
+ }
+ return NULL;
+ }
+ unreachable();
+}
+
+/*
+ * Tries to satisfy the given allocation request by reusing one of the extents
+ * in the given extents_t.
+ */
+static extent_t *
+extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit,
+ bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+ assert(new_addr == NULL || !slab);
+ assert(pad == 0 || !slab);
+ assert(!*zero || !slab);
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
-static inline int
-extent_szad_comp(extent_node_t *a, extent_node_t *b)
-{
- int ret;
- size_t a_size = a->size;
- size_t b_size = b->size;
+ extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks,
+ rtree_ctx, extents, new_addr, size, pad, alignment, slab,
+ growing_retained);
+ if (extent == NULL) {
+ return NULL;
+ }
- ret = (a_size > b_size) - (a_size < b_size);
- if (ret == 0) {
- uintptr_t a_addr = (uintptr_t)a->addr;
- uintptr_t b_addr = (uintptr_t)b->addr;
+ extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx,
+ extents, new_addr, size, pad, alignment, slab, szind, extent,
+ growing_retained);
+ if (extent == NULL) {
+ return NULL;
+ }
- ret = (a_addr > b_addr) - (a_addr < b_addr);
+ if (*commit && !extent_committed_get(extent)) {
+ if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent,
+ 0, extent_size_get(extent), growing_retained)) {
+ extent_record(tsdn, arena, r_extent_hooks, extents,
+ extent, growing_retained);
+ return NULL;
+ }
+ extent_zeroed_set(extent, true);
}
- return (ret);
+ if (extent_committed_get(extent)) {
+ *commit = true;
+ }
+ if (extent_zeroed_get(extent)) {
+ *zero = true;
+ }
+
+ if (pad != 0) {
+ extent_addr_randomize(tsdn, extent, alignment);
+ }
+ assert(extent_state_get(extent) == extent_state_active);
+ if (slab) {
+ extent_slab_set(extent, slab);
+ extent_interior_register(tsdn, rtree_ctx, extent, szind);
+ }
+
+ if (*zero) {
+ void *addr = extent_base_get(extent);
+ size_t size = extent_size_get(extent);
+ if (!extent_zeroed_get(extent)) {
+ if (pages_purge_forced(addr, size)) {
+ memset(addr, 0, size);
+ }
+ } else if (config_debug) {
+ size_t *p = (size_t *)(uintptr_t)addr;
+ for (size_t i = 0; i < size / sizeof(size_t); i++) {
+ assert(p[i] == 0);
+ }
+ }
+ }
+ return extent;
}
-/* Generate red-black tree functions. */
-rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
- extent_szad_comp)
+/*
+ * If the caller specifies (!*zero), it is still possible to receive zeroed
+ * memory, in which case *zero is toggled to true. arena_extent_alloc() takes
+ * advantage of this to avoid demanding zeroed extents, while still taking
+ * advantage of them if they are returned.
+ */
+static void *
+extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) {
+ void *ret;
+
+ assert(size != 0);
+ assert(alignment != 0);
-static inline int
-extent_ad_comp(extent_node_t *a, extent_node_t *b)
-{
- uintptr_t a_addr = (uintptr_t)a->addr;
- uintptr_t b_addr = (uintptr_t)b->addr;
+ /* "primary" dss. */
+ if (have_dss && dss_prec == dss_prec_primary && (ret =
+ extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+ commit)) != NULL) {
+ return ret;
+ }
+ /* mmap. */
+ if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit))
+ != NULL) {
+ return ret;
+ }
+ /* "secondary" dss. */
+ if (have_dss && dss_prec == dss_prec_secondary && (ret =
+ extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero,
+ commit)) != NULL) {
+ return ret;
+ }
+
+ /* All strategies for allocation failed. */
+ return NULL;
+}
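+
+/*
+ * Caller-side sketch of the *zero contract described above (names and
+ * values are illustrative):
+ *
+ *     bool zero = false, commit = true;
+ *     void *ptr = extent_alloc_core(tsdn, arena, NULL, size, PAGE,
+ *         &zero, &commit, dss_prec_disabled);
+ *     if (ptr != NULL && !zero) {
+ *         memset(ptr, 0, size);
+ *     }
+ *
+ * Passing zero = false does not forbid zeroed memory; it only avoids
+ * demanding it, and *zero reports what actually came back.
+ */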
+
+static void *
+extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool *commit) {
+ void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero,
+ commit, (dss_prec_t)atomic_load_u(&arena->dss_prec,
+ ATOMIC_RELAXED));
+ if (have_madvise_huge && ret) {
+ pages_set_thp_state(ret, size);
+ }
+ return ret;
+}
+
+static void *
+extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
+ tsdn_t *tsdn;
+ arena_t *arena;
+
+ tsdn = tsdn_fetch();
+ arena = arena_get(tsdn, arena_ind, false);
+ /*
+ * The arena we're allocating on behalf of must have been initialized
+ * already.
+ */
+ assert(arena != NULL);
+
+ return extent_alloc_default_impl(tsdn, arena, new_addr, size,
+ alignment, zero, commit);
+}
+
+static void
+extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) {
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ if (arena == arena_get(tsd_tsdn(tsd), 0, false)) {
+ /*
+ * The only legitimate case of customized extent hooks for a0 is
+ * hooks with no allocation activities. One such example is to
+ * place metadata on pre-allocated resources such as huge pages.
+ * In that case, rely on reentrancy_level checks to catch
+ * infinite recursions.
+ */
+ pre_reentrancy(tsd, NULL);
+ } else {
+ pre_reentrancy(tsd, arena);
+ }
+}
- return ((a_addr > b_addr) - (a_addr < b_addr));
+static void
+extent_hook_post_reentrancy(tsdn_t *tsdn) {
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ post_reentrancy(tsd);
}
-/* Generate red-black tree functions. */
-rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
- extent_ad_comp)
+/*
+ * If virtual memory is retained, create increasingly large extents from which
+ * to split requested extents in order to limit the total number of disjoint
+ * virtual memory ranges retained by each arena.
+ */
+static extent_t *
+extent_grow_retained(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, size_t size, size_t pad, size_t alignment,
+ bool slab, szind_t szind, bool *zero, bool *commit) {
+ malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx);
+ assert(pad == 0 || !slab);
+ assert(!*zero || !slab);
+
+ size_t esize = size + pad;
+ size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE;
+ /* Beware size_t wrap-around. */
+ if (alloc_size_min < esize) {
+ goto label_err;
+ }
+ /*
+ * Find the next extent size in the series that would be large enough to
+ * satisfy this request.
+ */
+ pszind_t egn_skip = 0;
+ size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip);
+ while (alloc_size < alloc_size_min) {
+ egn_skip++;
+ if (arena->extent_grow_next + egn_skip == NPSIZES) {
+ /* Outside legal range. */
+ goto label_err;
+ }
+ assert(arena->extent_grow_next + egn_skip < NPSIZES);
+ alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip);
+ }
+
+ extent_t *extent = extent_alloc(tsdn, arena);
+ if (extent == NULL) {
+ goto label_err;
+ }
+ bool zeroed = false;
+ bool committed = false;
+
+ void *ptr;
+ if (*r_extent_hooks == &extent_hooks_default) {
+ ptr = extent_alloc_default_impl(tsdn, arena, NULL,
+ alloc_size, PAGE, &zeroed, &committed);
+ } else {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL,
+ alloc_size, PAGE, &zeroed, &committed,
+ arena_ind_get(arena));
+ extent_hook_post_reentrancy(tsdn);
+ }
+
+ extent_init(extent, arena, ptr, alloc_size, false, NSIZES,
+ arena_extent_sn_next(arena), extent_state_active, zeroed,
+ committed, true);
+ if (ptr == NULL) {
+ extent_dalloc(tsdn, arena, extent);
+ goto label_err;
+ }
+
+ if (extent_register_no_gdump_add(tsdn, extent)) {
+ extents_leak(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, extent, true);
+ goto label_err;
+ }
+
+ if (extent_zeroed_get(extent) && extent_committed_get(extent)) {
+ *zero = true;
+ }
+ if (extent_committed_get(extent)) {
+ *commit = true;
+ }
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ extent_t *lead;
+ extent_t *trail;
+ extent_t *to_leak;
+ extent_t *to_salvage;
+ extent_split_interior_result_t result = extent_split_interior(
+ tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail,
+ &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind,
+ true);
+
+ if (result == extent_split_interior_ok) {
+ if (lead != NULL) {
+ extent_record(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, lead, true);
+ }
+ if (trail != NULL) {
+ extent_record(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, trail, true);
+ }
+ } else {
+ /*
+ * We should have allocated a sufficiently large extent; the
+ * cant_alloc case should not occur.
+ */
+ assert(result == extent_split_interior_error);
+ if (to_salvage != NULL) {
+ if (config_prof) {
+ extent_gdump_add(tsdn, to_salvage);
+ }
+ extent_record(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, to_salvage, true);
+ }
+ if (to_leak != NULL) {
+ extent_deregister_no_gdump_sub(tsdn, to_leak);
+ extents_leak(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, to_leak, true);
+ }
+ goto label_err;
+ }
+
+ if (*commit && !extent_committed_get(extent)) {
+ if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0,
+ extent_size_get(extent), true)) {
+ extent_record(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, extent, true);
+ goto label_err;
+ }
+ extent_zeroed_set(extent, true);
+ }
+
+ /*
+ * Increment extent_grow_next if doing so wouldn't exceed the allowed
+ * range.
+ */
+ if (arena->extent_grow_next + egn_skip + 1 <=
+ arena->retain_grow_limit) {
+ arena->extent_grow_next += egn_skip + 1;
+ } else {
+ arena->extent_grow_next = arena->retain_grow_limit;
+ }
+ /* All opportunities for failure are past. */
+ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx);
+
+ if (config_prof) {
+ /* Adjust gdump stats now that extent is final size. */
+ extent_gdump_add(tsdn, extent);
+ }
+ if (pad != 0) {
+ extent_addr_randomize(tsdn, extent, alignment);
+ }
+ if (slab) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+ &rtree_ctx_fallback);
+
+ extent_slab_set(extent, true);
+ extent_interior_register(tsdn, rtree_ctx, extent, szind);
+ }
+ if (*zero && !extent_zeroed_get(extent)) {
+ void *addr = extent_base_get(extent);
+ size_t size = extent_size_get(extent);
+ if (pages_purge_forced(addr, size)) {
+ memset(addr, 0, size);
+ }
+ }
+
+ return extent;
+label_err:
+ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx);
+ return NULL;
+}
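+
+/*
+ * Illustrative growth pattern (actual sizes depend on the configured
+ * page-size classes, so the numbers here are an assumption): each pass
+ * through extent_grow_retained() advances extent_grow_next, so successive
+ * growths request geometrically increasing amounts of retained virtual
+ * memory (e.g. 4K, 8K, 12K, 16K, ..., roughly doubling every few classes),
+ * keeping the number of disjoint retained mappings logarithmic in the
+ * total retained size.
+ */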
+
+static extent_t *
+extent_alloc_retained(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) {
+ assert(size != 0);
+ assert(alignment != 0);
+
+ malloc_mutex_lock(tsdn, &arena->extent_grow_mtx);
+
+ extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, new_addr, size, pad, alignment, slab,
+ szind, zero, commit, true);
+ if (extent != NULL) {
+ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx);
+ if (config_prof) {
+ extent_gdump_add(tsdn, extent);
+ }
+ } else if (opt_retain && new_addr == NULL) {
+ extent = extent_grow_retained(tsdn, arena, r_extent_hooks, size,
+ pad, alignment, slab, szind, zero, commit);
+ /* extent_grow_retained() always releases extent_grow_mtx. */
+ } else {
+ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx);
+ }
+ malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx);
+
+ return extent;
+}
+
+static extent_t *
+extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) {
+ size_t esize = size + pad;
+ extent_t *extent = extent_alloc(tsdn, arena);
+ if (extent == NULL) {
+ return NULL;
+ }
+ void *addr;
+ if (*r_extent_hooks == &extent_hooks_default) {
+ /* Call directly to propagate tsdn. */
+ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize,
+ alignment, zero, commit);
+ } else {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr,
+ esize, alignment, zero, commit, arena_ind_get(arena));
+ extent_hook_post_reentrancy(tsdn);
+ }
+ if (addr == NULL) {
+ extent_dalloc(tsdn, arena, extent);
+ return NULL;
+ }
+ extent_init(extent, arena, addr, esize, slab, szind,
+ arena_extent_sn_next(arena), extent_state_active, *zero, *commit,
+ true);
+ if (pad != 0) {
+ extent_addr_randomize(tsdn, extent, alignment);
+ }
+ if (extent_register(tsdn, extent)) {
+ extents_leak(tsdn, arena, r_extent_hooks,
+ &arena->extents_retained, extent, false);
+ return NULL;
+ }
+
+ return extent;
+}
+
+extent_t *
+extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
+ size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks,
+ new_addr, size, pad, alignment, slab, szind, zero, commit);
+ if (extent == NULL) {
+ if (opt_retain && new_addr != NULL) {
+ /*
+ * When retain is enabled and new_addr is set, we do not
+ * attempt extent_alloc_wrapper_hard, whose mmap is very
+ * unlikely to succeed at the requested address (unless
+ * new_addr happens to be at the end of an existing mapping).
+ */
+ return NULL;
+ }
+ extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks,
+ new_addr, size, pad, alignment, slab, szind, zero, commit);
+ }
+
+ assert(extent == NULL || extent_dumpable_get(extent));
+ return extent;
+}
+
+static bool
+extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner,
+ const extent_t *outer) {
+ assert(extent_arena_get(inner) == arena);
+ if (extent_arena_get(outer) != arena) {
+ return false;
+ }
+
+ assert(extent_state_get(inner) == extent_state_active);
+ if (extent_state_get(outer) != extents->state) {
+ return false;
+ }
+
+ if (extent_committed_get(inner) != extent_committed_get(outer)) {
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, extent_t *inner, extent_t *outer, bool forward,
+ bool growing_retained) {
+ assert(extent_can_coalesce(arena, extents, inner, outer));
+
+ extent_activate_locked(tsdn, arena, extents, outer);
+
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+ bool err = extent_merge_impl(tsdn, arena, r_extent_hooks,
+ forward ? inner : outer, forward ? outer : inner, growing_retained);
+ malloc_mutex_lock(tsdn, &extents->mtx);
+
+ if (err) {
+ extent_deactivate_locked(tsdn, arena, extents, outer);
+ }
+
+ return err;
+}
+
+static extent_t *
+extent_try_coalesce(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents,
+ extent_t *extent, bool *coalesced, bool growing_retained) {
+ /*
+ * Continue attempting to coalesce until failure, to protect against
+ * races with other threads that are thwarted by this one.
+ */
+ bool again;
+ do {
+ again = false;
+
+ /* Try to coalesce forward. */
+ extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx,
+ extent_past_get(extent));
+ if (next != NULL) {
+ /*
+ * extents->mtx only protects against races for
+ * like-state extents, so call extent_can_coalesce()
+ * before releasing next's pool lock.
+ */
+ bool can_coalesce = extent_can_coalesce(arena, extents,
+ extent, next);
+
+ extent_unlock(tsdn, next);
+
+ if (can_coalesce && !extent_coalesce(tsdn, arena,
+ r_extent_hooks, extents, extent, next, true,
+ growing_retained)) {
+ if (extents->delay_coalesce) {
+ /* Do minimal coalescing. */
+ *coalesced = true;
+ return extent;
+ }
+ again = true;
+ }
+ }
+
+ /* Try to coalesce backward. */
+ extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx,
+ extent_before_get(extent));
+ if (prev != NULL) {
+ bool can_coalesce = extent_can_coalesce(arena, extents,
+ extent, prev);
+ extent_unlock(tsdn, prev);
+
+ if (can_coalesce && !extent_coalesce(tsdn, arena,
+ r_extent_hooks, extents, extent, prev, false,
+ growing_retained)) {
+ extent = prev;
+ if (extents->delay_coalesce) {
+ /* Do minimal coalescing. */
+ *coalesced = true;
+ return extent;
+ }
+ again = true;
+ }
+ }
+ } while (again);
+
+ if (extents->delay_coalesce) {
+ *coalesced = false;
+ }
+ return extent;
+}
+
+/*
+ * Handles the metadata-management portion of putting an unused extent into the
+ * given extents_t (coalescing, deregistering slab interiors, heap operations).
+ */
+static void
+extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks,
+ extents_t *extents, extent_t *extent, bool growing_retained) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ assert((extents_state_get(extents) != extent_state_dirty &&
+ extents_state_get(extents) != extent_state_muzzy) ||
+ !extent_zeroed_get(extent));
+
+ malloc_mutex_lock(tsdn, &extents->mtx);
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ extent_szind_set(extent, NSIZES);
+ if (extent_slab_get(extent)) {
+ extent_interior_deregister(tsdn, rtree_ctx, extent);
+ extent_slab_set(extent, false);
+ }
+
+ assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_base_get(extent), true) == extent);
+
+ if (!extents->delay_coalesce) {
+ extent = extent_try_coalesce(tsdn, arena, r_extent_hooks,
+ rtree_ctx, extents, extent, NULL, growing_retained);
+ } else if (extent_size_get(extent) >= LARGE_MINCLASS) {
+ /* Always coalesce large extents eagerly. */
+ bool coalesced;
+ size_t prev_size;
+ do {
+ prev_size = extent_size_get(extent);
+ assert(extent_state_get(extent) == extent_state_active);
+ extent = extent_try_coalesce(tsdn, arena,
+ r_extent_hooks, rtree_ctx, extents, extent,
+ &coalesced, growing_retained);
+ } while (coalesced &&
+ extent_size_get(extent) >= prev_size + LARGE_MINCLASS);
+ }
+ extent_deactivate_locked(tsdn, arena, extents, extent);
+
+ malloc_mutex_unlock(tsdn, &extents->mtx);
+}
+
+void
+extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
+ extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ if (extent_register(tsdn, extent)) {
+ extents_leak(tsdn, arena, &extent_hooks,
+ &arena->extents_retained, extent, false);
+ return;
+ }
+ extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent);
+}
+
+static bool
+extent_dalloc_default_impl(void *addr, size_t size) {
+ if (!have_dss || !extent_in_dss(addr)) {
+ return extent_dalloc_mmap(addr, size);
+ }
+ return true;
+}
+
+static bool
+extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ bool committed, unsigned arena_ind) {
+ return extent_dalloc_default_impl(addr, size);
+}
+
+static bool
+extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent) {
+ bool err;
+
+ assert(extent_base_get(extent) != NULL);
+ assert(extent_size_get(extent) != 0);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_addr_set(extent, extent_base_get(extent));
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+ /* Try to deallocate. */
+ if (*r_extent_hooks == &extent_hooks_default) {
+ /* Call directly to propagate tsdn. */
+ err = extent_dalloc_default_impl(extent_base_get(extent),
+ extent_size_get(extent));
+ } else {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ err = ((*r_extent_hooks)->dalloc == NULL ||
+ (*r_extent_hooks)->dalloc(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent),
+ extent_committed_get(extent), arena_ind_get(arena)));
+ extent_hook_post_reentrancy(tsdn);
+ }
+
+ if (!err) {
+ extent_dalloc(tsdn, arena, extent);
+ }
+
+ return err;
+}
+
+void
+extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent) {
+ assert(extent_dumpable_get(extent));
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ /*
+ * Deregister first to avoid a race with other allocating threads, and
+ * reregister if deallocation fails.
+ */
+ extent_deregister(tsdn, extent);
+ if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) {
+ return;
+ }
+
+ extent_reregister(tsdn, extent);
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ /* Try to decommit; purge if that fails. */
+ bool zeroed;
+ if (!extent_committed_get(extent)) {
+ zeroed = true;
+ } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent,
+ 0, extent_size_get(extent))) {
+ zeroed = true;
+ } else if ((*r_extent_hooks)->purge_forced != NULL &&
+ !(*r_extent_hooks)->purge_forced(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent), 0,
+ extent_size_get(extent), arena_ind_get(arena))) {
+ zeroed = true;
+ } else if (extent_state_get(extent) == extent_state_muzzy ||
+ ((*r_extent_hooks)->purge_lazy != NULL &&
+ !(*r_extent_hooks)->purge_lazy(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent), 0,
+ extent_size_get(extent), arena_ind_get(arena)))) {
+ zeroed = false;
+ } else {
+ zeroed = false;
+ }
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+ extent_zeroed_set(extent, zeroed);
+
+ if (config_prof) {
+ extent_gdump_sub(tsdn, extent);
+ }
+
+ extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained,
+ extent, false);
+}
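+/*
+ * Editorial summary (not part of the upstream change): the fallback ladder
+ * above resolves the extent's zeroed state as follows -- not committed,
+ * successful decommit, or successful purge_forced all yield zeroed = true;
+ * an already-muzzy extent, a successful purge_lazy, or total failure all
+ * yield zeroed = false.  The extent is then recorded into
+ * arena->extents_retained for later reuse.
+ */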
+
+static void
+extent_destroy_default_impl(void *addr, size_t size) {
+ if (!have_dss || !extent_in_dss(addr)) {
+ pages_unmap(addr, size);
+ }
+}
+
+static void
+extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ bool committed, unsigned arena_ind) {
+ extent_destroy_default_impl(addr, size);
+}
+
+void
+extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent) {
+ assert(extent_base_get(extent) != NULL);
+ assert(extent_size_get(extent) != 0);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ /* Deregister first to avoid a race with other allocating threads. */
+ extent_deregister(tsdn, extent);
+
+ extent_addr_set(extent, extent_base_get(extent));
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+ /* Try to destroy; silently fail otherwise. */
+ if (*r_extent_hooks == &extent_hooks_default) {
+ /* Call directly to propagate tsdn. */
+ extent_destroy_default_impl(extent_base_get(extent),
+ extent_size_get(extent));
+ } else if ((*r_extent_hooks)->destroy != NULL) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ (*r_extent_hooks)->destroy(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent),
+ extent_committed_get(extent), arena_ind_get(arena));
+ extent_hook_post_reentrancy(tsdn);
+ }
+
+ extent_dalloc(tsdn, arena, extent);
+}
+
+static bool
+extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset),
+ length);
+}
+
+static bool
+extent_commit_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ bool err = ((*r_extent_hooks)->commit == NULL ||
+ (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent),
+ extent_size_get(extent), offset, length, arena_ind_get(arena)));
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+ extent_committed_set(extent, extent_committed_get(extent) || !err);
+ return err;
+}
+
+bool
+extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length) {
+ return extent_commit_impl(tsdn, arena, r_extent_hooks, extent, offset,
+ length, false);
+}
+
+static bool
+extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset),
+ length);
+}
+
+bool
+extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ bool err = ((*r_extent_hooks)->decommit == NULL ||
+ (*r_extent_hooks)->decommit(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent), offset, length,
+ arena_ind_get(arena)));
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+ extent_committed_set(extent, extent_committed_get(extent) && err);
+ return err;
+}
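+/*
+ * Editorial note (not part of the upstream change): the commit and decommit
+ * wrappers fold hook results into the committed flag symmetrically -- after
+ * commit, the extent is committed if it already was or the hook succeeded;
+ * after decommit, it stays committed only if the hook failed.
+ */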
+
+#ifdef PAGES_CAN_PURGE_LAZY
+static bool
+extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ assert(addr != NULL);
+ assert((offset & PAGE_MASK) == 0);
+ assert(length != 0);
+ assert((length & PAGE_MASK) == 0);
+
+ return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset),
+ length);
+}
+#endif
+
+static bool
+extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ if ((*r_extent_hooks)->purge_lazy == NULL) {
+ return true;
+ }
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent), offset, length,
+ arena_ind_get(arena));
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+
+ return err;
+}
+
+bool
+extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length) {
+ return extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent,
+ offset, length, false);
+}
+
+#ifdef PAGES_CAN_PURGE_FORCED
+static bool
+extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind) {
+ assert(addr != NULL);
+ assert((offset & PAGE_MASK) == 0);
+ assert(length != 0);
+ assert((length & PAGE_MASK) == 0);
+
+ return pages_purge_forced((void *)((uintptr_t)addr +
+ (uintptr_t)offset), length);
+}
+#endif
+
+static bool
+extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length, bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ if ((*r_extent_hooks)->purge_forced == NULL) {
+ return true;
+ }
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks,
+ extent_base_get(extent), extent_size_get(extent), offset, length,
+ arena_ind_get(arena));
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+ return err;
+}
+
+bool
+extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+ size_t length) {
+ return extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent,
+ offset, length, false);
+}
+
+#ifdef JEMALLOC_MAPS_COALESCE
+static bool
+extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
+ return !maps_coalesce;
+}
+#endif
+
+/*
+ * Accepts the extent to split, and the characteristics of each side of the
+ * split.  The 'a' parameters go with the 'lead' of the resulting pair of
+ * extents (the lower-addressed portion of the split), and the 'b' parameters
+ * go with the trail (the higher-addressed portion).  On success 'extent'
+ * becomes the lead and the trail is returned; on error the result is NULL.
+ */
+static extent_t *
+extent_split_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
+ szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b,
+ bool growing_retained) {
+ assert(extent_size_get(extent) == size_a + size_b);
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ if ((*r_extent_hooks)->split == NULL) {
+ return NULL;
+ }
+
+ extent_t *trail = extent_alloc(tsdn, arena);
+ if (trail == NULL) {
+ goto label_error_a;
+ }
+
+ extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) +
+ size_a), size_b, slab_b, szind_b, extent_sn_get(extent),
+ extent_state_get(extent), extent_zeroed_get(extent),
+ extent_committed_get(extent), extent_dumpable_get(extent));
+
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ rtree_leaf_elm_t *lead_elm_a, *lead_elm_b;
+ {
+ extent_t lead;
+
+ extent_init(&lead, arena, extent_addr_get(extent), size_a,
+ slab_a, szind_a, extent_sn_get(extent),
+ extent_state_get(extent), extent_zeroed_get(extent),
+ extent_committed_get(extent), extent_dumpable_get(extent));
+
+ extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false,
+ true, &lead_elm_a, &lead_elm_b);
+ }
+ rtree_leaf_elm_t *trail_elm_a, *trail_elm_b;
+ extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true,
+ &trail_elm_a, &trail_elm_b);
+
+ if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL
+ || trail_elm_b == NULL) {
+ goto label_error_b;
+ }
+
+ extent_lock2(tsdn, extent, trail);
+
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ }
+ bool err = (*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent),
+ size_a + size_b, size_a, size_b, extent_committed_get(extent),
+ arena_ind_get(arena));
+ if (*r_extent_hooks != &extent_hooks_default) {
+ extent_hook_post_reentrancy(tsdn);
+ }
+ if (err) {
+ goto label_error_c;
+ }
+
+ extent_size_set(extent, size_a);
+ extent_szind_set(extent, szind_a);
+
+ extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent,
+ szind_a, slab_a);
+ extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail,
+ szind_b, slab_b);
+
+ extent_unlock2(tsdn, extent, trail);
+
+ return trail;
+label_error_c:
+ extent_unlock2(tsdn, extent, trail);
+label_error_b:
+ extent_dalloc(tsdn, arena, trail);
+label_error_a:
+ return NULL;
+}
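+/*
+ * Editorial example (not part of the upstream change): splitting an extent
+ * with base 0x7f0000000000 and size 0x6000 using size_a = 0x2000 and
+ * size_b = 0x4000 leaves the lead covering [0x7f0000000000, 0x7f0000002000)
+ * and returns a trail covering [0x7f0000002000, 0x7f0000006000); both sides
+ * inherit the original extent's sn, zeroed, committed, and dumpable state.
+ */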
+
+extent_t *
+extent_split_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
+ szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) {
+ return extent_split_impl(tsdn, arena, r_extent_hooks, extent, size_a,
+ szind_a, slab_a, size_b, szind_b, slab_b, false);
+}
+
+static bool
+extent_merge_default_impl(void *addr_a, void *addr_b) {
+ if (!maps_coalesce) {
+ return true;
+ }
+ if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) {
+ return true;
+ }
+
+ return false;
+}
+
+#ifdef JEMALLOC_MAPS_COALESCE
+static bool
+extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
+ void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
+ return extent_merge_default_impl(addr_a, addr_b);
+}
+#endif
+
+static bool
+extent_merge_impl(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b,
+ bool growing_retained) {
+ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+ WITNESS_RANK_CORE, growing_retained ? 1 : 0);
+
+ extent_hooks_assure_initialized(arena, r_extent_hooks);
+
+ if ((*r_extent_hooks)->merge == NULL) {
+ return true;
+ }
+
+ bool err;
+ if (*r_extent_hooks == &extent_hooks_default) {
+ /* Call directly to propagate tsdn. */
+ err = extent_merge_default_impl(extent_base_get(a),
+ extent_base_get(b));
+ } else {
+ extent_hook_pre_reentrancy(tsdn, arena);
+ err = (*r_extent_hooks)->merge(*r_extent_hooks,
+ extent_base_get(a), extent_size_get(a), extent_base_get(b),
+ extent_size_get(b), extent_committed_get(a),
+ arena_ind_get(arena));
+ extent_hook_post_reentrancy(tsdn);
+ }
+
+ if (err) {
+ return true;
+ }
+
+ /*
+ * The rtree writes must happen while all the relevant elements are
+ * owned, so the following code uses decomposed helper functions rather
+ * than extent_{,de}register() to do things in the right order.
+ */
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b;
+ extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a,
+ &a_elm_b);
+ extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a,
+ &b_elm_b);
+
+ extent_lock2(tsdn, a, b);
+
+ if (a_elm_b != NULL) {
+ rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL,
+ NSIZES, false);
+ }
+ if (b_elm_b != NULL) {
+ rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL,
+ NSIZES, false);
+ } else {
+ b_elm_b = b_elm_a;
+ }
+
+ extent_size_set(a, extent_size_get(a) + extent_size_get(b));
+ extent_szind_set(a, NSIZES);
+ extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ?
+ extent_sn_get(a) : extent_sn_get(b));
+ extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b));
+
+ extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, NSIZES, false);
+
+ extent_unlock2(tsdn, a, b);
+
+ extent_dalloc(tsdn, extent_arena_get(b), b);
+
+ return false;
+}
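+/*
+ * Editorial note (not part of the upstream change): on a successful merge,
+ * 'a' absorbs 'b' -- its size becomes size(a) + size(b), its serial number
+ * becomes min(sn(a), sn(b)), and it is zeroed only if both inputs were
+ * zeroed -- and 'b's extent_t is returned to its arena.
+ */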
+
+bool
+extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
+ extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) {
+ return extent_merge_impl(tsdn, arena, r_extent_hooks, a, b, false);
+}
+
+bool
+extent_boot(void) {
+ if (rtree_new(&extents_rtree, true)) {
+ return true;
+ }
+
+ if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool",
+ WITNESS_RANK_EXTENT_POOL)) {
+ return true;
+ }
+
+ if (have_dss) {
+ extent_dss_boot();
+ }
+
+ return false;
+}
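+/*
+ * Editorial note (not part of the upstream change): extents_rtree is the
+ * global address-to-extent radix tree consulted throughout this file, and
+ * extent_mutex_pool backs the per-address extent_lock*()/extent_unlock*()
+ * operations used while coalescing, splitting, and merging.
+ */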
diff --git a/deps/jemalloc/src/extent_dss.c b/deps/jemalloc/src/extent_dss.c
new file mode 100644
index 000000000..2b1ea9caf
--- /dev/null
+++ b/deps/jemalloc/src/extent_dss.c
@@ -0,0 +1,270 @@
+#define JEMALLOC_EXTENT_DSS_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/spin.h"
+
+/******************************************************************************/
+/* Data. */
+
+const char *opt_dss = DSS_DEFAULT;
+
+const char *dss_prec_names[] = {
+ "disabled",
+ "primary",
+ "secondary",
+ "N/A"
+};
+
+/*
+ * Current dss precedence default, used when creating new arenas. NB: This is
+ * stored as unsigned rather than dss_prec_t because in principle there's no
+ * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use
+ * atomic operations to synchronize the setting.
+ */
+static atomic_u_t dss_prec_default = ATOMIC_INIT(
+ (unsigned)DSS_PREC_DEFAULT);
+
+/* Base address of the DSS. */
+static void *dss_base;
+/* Atomic boolean indicating whether a thread is currently extending DSS. */
+static atomic_b_t dss_extending;
+/* Atomic boolean indicating whether the DSS is exhausted. */
+static atomic_b_t dss_exhausted;
+/* Atomic current upper limit on DSS addresses. */
+static atomic_p_t dss_max;
+
+/******************************************************************************/
+
+static void *
+extent_dss_sbrk(intptr_t increment) {
+#ifdef JEMALLOC_DSS
+ return sbrk(increment);
+#else
+ not_implemented();
+ return NULL;
+#endif
+}
+
+dss_prec_t
+extent_dss_prec_get(void) {
+ dss_prec_t ret;
+
+ if (!have_dss) {
+ return dss_prec_disabled;
+ }
+ ret = (dss_prec_t)atomic_load_u(&dss_prec_default, ATOMIC_ACQUIRE);
+ return ret;
+}
+
+bool
+extent_dss_prec_set(dss_prec_t dss_prec) {
+ if (!have_dss) {
+ return (dss_prec != dss_prec_disabled);
+ }
+ atomic_store_u(&dss_prec_default, (unsigned)dss_prec, ATOMIC_RELEASE);
+ return false;
+}
+
+static void
+extent_dss_extending_start(void) {
+ spin_t spinner = SPIN_INITIALIZER;
+ while (true) {
+ bool expected = false;
+ if (atomic_compare_exchange_weak_b(&dss_extending, &expected,
+ true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) {
+ break;
+ }
+ spin_adaptive(&spinner);
+ }
+}
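+/*
+ * Editorial note (not part of the upstream change): extending_start() and
+ * extending_finish() form a simple spinlock around DSS growth -- an
+ * adaptive-spin weak CAS on dss_extending acquires it, and the release
+ * store in extent_dss_extending_finish() drops it.
+ */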
+
+static void
+extent_dss_extending_finish(void) {
+ assert(atomic_load_b(&dss_extending, ATOMIC_RELAXED));
+
+ atomic_store_b(&dss_extending, false, ATOMIC_RELEASE);
+}
+
+static void *
+extent_dss_max_update(void *new_addr) {
+ /*
+	 * Get the current end of the DSS as max_cur and ensure that dss_max is
+ * up to date.
+ */
+ void *max_cur = extent_dss_sbrk(0);
+ if (max_cur == (void *)-1) {
+ return NULL;
+ }
+ atomic_store_p(&dss_max, max_cur, ATOMIC_RELEASE);
+ /* Fixed new_addr can only be supported if it is at the edge of DSS. */
+ if (new_addr != NULL && max_cur != new_addr) {
+ return NULL;
+ }
+ return max_cur;
+}
+
+void *
+extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit) {
+ extent_t *gap;
+
+ cassert(have_dss);
+ assert(size > 0);
+ assert(alignment > 0);
+
+ /*
+ * sbrk() uses a signed increment argument, so take care not to
+ * interpret a large allocation request as a negative increment.
+ */
+ if ((intptr_t)size < 0) {
+ return NULL;
+ }
+
+ gap = extent_alloc(tsdn, arena);
+ if (gap == NULL) {
+ return NULL;
+ }
+
+ extent_dss_extending_start();
+ if (!atomic_load_b(&dss_exhausted, ATOMIC_ACQUIRE)) {
+ /*
+ * The loop is necessary to recover from races with other
+ * threads that are using the DSS for something other than
+ * malloc.
+ */
+ while (true) {
+ void *max_cur = extent_dss_max_update(new_addr);
+ if (max_cur == NULL) {
+ goto label_oom;
+ }
+
+ /*
+ * Compute how much page-aligned gap space (if any) is
+ * necessary to satisfy alignment. This space can be
+ * recycled for later use.
+ */
+ void *gap_addr_page = (void *)(PAGE_CEILING(
+ (uintptr_t)max_cur));
+ void *ret = (void *)ALIGNMENT_CEILING(
+ (uintptr_t)gap_addr_page, alignment);
+ size_t gap_size_page = (uintptr_t)ret -
+ (uintptr_t)gap_addr_page;
+ if (gap_size_page != 0) {
+ extent_init(gap, arena, gap_addr_page,
+ gap_size_page, false, NSIZES,
+ arena_extent_sn_next(arena),
+ extent_state_active, false, true, true);
+ }
+ /*
+ * Compute the address just past the end of the desired
+ * allocation space.
+ */
+ void *dss_next = (void *)((uintptr_t)ret + size);
+ if ((uintptr_t)ret < (uintptr_t)max_cur ||
+ (uintptr_t)dss_next < (uintptr_t)max_cur) {
+ goto label_oom; /* Wrap-around. */
+ }
+ /* Compute the increment, including subpage bytes. */
+ void *gap_addr_subpage = max_cur;
+ size_t gap_size_subpage = (uintptr_t)ret -
+ (uintptr_t)gap_addr_subpage;
+ intptr_t incr = gap_size_subpage + size;
+
+ assert((uintptr_t)max_cur + incr == (uintptr_t)ret +
+ size);
+
+ /* Try to allocate. */
+ void *dss_prev = extent_dss_sbrk(incr);
+ if (dss_prev == max_cur) {
+ /* Success. */
+ atomic_store_p(&dss_max, dss_next,
+ ATOMIC_RELEASE);
+ extent_dss_extending_finish();
+
+ if (gap_size_page != 0) {
+ extent_dalloc_gap(tsdn, arena, gap);
+ } else {
+ extent_dalloc(tsdn, arena, gap);
+ }
+ if (!*commit) {
+ *commit = pages_decommit(ret, size);
+ }
+ if (*zero && *commit) {
+ extent_hooks_t *extent_hooks =
+ EXTENT_HOOKS_INITIALIZER;
+ extent_t extent;
+
+ extent_init(&extent, arena, ret, size,
+ size, false, NSIZES,
+ extent_state_active, false, true,
+ true);
+ if (extent_purge_forced_wrapper(tsdn,
+ arena, &extent_hooks, &extent, 0,
+ size)) {
+ memset(ret, 0, size);
+ }
+ }
+ return ret;
+ }
+ /*
+ * Failure, whether due to OOM or a race with a raw
+ * sbrk() call from outside the allocator.
+ */
+ if (dss_prev == (void *)-1) {
+ /* OOM. */
+ atomic_store_b(&dss_exhausted, true,
+ ATOMIC_RELEASE);
+ goto label_oom;
+ }
+ }
+ }
+label_oom:
+ extent_dss_extending_finish();
+ extent_dalloc(tsdn, arena, gap);
+ return NULL;
+}
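+/*
+ * Editorial example (not part of the upstream change), assuming 4 KiB
+ * pages: if the DSS currently ends at max_cur = 0x10000234 and
+ * alignment = 0x8000, then gap_addr_page = PAGE_CEILING(0x10000234) =
+ * 0x10001000, ret = ALIGNMENT_CEILING(0x10001000, 0x8000) = 0x10008000,
+ * and gap_size_page = 0x7000 (recycled via extent_dalloc_gap()).  The sbrk
+ * increment is (0x10008000 - 0x10000234) + size, which ends the DSS exactly
+ * at ret + size.
+ */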
+
+static bool
+extent_in_dss_helper(void *addr, void *max) {
+ return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr <
+ (uintptr_t)max);
+}
+
+bool
+extent_in_dss(void *addr) {
+ cassert(have_dss);
+
+ return extent_in_dss_helper(addr, atomic_load_p(&dss_max,
+ ATOMIC_ACQUIRE));
+}
+
+bool
+extent_dss_mergeable(void *addr_a, void *addr_b) {
+ void *max;
+
+ cassert(have_dss);
+
+ if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b <
+ (uintptr_t)dss_base) {
+ return true;
+ }
+
+ max = atomic_load_p(&dss_max, ATOMIC_ACQUIRE);
+ return (extent_in_dss_helper(addr_a, max) ==
+ extent_in_dss_helper(addr_b, max));
+}
+
+void
+extent_dss_boot(void) {
+ cassert(have_dss);
+
+ dss_base = extent_dss_sbrk(0);
+ atomic_store_b(&dss_extending, false, ATOMIC_RELAXED);
+ atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED);
+ atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED);
+}
+
+/******************************************************************************/
diff --git a/deps/jemalloc/src/extent_mmap.c b/deps/jemalloc/src/extent_mmap.c
new file mode 100644
index 000000000..8d607dc80
--- /dev/null
+++ b/deps/jemalloc/src/extent_mmap.c
@@ -0,0 +1,42 @@
+#define JEMALLOC_EXTENT_MMAP_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+
+/******************************************************************************/
+/* Data. */
+
+bool opt_retain =
+#ifdef JEMALLOC_RETAIN
+ true
+#else
+ false
+#endif
+ ;
+
+/******************************************************************************/
+
+void *
+extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
+ bool *commit) {
+ void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment,
+ PAGE), commit);
+ if (ret == NULL) {
+ return NULL;
+ }
+ assert(ret != NULL);
+ if (*commit) {
+ *zero = true;
+ }
+ return ret;
+}
+
+bool
+extent_dalloc_mmap(void *addr, size_t size) {
+ if (!opt_retain) {
+ pages_unmap(addr, size);
+ }
+ return opt_retain;
+}
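+/*
+ * Editorial note (not part of the upstream change): extent_dalloc_mmap()
+ * returns the opt_retain setting as its error flag.  With retain enabled
+ * the mapping is deliberately left in place and "failure" is reported, so
+ * callers keep the extent in arena->extents_retained rather than unmapping
+ * it.
+ */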
diff --git a/deps/jemalloc/src/hash.c b/deps/jemalloc/src/hash.c
index cfa4da027..7b2bdc2bd 100644
--- a/deps/jemalloc/src/hash.c
+++ b/deps/jemalloc/src/hash.c
@@ -1,2 +1,3 @@
-#define JEMALLOC_HASH_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_HASH_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
diff --git a/deps/jemalloc/src/hooks.c b/deps/jemalloc/src/hooks.c
new file mode 100644
index 000000000..6266ecd47
--- /dev/null
+++ b/deps/jemalloc/src/hooks.c
@@ -0,0 +1,12 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+/*
+ * The hooks are a little bit screwy -- they're not genuinely exported in the
+ * sense that we want them available to end-users, but we do want them visible
+ * from outside the generated library, so that we can use them in test code.
+ */
+JEMALLOC_EXPORT
+void (*hooks_arena_new_hook)() = NULL;
+
+JEMALLOC_EXPORT
+void (*hooks_libc_hook)() = NULL;
diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c
deleted file mode 100644
index d72f21357..000000000
--- a/deps/jemalloc/src/huge.c
+++ /dev/null
@@ -1,347 +0,0 @@
-#define JEMALLOC_HUGE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-/******************************************************************************/
-/* Data. */
-
-uint64_t huge_nmalloc;
-uint64_t huge_ndalloc;
-size_t huge_allocated;
-
-malloc_mutex_t huge_mtx;
-
-/******************************************************************************/
-
-/* Tree of chunks that are stand-alone huge allocations. */
-static extent_tree_t huge;
-
-void *
-huge_malloc(size_t size, bool zero, dss_prec_t dss_prec)
-{
-
- return (huge_palloc(size, chunksize, zero, dss_prec));
-}
-
-void *
-huge_palloc(size_t size, size_t alignment, bool zero, dss_prec_t dss_prec)
-{
- void *ret;
- size_t csize;
- extent_node_t *node;
- bool is_zeroed;
-
- /* Allocate one or more contiguous chunks for this request. */
-
- csize = CHUNK_CEILING(size);
- if (csize == 0) {
- /* size is large enough to cause size_t wrap-around. */
- return (NULL);
- }
-
- /* Allocate an extent node with which to track the chunk. */
- node = base_node_alloc();
- if (node == NULL)
- return (NULL);
-
- /*
- * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that
- * it is possible to make correct junk/zero fill decisions below.
- */
- is_zeroed = zero;
- ret = chunk_alloc(csize, alignment, false, &is_zeroed, dss_prec);
- if (ret == NULL) {
- base_node_dealloc(node);
- return (NULL);
- }
-
- /* Insert node into huge. */
- node->addr = ret;
- node->size = csize;
-
- malloc_mutex_lock(&huge_mtx);
- extent_tree_ad_insert(&huge, node);
- if (config_stats) {
- stats_cactive_add(csize);
- huge_nmalloc++;
- huge_allocated += csize;
- }
- malloc_mutex_unlock(&huge_mtx);
-
- if (config_fill && zero == false) {
- if (opt_junk)
- memset(ret, 0xa5, csize);
- else if (opt_zero && is_zeroed == false)
- memset(ret, 0, csize);
- }
-
- return (ret);
-}
-
-bool
-huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
-{
-
- /*
- * Avoid moving the allocation if the size class can be left the same.
- */
- if (oldsize > arena_maxclass
- && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
- && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
- assert(CHUNK_CEILING(oldsize) == oldsize);
- return (false);
- }
-
- /* Reallocation would require a move. */
- return (true);
-}
-
-void *
-huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
- size_t alignment, bool zero, bool try_tcache_dalloc, dss_prec_t dss_prec)
-{
- void *ret;
- size_t copysize;
-
- /* Try to avoid moving the allocation. */
- if (huge_ralloc_no_move(ptr, oldsize, size, extra) == false)
- return (ptr);
-
- /*
- * size and oldsize are different enough that we need to use a
- * different size class. In that case, fall back to allocating new
- * space and copying.
- */
- if (alignment > chunksize)
- ret = huge_palloc(size + extra, alignment, zero, dss_prec);
- else
- ret = huge_malloc(size + extra, zero, dss_prec);
-
- if (ret == NULL) {
- if (extra == 0)
- return (NULL);
- /* Try again, this time without extra. */
- if (alignment > chunksize)
- ret = huge_palloc(size, alignment, zero, dss_prec);
- else
- ret = huge_malloc(size, zero, dss_prec);
-
- if (ret == NULL)
- return (NULL);
- }
-
- /*
- * Copy at most size bytes (not size+extra), since the caller has no
- * expectation that the extra bytes will be reliably preserved.
- */
- copysize = (size < oldsize) ? size : oldsize;
-
-#ifdef JEMALLOC_MREMAP
- /*
- * Use mremap(2) if this is a huge-->huge reallocation, and neither the
- * source nor the destination are in dss.
- */
- if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr)
- == false && chunk_in_dss(ret) == false))) {
- size_t newsize = huge_salloc(ret);
-
- /*
- * Remove ptr from the tree of huge allocations before
- * performing the remap operation, in order to avoid the
- * possibility of another thread acquiring that mapping before
- * this one removes it from the tree.
- */
- huge_dalloc(ptr, false);
- if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
- ret) == MAP_FAILED) {
- /*
- * Assuming no chunk management bugs in the allocator,
- * the only documented way an error can occur here is
- * if the application changed the map type for a
- * portion of the old allocation. This is firmly in
- * undefined behavior territory, so write a diagnostic
- * message, and optionally abort.
- */
- char buf[BUFERROR_BUF];
-
- buferror(get_errno(), buf, sizeof(buf));
- malloc_printf("<jemalloc>: Error in mremap(): %s\n",
- buf);
- if (opt_abort)
- abort();
- memcpy(ret, ptr, copysize);
- chunk_dealloc_mmap(ptr, oldsize);
- } else if (config_fill && zero == false && opt_junk && oldsize
- < newsize) {
- /*
- * mremap(2) clobbers the original mapping, so
- * junk/zero filling is not preserved. There is no
- * need to zero fill here, since any trailing
- * uninititialized memory is demand-zeroed by the
- * kernel, but junk filling must be redone.
- */
- memset(ret + oldsize, 0xa5, newsize - oldsize);
- }
- } else
-#endif
- {
- memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
- }
- return (ret);
-}
-
-#ifdef JEMALLOC_JET
-#undef huge_dalloc_junk
-#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl)
-#endif
-static void
-huge_dalloc_junk(void *ptr, size_t usize)
-{
-
- if (config_fill && config_dss && opt_junk) {
- /*
- * Only bother junk filling if the chunk isn't about to be
- * unmapped.
- */
- if (config_munmap == false || (config_dss && chunk_in_dss(ptr)))
- memset(ptr, 0x5a, usize);
- }
-}
-#ifdef JEMALLOC_JET
-#undef huge_dalloc_junk
-#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk)
-huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl);
-#endif
-
-void
-huge_dalloc(void *ptr, bool unmap)
-{
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = ptr;
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
- assert(node->addr == ptr);
- extent_tree_ad_remove(&huge, node);
-
- if (config_stats) {
- stats_cactive_sub(node->size);
- huge_ndalloc++;
- huge_allocated -= node->size;
- }
-
- malloc_mutex_unlock(&huge_mtx);
-
- if (unmap)
- huge_dalloc_junk(node->addr, node->size);
-
- chunk_dealloc(node->addr, node->size, unmap);
-
- base_node_dealloc(node);
-}
-
-size_t
-huge_salloc(const void *ptr)
-{
- size_t ret;
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
-
- ret = node->size;
-
- malloc_mutex_unlock(&huge_mtx);
-
- return (ret);
-}
-
-dss_prec_t
-huge_dss_prec_get(arena_t *arena)
-{
-
- return (arena_dss_prec_get(choose_arena(arena)));
-}
-
-prof_ctx_t *
-huge_prof_ctx_get(const void *ptr)
-{
- prof_ctx_t *ret;
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
-
- ret = node->prof_ctx;
-
- malloc_mutex_unlock(&huge_mtx);
-
- return (ret);
-}
-
-void
-huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
-{
- extent_node_t *node, key;
-
- malloc_mutex_lock(&huge_mtx);
-
- /* Extract from tree of huge allocations. */
- key.addr = __DECONST(void *, ptr);
- node = extent_tree_ad_search(&huge, &key);
- assert(node != NULL);
-
- node->prof_ctx = ctx;
-
- malloc_mutex_unlock(&huge_mtx);
-}
-
-bool
-huge_boot(void)
-{
-
- /* Initialize chunks data. */
- if (malloc_mutex_init(&huge_mtx))
- return (true);
- extent_tree_ad_new(&huge);
-
- if (config_stats) {
- huge_nmalloc = 0;
- huge_ndalloc = 0;
- huge_allocated = 0;
- }
-
- return (false);
-}
-
-void
-huge_prefork(void)
-{
-
- malloc_mutex_prefork(&huge_mtx);
-}
-
-void
-huge_postfork_parent(void)
-{
-
- malloc_mutex_postfork_parent(&huge_mtx);
-}
-
-void
-huge_postfork_child(void)
-{
-
- malloc_mutex_postfork_child(&huge_mtx);
-}
diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c
index 204778bc8..5b936cb48 100644
--- a/deps/jemalloc/src/jemalloc.c
+++ b/deps/jemalloc/src/jemalloc.c
@@ -1,15 +1,32 @@
-#define JEMALLOC_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/log.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/spin.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/ticker.h"
+#include "jemalloc/internal/util.h"
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, arenas, arena_t *, NULL)
-malloc_tsd_data(, thread_allocated, thread_allocated_t,
- THREAD_ALLOCATED_INITIALIZER)
-
/* Runtime configuration options. */
-const char *je_malloc_conf;
+const char *je_malloc_conf
+#ifndef _WIN32
+ JEMALLOC_ATTR(weak)
+#endif
+ ;
bool opt_abort =
#ifdef JEMALLOC_DEBUG
true
@@ -17,30 +34,80 @@ bool opt_abort =
false
#endif
;
-bool opt_junk =
+bool opt_abort_conf =
+#ifdef JEMALLOC_DEBUG
+ true
+#else
+ false
+#endif
+ ;
+const char *opt_junk =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+ "true"
+#else
+ "false"
+#endif
+ ;
+bool opt_junk_alloc =
+#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
+ true
+#else
+ false
+#endif
+ ;
+bool opt_junk_free =
#if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL))
true
#else
false
#endif
;
-size_t opt_quarantine = ZU(0);
-bool opt_redzone = false;
+
bool opt_utrace = false;
-bool opt_valgrind = false;
bool opt_xmalloc = false;
bool opt_zero = false;
-size_t opt_narenas = 0;
+unsigned opt_narenas = 0;
unsigned ncpus;
-malloc_mutex_t arenas_lock;
-arena_t **arenas;
-unsigned narenas_total;
-unsigned narenas_auto;
-
-/* Set to true once the allocator has been initialized. */
-static bool malloc_initialized = false;
+/* Protects arenas initialization. */
+malloc_mutex_t arenas_lock;
+/*
+ * Arenas that are used to service external requests. Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ *
+ * arenas[0..narenas_auto) are used for automatic multiplexing of threads and
+ * arenas. arenas[narenas_auto..narenas_total) are only used if the application
+ * takes some action to create them and allocate from them.
+ *
+ * Points to an arena_t.
+ */
+JEMALLOC_ALIGNED(CACHELINE)
+atomic_p_t arenas[MALLOCX_ARENA_LIMIT];
+static atomic_u_t narenas_total; /* Use narenas_total_*(). */
+static arena_t *a0; /* arenas[0]; read-only after initialization. */
+unsigned narenas_auto; /* Read-only after initialization. */
+
+typedef enum {
+ malloc_init_uninitialized = 3,
+ malloc_init_a0_initialized = 2,
+ malloc_init_recursible = 1,
+ malloc_init_initialized = 0 /* Common case --> jnz. */
+} malloc_init_t;
+static malloc_init_t malloc_init_state = malloc_init_uninitialized;
+
+/* False should be the common case. Set to true to trigger initialization. */
+bool malloc_slow = true;
+
+/* When malloc_slow is true, these bits record which slow options are set. */
+enum {
+ flag_opt_junk_alloc = (1U),
+ flag_opt_junk_free = (1U << 1),
+ flag_opt_zero = (1U << 2),
+ flag_opt_utrace = (1U << 3),
+ flag_opt_xmalloc = (1U << 4)
+};
+static uint8_t malloc_slow_flags;
#ifdef JEMALLOC_THREADED_INIT
/* Used to let the initializing thread recursively allocate. */
@@ -57,14 +124,30 @@ static bool malloc_initializer = NO_INITIALIZER;
/* Used to avoid initialization races. */
#ifdef _WIN32
+#if _WIN32_WINNT >= 0x0600
+static malloc_mutex_t init_lock = SRWLOCK_INIT;
+#else
static malloc_mutex_t init_lock;
+static bool init_lock_initialized = false;
JEMALLOC_ATTR(constructor)
static void WINAPI
-_init_init_lock(void)
-{
-
- malloc_mutex_init(&init_lock);
+_init_init_lock(void) {
+ /*
+ * If another constructor in the same binary is using mallctl to e.g.
+ * set up extent hooks, it may end up running before this one, and
+ * malloc_init_hard will crash trying to lock the uninitialized lock. So
+ * we force an initialization of the lock in malloc_init_hard as well.
+ * We don't worry about atomicity of the accesses to the
+ * init_lock_initialized boolean, since it only matters early in
+ * process creation, before any separate thread normally starts
+ * doing anything.
+ */
+ if (!init_lock_initialized) {
+ malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT,
+ malloc_mutex_rank_exclusive);
+ }
+ init_lock_initialized = true;
}
#ifdef _MSC_VER
@@ -72,7 +155,7 @@ _init_init_lock(void)
JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
#endif
-
+#endif
#else
static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#endif
@@ -85,7 +168,7 @@ typedef struct {
#ifdef JEMALLOC_UTRACE
# define UTRACE(a, b, c) do { \
- if (opt_utrace) { \
+ if (unlikely(opt_utrace)) { \
int utrace_serrno = errno; \
malloc_utrace_t ut; \
ut.p = (a); \
@@ -99,12 +182,16 @@ typedef struct {
# define UTRACE(a, b, c)
#endif
+/* Whether encountered any invalid config options. */
+static bool had_conf_error = false;
+
/******************************************************************************/
/*
* Function prototypes for static functions that are referenced prior to
* definition.
*/
+static bool malloc_init_hard_a0(void);
static bool malloc_init_hard(void);
/******************************************************************************/
@@ -112,54 +199,337 @@ static bool malloc_init_hard(void);
* Begin miscellaneous support functions.
*/
+bool
+malloc_initialized(void) {
+ return (malloc_init_state == malloc_init_initialized);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+malloc_init_a0(void) {
+ if (unlikely(malloc_init_state == malloc_init_uninitialized)) {
+ return malloc_init_hard_a0();
+ }
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+malloc_init(void) {
+ if (unlikely(!malloc_initialized()) && malloc_init_hard()) {
+ return true;
+ }
+ return false;
+}
+
+/*
+ * The a0*() functions are used instead of i{d,}alloc() in situations that
+ * cannot tolerate TLS variable access.
+ */
+
+static void *
+a0ialloc(size_t size, bool zero, bool is_internal) {
+ if (unlikely(malloc_init_a0())) {
+ return NULL;
+ }
+
+ return iallocztm(TSDN_NULL, size, sz_size2index(size), zero, NULL,
+ is_internal, arena_get(TSDN_NULL, 0, true), true);
+}
+
+static void
+a0idalloc(void *ptr, bool is_internal) {
+ idalloctm(TSDN_NULL, ptr, NULL, NULL, is_internal, true);
+}
+
+void *
+a0malloc(size_t size) {
+ return a0ialloc(size, false, true);
+}
+
+void
+a0dalloc(void *ptr) {
+ a0idalloc(ptr, true);
+}
+
+/*
+ * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive
+ * situations that cannot tolerate TLS variable access (TLS allocation and very
+ * early internal data structure initialization).
+ */
+
+void *
+bootstrap_malloc(size_t size) {
+ if (unlikely(size == 0)) {
+ size = 1;
+ }
+
+ return a0ialloc(size, false, false);
+}
+
+void *
+bootstrap_calloc(size_t num, size_t size) {
+ size_t num_size;
+
+ num_size = num * size;
+ if (unlikely(num_size == 0)) {
+ assert(num == 0 || size == 0);
+ num_size = 1;
+ }
+
+ return a0ialloc(num_size, true, false);
+}
+
+void
+bootstrap_free(void *ptr) {
+ if (unlikely(ptr == NULL)) {
+ return;
+ }
+
+ a0idalloc(ptr, false);
+}
+
+void
+arena_set(unsigned ind, arena_t *arena) {
+ atomic_store_p(&arenas[ind], arena, ATOMIC_RELEASE);
+}
+
+static void
+narenas_total_set(unsigned narenas) {
+ atomic_store_u(&narenas_total, narenas, ATOMIC_RELEASE);
+}
+
+static void
+narenas_total_inc(void) {
+ atomic_fetch_add_u(&narenas_total, 1, ATOMIC_RELEASE);
+}
+
+unsigned
+narenas_total_get(void) {
+ return atomic_load_u(&narenas_total, ATOMIC_ACQUIRE);
+}
+
/* Create a new arena and insert it into the arenas array at index ind. */
+static arena_t *
+arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
+ arena_t *arena;
+
+ assert(ind <= narenas_total_get());
+ if (ind >= MALLOCX_ARENA_LIMIT) {
+ return NULL;
+ }
+ if (ind == narenas_total_get()) {
+ narenas_total_inc();
+ }
+
+ /*
+ * Another thread may have already initialized arenas[ind] if it's an
+ * auto arena.
+ */
+ arena = arena_get(tsdn, ind, false);
+ if (arena != NULL) {
+ assert(ind < narenas_auto);
+ return arena;
+ }
+
+ /* Actually initialize the arena. */
+ arena = arena_new(tsdn, ind, extent_hooks);
+
+ return arena;
+}
+
+static void
+arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) {
+ if (ind == 0) {
+ return;
+ }
+ if (have_background_thread) {
+ bool err;
+ malloc_mutex_lock(tsdn, &background_thread_lock);
+ err = background_thread_create(tsdn_tsd(tsdn), ind);
+ malloc_mutex_unlock(tsdn, &background_thread_lock);
+ if (err) {
+ malloc_printf("<jemalloc>: error in background thread "
+ "creation for arena %u. Abort.\n", ind);
+ abort();
+ }
+ }
+}
+
arena_t *
-arenas_extend(unsigned ind)
-{
- arena_t *ret;
+arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
+ arena_t *arena;
+
+ malloc_mutex_lock(tsdn, &arenas_lock);
+ arena = arena_init_locked(tsdn, ind, extent_hooks);
+ malloc_mutex_unlock(tsdn, &arenas_lock);
+
+ arena_new_create_background_thread(tsdn, ind);
- ret = (arena_t *)base_alloc(sizeof(arena_t));
- if (ret != NULL && arena_new(ret, ind) == false) {
- arenas[ind] = ret;
- return (ret);
+ return arena;
+}
+
+static void
+arena_bind(tsd_t *tsd, unsigned ind, bool internal) {
+ arena_t *arena = arena_get(tsd_tsdn(tsd), ind, false);
+ arena_nthreads_inc(arena, internal);
+
+ if (internal) {
+ tsd_iarena_set(tsd, arena);
+ } else {
+ tsd_arena_set(tsd, arena);
+ }
+}
+
+void
+arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) {
+ arena_t *oldarena, *newarena;
+
+ oldarena = arena_get(tsd_tsdn(tsd), oldind, false);
+ newarena = arena_get(tsd_tsdn(tsd), newind, false);
+ arena_nthreads_dec(oldarena, false);
+ arena_nthreads_inc(newarena, false);
+ tsd_arena_set(tsd, newarena);
+}
+
+static void
+arena_unbind(tsd_t *tsd, unsigned ind, bool internal) {
+ arena_t *arena;
+
+ arena = arena_get(tsd_tsdn(tsd), ind, false);
+ arena_nthreads_dec(arena, internal);
+
+ if (internal) {
+ tsd_iarena_set(tsd, NULL);
+ } else {
+ tsd_arena_set(tsd, NULL);
}
- /* Only reached if there is an OOM error. */
+}
+
+arena_tdata_t *
+arena_tdata_get_hard(tsd_t *tsd, unsigned ind) {
+ arena_tdata_t *tdata, *arenas_tdata_old;
+ arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
+ unsigned narenas_tdata_old, i;
+ unsigned narenas_tdata = tsd_narenas_tdata_get(tsd);
+ unsigned narenas_actual = narenas_total_get();
/*
- * OOM here is quite inconvenient to propagate, since dealing with it
- * would require a check for failure in the fast path. Instead, punt
- * by using arenas[0]. In practice, this is an extremely unlikely
- * failure.
+ * Dissociate old tdata array (and set up for deallocation upon return)
+ * if it's too small.
+ */
+ if (arenas_tdata != NULL && narenas_tdata < narenas_actual) {
+ arenas_tdata_old = arenas_tdata;
+ narenas_tdata_old = narenas_tdata;
+ arenas_tdata = NULL;
+ narenas_tdata = 0;
+ tsd_arenas_tdata_set(tsd, arenas_tdata);
+ tsd_narenas_tdata_set(tsd, narenas_tdata);
+ } else {
+ arenas_tdata_old = NULL;
+ narenas_tdata_old = 0;
+ }
+
+ /* Allocate tdata array if it's missing. */
+ if (arenas_tdata == NULL) {
+ bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd);
+ narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1;
+
+ if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) {
+ *arenas_tdata_bypassp = true;
+ arenas_tdata = (arena_tdata_t *)a0malloc(
+ sizeof(arena_tdata_t) * narenas_tdata);
+ *arenas_tdata_bypassp = false;
+ }
+ if (arenas_tdata == NULL) {
+ tdata = NULL;
+ goto label_return;
+ }
+ assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp);
+ tsd_arenas_tdata_set(tsd, arenas_tdata);
+ tsd_narenas_tdata_set(tsd, narenas_tdata);
+ }
+
+ /*
+ * Copy to tdata array. It's possible that the actual number of arenas
+ * has increased since narenas_total_get() was called above, but that
+ * causes no correctness issues unless two threads concurrently execute
+ * the arenas.create mallctl, which we trust mallctl synchronization to
+ * prevent.
*/
- malloc_write("<jemalloc>: Error initializing arena\n");
- if (opt_abort)
- abort();
- return (arenas[0]);
+ /* Copy/initialize tickers. */
+ for (i = 0; i < narenas_actual; i++) {
+ if (i < narenas_tdata_old) {
+ ticker_copy(&arenas_tdata[i].decay_ticker,
+ &arenas_tdata_old[i].decay_ticker);
+ } else {
+ ticker_init(&arenas_tdata[i].decay_ticker,
+ DECAY_NTICKS_PER_UPDATE);
+ }
+ }
+ if (narenas_tdata > narenas_actual) {
+ memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t)
+ * (narenas_tdata - narenas_actual));
+ }
+
+ /* Read the refreshed tdata array. */
+ tdata = &arenas_tdata[ind];
+label_return:
+ if (arenas_tdata_old != NULL) {
+ a0dalloc(arenas_tdata_old);
+ }
+ return tdata;
}
-/* Slow path, called only by choose_arena(). */
+/* Slow path, called only by arena_choose(). */
arena_t *
-choose_arena_hard(void)
-{
- arena_t *ret;
+arena_choose_hard(tsd_t *tsd, bool internal) {
+ arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL);
+
+ if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)) {
+ unsigned choose = percpu_arena_choose();
+ ret = arena_get(tsd_tsdn(tsd), choose, true);
+ assert(ret != NULL);
+ arena_bind(tsd, arena_ind_get(ret), false);
+ arena_bind(tsd, arena_ind_get(ret), true);
+
+ return ret;
+ }
if (narenas_auto > 1) {
- unsigned i, choose, first_null;
+ unsigned i, j, choose[2], first_null;
+ bool is_new_arena[2];
+
+ /*
+ * Determine binding for both non-internal and internal
+ * allocation.
+ *
+ * choose[0]: For application allocation.
+ * choose[1]: For internal metadata allocation.
+ */
+
+ for (j = 0; j < 2; j++) {
+ choose[j] = 0;
+ is_new_arena[j] = false;
+ }
- choose = 0;
first_null = narenas_auto;
- malloc_mutex_lock(&arenas_lock);
- assert(arenas[0] != NULL);
+ malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock);
+ assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL);
for (i = 1; i < narenas_auto; i++) {
- if (arenas[i] != NULL) {
+ if (arena_get(tsd_tsdn(tsd), i, false) != NULL) {
/*
* Choose the first arena that has the lowest
* number of threads assigned to it.
*/
- if (arenas[i]->nthreads <
- arenas[choose]->nthreads)
- choose = i;
+ for (j = 0; j < 2; j++) {
+ if (arena_nthreads_get(arena_get(
+ tsd_tsdn(tsd), i, false), !!j) <
+ arena_nthreads_get(arena_get(
+ tsd_tsdn(tsd), choose[j], false),
+ !!j)) {
+ choose[j] = i;
+ }
+ }
} else if (first_null == narenas_auto) {
/*
* Record the index of the first uninitialized
@@ -174,38 +544,99 @@ choose_arena_hard(void)
}
}
- if (arenas[choose]->nthreads == 0
- || first_null == narenas_auto) {
- /*
- * Use an unloaded arena, or the least loaded arena if
- * all arenas are already initialized.
- */
- ret = arenas[choose];
- } else {
- /* Initialize a new arena. */
- ret = arenas_extend(first_null);
+ for (j = 0; j < 2; j++) {
+ if (arena_nthreads_get(arena_get(tsd_tsdn(tsd),
+ choose[j], false), !!j) == 0 || first_null ==
+ narenas_auto) {
+ /*
+ * Use an unloaded arena, or the least loaded
+ * arena if all arenas are already initialized.
+ */
+ if (!!j == internal) {
+ ret = arena_get(tsd_tsdn(tsd),
+ choose[j], false);
+ }
+ } else {
+ arena_t *arena;
+
+ /* Initialize a new arena. */
+ choose[j] = first_null;
+ arena = arena_init_locked(tsd_tsdn(tsd),
+ choose[j],
+ (extent_hooks_t *)&extent_hooks_default);
+ if (arena == NULL) {
+ malloc_mutex_unlock(tsd_tsdn(tsd),
+ &arenas_lock);
+ return NULL;
+ }
+ is_new_arena[j] = true;
+ if (!!j == internal) {
+ ret = arena;
+ }
+ }
+ arena_bind(tsd, choose[j], !!j);
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock);
+
+ for (j = 0; j < 2; j++) {
+ if (is_new_arena[j]) {
+ assert(choose[j] > 0);
+ arena_new_create_background_thread(
+ tsd_tsdn(tsd), choose[j]);
+ }
}
- ret->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
+
} else {
- ret = arenas[0];
- malloc_mutex_lock(&arenas_lock);
- ret->nthreads++;
- malloc_mutex_unlock(&arenas_lock);
+ ret = arena_get(tsd_tsdn(tsd), 0, false);
+ arena_bind(tsd, 0, false);
+ arena_bind(tsd, 0, true);
}
- arenas_tsd_set(&ret);
+ return ret;
+}
+
+void
+iarena_cleanup(tsd_t *tsd) {
+ arena_t *iarena;
- return (ret);
+ iarena = tsd_iarena_get(tsd);
+ if (iarena != NULL) {
+ arena_unbind(tsd, arena_ind_get(iarena), true);
+ }
}
-static void
-stats_print_atexit(void)
-{
+void
+arena_cleanup(tsd_t *tsd) {
+ arena_t *arena;
+
+ arena = tsd_arena_get(tsd);
+ if (arena != NULL) {
+ arena_unbind(tsd, arena_ind_get(arena), false);
+ }
+}
+
+void
+arenas_tdata_cleanup(tsd_t *tsd) {
+ arena_tdata_t *arenas_tdata;
+
+ /* Prevent tsd->arenas_tdata from being (re)created. */
+ *tsd_arenas_tdata_bypassp_get(tsd) = true;
+
+ arenas_tdata = tsd_arenas_tdata_get(tsd);
+ if (arenas_tdata != NULL) {
+ tsd_arenas_tdata_set(tsd, NULL);
+ a0dalloc(arenas_tdata);
+ }
+}
- if (config_tcache && config_stats) {
+static void
+stats_print_atexit(void) {
+ if (config_stats) {
+ tsdn_t *tsdn;
unsigned narenas, i;
+ tsdn = tsdn_fetch();
+
/*
* Merge stats from extant threads. This is racy, since
* individual threads do not lock when recording tcache stats
@@ -214,25 +645,45 @@ stats_print_atexit(void)
* continue to allocate.
*/
for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
- arena_t *arena = arenas[i];
+ arena_t *arena = arena_get(tsdn, i, false);
if (arena != NULL) {
tcache_t *tcache;
- /*
- * tcache_stats_merge() locks bins, so if any
- * code is introduced that acquires both arena
- * and bin locks in the opposite order,
- * deadlocks may result.
- */
- malloc_mutex_lock(&arena->lock);
+ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
ql_foreach(tcache, &arena->tcache_ql, link) {
- tcache_stats_merge(tcache, arena);
+ tcache_stats_merge(tsdn, tcache, arena);
}
- malloc_mutex_unlock(&arena->lock);
+ malloc_mutex_unlock(tsdn,
+ &arena->tcache_ql_mtx);
}
}
}
- je_malloc_stats_print(NULL, NULL, NULL);
+ je_malloc_stats_print(NULL, NULL, opt_stats_print_opts);
+}
+
+/*
+ * Ensure that we don't hold any locks upon entry to or exit from allocator
+ * code (in a "broad" sense that doesn't count a reentrant allocation as an
+ * entrance or exit).
+ */
+JEMALLOC_ALWAYS_INLINE void
+check_entry_exit_locking(tsdn_t *tsdn) {
+ if (!config_debug) {
+ return;
+ }
+ if (tsdn_null(tsdn)) {
+ return;
+ }
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ /*
+ * It's possible we hold locks at entry/exit if we're in a nested
+ * allocation.
+ */
+ int8_t reentrancy_level = tsd_reentrancy_level_get(tsd);
+ if (reentrancy_level != 0) {
+ return;
+ }
+ witness_assert_lockless(tsdn_witness_tsdp_get(tsdn));
}
/*
@@ -243,69 +694,82 @@ stats_print_atexit(void)
* Begin initialization functions.
*/
+static char *
+jemalloc_secure_getenv(const char *name) {
+#ifdef JEMALLOC_HAVE_SECURE_GETENV
+ return secure_getenv(name);
+#else
+# ifdef JEMALLOC_HAVE_ISSETUGID
+ if (issetugid() != 0) {
+ return NULL;
+ }
+# endif
+ return getenv(name);
+#endif
+}
+
static unsigned
-malloc_ncpus(void)
-{
+malloc_ncpus(void) {
long result;
#ifdef _WIN32
SYSTEM_INFO si;
GetSystemInfo(&si);
result = si.dwNumberOfProcessors;
+#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT)
+ /*
+ * glibc >= 2.6 has the CPU_COUNT macro.
+ *
+ * glibc's sysconf() uses isspace(), but glibc allocates for the first
+ * time *before* setting up the isspace tables, so sysconf() cannot be
+ * used safely this early. Therefore we need a different method to get
+ * the number of CPUs.
+ */
+ {
+ cpu_set_t set;
+
+ pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
+ result = CPU_COUNT(&set);
+ }
#else
result = sysconf(_SC_NPROCESSORS_ONLN);
#endif
return ((result == -1) ? 1 : (unsigned)result);
}
-void
-arenas_cleanup(void *arg)
-{
- arena_t *arena = *(arena_t **)arg;
-
- malloc_mutex_lock(&arenas_lock);
- arena->nthreads--;
- malloc_mutex_unlock(&arenas_lock);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void
-malloc_thread_init(void)
-{
-
- /*
- * TSD initialization can't be safely done as a side effect of
- * deallocation, because it is possible for a thread to do nothing but
- * deallocate its TLS data via free(), in which case writing to TLS
- * would cause write-after-free memory corruption. The quarantine
- * facility *only* gets used as a side effect of deallocation, so make
- * a best effort attempt at initializing its TSD by hooking all
- * allocation events.
- */
- if (config_fill && opt_quarantine)
- quarantine_alloc_hook();
-}
-
-JEMALLOC_ALWAYS_INLINE_C bool
-malloc_init(void)
-{
+static void
+init_opt_stats_print_opts(const char *v, size_t vlen) {
+ size_t opts_len = strlen(opt_stats_print_opts);
+ assert(opts_len <= stats_print_tot_num_options);
+
+ for (size_t i = 0; i < vlen; i++) {
+ switch (v[i]) {
+#define OPTION(o, v, d, s) case o: break;
+ STATS_PRINT_OPTIONS
+#undef OPTION
+ default: continue;
+ }
- if (malloc_initialized == false && malloc_init_hard())
- return (true);
- malloc_thread_init();
+ if (strchr(opt_stats_print_opts, v[i]) != NULL) {
+ /* Ignore repeated. */
+ continue;
+ }
- return (false);
+ opt_stats_print_opts[opts_len++] = v[i];
+ opt_stats_print_opts[opts_len] = '\0';
+ assert(opts_len <= stats_print_tot_num_options);
+ }
+ assert(opts_len == strlen(opt_stats_print_opts));
}
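+/*
+ * Editorial note (not part of the upstream change): each character of the
+ * conf value is first validated against the option letters generated from
+ * STATS_PRINT_OPTIONS, then appended to opt_stats_print_opts only if not
+ * already present, so unknown or repeated letters are silently dropped.
+ */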
static bool
malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
- char const **v_p, size_t *vlen_p)
-{
+ char const **v_p, size_t *vlen_p) {
bool accept;
const char *opts = *opts_p;
*k_p = opts;
- for (accept = false; accept == false;) {
+ for (accept = false; !accept;) {
switch (*opts) {
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
@@ -333,14 +797,14 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
malloc_write("<jemalloc>: Conf string ends "
"with key\n");
}
- return (true);
+ return true;
default:
malloc_write("<jemalloc>: Malformed conf string\n");
- return (true);
+ return true;
}
}
- for (accept = false; accept == false;) {
+ for (accept = false; !accept;) {
switch (*opts) {
case ',':
opts++;
@@ -369,46 +833,55 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
}
*opts_p = opts;
- return (false);
+ return false;
}
static void
-malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
- size_t vlen)
-{
+malloc_abort_invalid_conf(void) {
+ assert(opt_abort_conf);
+ malloc_printf("<jemalloc>: Abort (abort_conf:true) on invalid conf "
+ "value (see above).\n");
+ abort();
+}
+static void
+malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
+ size_t vlen) {
malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k,
(int)vlen, v);
+ /* If abort_conf is set, error out after processing all options. */
+ had_conf_error = true;
}
static void
-malloc_conf_init(void)
-{
+malloc_slow_flag_init(void) {
+ /*
+ * Combine the runtime options into malloc_slow for fast path. Called
+ * after processing all the options.
+ */
+ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0)
+ | (opt_junk_free ? flag_opt_junk_free : 0)
+ | (opt_zero ? flag_opt_zero : 0)
+ | (opt_utrace ? flag_opt_utrace : 0)
+ | (opt_xmalloc ? flag_opt_xmalloc : 0);
+
+ malloc_slow = (malloc_slow_flags != 0);
+}
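+/*
+ * Editorial example (not part of the upstream change): with junk_free and
+ * zero enabled and the other options off, malloc_slow_flags becomes
+ * flag_opt_junk_free | flag_opt_zero = 0x2 | 0x4 = 0x6, so malloc_slow is
+ * true and allocations take the checked slow paths.
+ */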
+
+static void
+malloc_conf_init(void) {
unsigned i;
char buf[PATH_MAX + 1];
const char *opts, *k, *v;
size_t klen, vlen;
- /*
- * Automatically configure valgrind before processing options. The
- * valgrind option remains in jemalloc 3.x for compatibility reasons.
- */
- if (config_valgrind) {
- opt_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false;
- if (config_fill && opt_valgrind) {
- opt_junk = false;
- assert(opt_zero == false);
- opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT;
- opt_redzone = true;
- }
- if (config_tcache && opt_valgrind)
- opt_tcache = false;
- }
-
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < 4; i++) {
/* Get runtime configuration. */
switch (i) {
case 0:
+ opts = config_malloc_conf;
+ break;
+ case 1:
if (je_malloc_conf != NULL) {
/*
* Use options that were compiled into the
@@ -421,8 +894,8 @@ malloc_conf_init(void)
opts = buf;
}
break;
- case 1: {
- int linklen = 0;
+ case 2: {
+ ssize_t linklen = 0;
#ifndef _WIN32
int saved_errno = errno;
const char *linkname =
@@ -441,14 +914,14 @@ malloc_conf_init(void)
if (linklen == -1) {
/* No configuration specified. */
linklen = 0;
- /* restore errno */
+ /* Restore errno. */
set_errno(saved_errno);
}
#endif
buf[linklen] = '\0';
opts = buf;
break;
- } case 2: {
+ } case 3: {
const char *envname =
#ifdef JEMALLOC_PREFIX
JEMALLOC_CPREFIX"MALLOC_CONF"
@@ -457,7 +930,7 @@ malloc_conf_init(void)
#endif
;
- if ((opts = getenv(envname)) != NULL) {
+ if ((opts = jemalloc_secure_getenv(envname)) != NULL) {
/*
* Do nothing; opts is already initialized to
* the value of the MALLOC_CONF environment
@@ -475,27 +948,31 @@ malloc_conf_init(void)
opts = buf;
}
- while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
- &vlen) == false) {
-#define CONF_HANDLE_BOOL(o, n) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
- if (strncmp("true", v, vlen) == 0 && \
- vlen == sizeof("true")-1) \
+ while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v,
+ &vlen)) {
+#define CONF_MATCH(n) \
+ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0)
+#define CONF_MATCH_VALUE(n) \
+ (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0)
+#define CONF_HANDLE_BOOL(o, n) \
+ if (CONF_MATCH(n)) { \
+ if (CONF_MATCH_VALUE("true")) { \
o = true; \
- else if (strncmp("false", v, vlen) == \
- 0 && vlen == sizeof("false")-1) \
+ } else if (CONF_MATCH_VALUE("false")) { \
o = false; \
- else { \
+ } else { \
malloc_conf_error( \
"Invalid conf value", \
k, klen, v, vlen); \
} \
continue; \
}
-#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+#define CONF_MIN_no(um, min) false
+#define CONF_MIN_yes(um, min) ((um) < (min))
+#define CONF_MAX_no(um, max) false
+#define CONF_MAX_yes(um, max) ((um) > (max))
+#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \
+ if (CONF_MATCH(n)) { \
uintmax_t um; \
char *end; \
\
@@ -507,27 +984,40 @@ malloc_conf_init(void)
"Invalid conf value", \
k, klen, v, vlen); \
} else if (clip) { \
- if (min != 0 && um < min) \
- o = min; \
- else if (um > max) \
- o = max; \
- else \
- o = um; \
+ if (CONF_MIN_##check_min(um, \
+ (t)(min))) { \
+ o = (t)(min); \
+ } else if ( \
+ CONF_MAX_##check_max(um, \
+ (t)(max))) { \
+ o = (t)(max); \
+ } else { \
+ o = (t)um; \
+ } \
} else { \
- if ((min != 0 && um < min) || \
- um > max) { \
+ if (CONF_MIN_##check_min(um, \
+ (t)(min)) || \
+ CONF_MAX_##check_max(um, \
+ (t)(max))) { \
malloc_conf_error( \
"Out-of-range " \
"conf value", \
k, klen, v, vlen); \
- } else \
- o = um; \
+ } else { \
+ o = (t)um; \
+ } \
} \
continue; \
}
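The check_min/check_max parameters are the literal tokens yes or no; CONF_MIN_##check_min pastes them onto one of the helper macros above, so an unrequested bound check expands to a constant false and compiles away. The same token-pasting dispatch in miniature (names hypothetical):

	#define CLAMP_no(x, lo)		(x)
	#define CLAMP_yes(x, lo)	((x) < (lo) ? (lo) : (x))
	#define CLAMP(x, lo, check)	CLAMP_##check(x, lo)

	/* CLAMP(v, 8, yes) expands to ((v) < (8) ? (8) : (v));
	 * CLAMP(v, 8, no) expands to (v). */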
-#define CONF_HANDLE_SSIZE_T(o, n, min, max) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \
+ clip) \
+ CONF_HANDLE_T_U(unsigned, o, n, min, max, \
+ check_min, check_max, clip)
+#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \
+ CONF_HANDLE_T_U(size_t, o, n, min, max, \
+ check_min, check_max, clip)
+#define CONF_HANDLE_SSIZE_T(o, n, min, max) \
+ if (CONF_MATCH(n)) { \
long l; \
char *end; \
\
@@ -538,18 +1028,18 @@ malloc_conf_init(void)
malloc_conf_error( \
"Invalid conf value", \
k, klen, v, vlen); \
- } else if (l < (ssize_t)min || l > \
- (ssize_t)max) { \
+ } else if (l < (ssize_t)(min) || l > \
+ (ssize_t)(max)) { \
malloc_conf_error( \
"Out-of-range conf value", \
k, klen, v, vlen); \
- } else \
+ } else { \
o = l; \
+ } \
continue; \
}
-#define CONF_HANDLE_CHAR_P(o, n, d) \
- if (sizeof(n)-1 == klen && strncmp(n, k, \
- klen) == 0) { \
+#define CONF_HANDLE_CHAR_P(o, n, d) \
+ if (CONF_MATCH(n)) { \
size_t cpylen = (vlen <= \
sizeof(o)-1) ? vlen : \
sizeof(o)-1; \
@@ -559,23 +1049,32 @@ malloc_conf_init(void)
}
CONF_HANDLE_BOOL(opt_abort, "abort")
- /*
- * Chunks always require at least one header page, plus
- * one data page in the absence of redzones, or three
- * pages in the presence of redzones. In order to
- * simplify options processing, fix the limit based on
- * config_fill.
- */
- CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
- (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1,
- true)
+ CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf")
+ if (strncmp("metadata_thp", k, klen) == 0) {
+ int i;
+ bool match = false;
+ for (i = 0; i < metadata_thp_mode_limit; i++) {
+ if (strncmp(metadata_thp_mode_names[i],
+ v, vlen) == 0) {
+ opt_metadata_thp = i;
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ malloc_conf_error("Invalid conf value",
+ k, klen, v, vlen);
+ }
+ continue;
+ }
+ CONF_HANDLE_BOOL(opt_retain, "retain")
if (strncmp("dss", k, klen) == 0) {
int i;
bool match = false;
for (i = 0; i < dss_prec_limit; i++) {
if (strncmp(dss_prec_names[i], v, vlen)
== 0) {
- if (chunk_dss_prec_set(i)) {
+ if (extent_dss_prec_set(i)) {
malloc_conf_error(
"Error setting dss",
k, klen, v, vlen);
@@ -587,47 +1086,104 @@ malloc_conf_init(void)
}
}
}
- if (match == false) {
+ if (!match) {
malloc_conf_error("Invalid conf value",
k, klen, v, vlen);
}
continue;
}
- CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1,
- SIZE_T_MAX, false)
- CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
- -1, (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1,
+ UINT_MAX, yes, no, false)
+ CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms,
+ "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
+ QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) :
+ SSIZE_MAX);
+ CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms,
+ "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) <
+ QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) :
+ SSIZE_MAX);
CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
+ if (CONF_MATCH("stats_print_opts")) {
+ init_opt_stats_print_opts(v, vlen);
+ continue;
+ }
if (config_fill) {
- CONF_HANDLE_BOOL(opt_junk, "junk")
- CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
- 0, SIZE_T_MAX, false)
- CONF_HANDLE_BOOL(opt_redzone, "redzone")
+ if (CONF_MATCH("junk")) {
+ if (CONF_MATCH_VALUE("true")) {
+ opt_junk = "true";
+ opt_junk_alloc = opt_junk_free =
+ true;
+ } else if (CONF_MATCH_VALUE("false")) {
+ opt_junk = "false";
+ opt_junk_alloc = opt_junk_free =
+ false;
+ } else if (CONF_MATCH_VALUE("alloc")) {
+ opt_junk = "alloc";
+ opt_junk_alloc = true;
+ opt_junk_free = false;
+ } else if (CONF_MATCH_VALUE("free")) {
+ opt_junk = "free";
+ opt_junk_alloc = false;
+ opt_junk_free = true;
+ } else {
+ malloc_conf_error(
+ "Invalid conf value", k,
+ klen, v, vlen);
+ }
+ continue;
+ }
CONF_HANDLE_BOOL(opt_zero, "zero")
}
if (config_utrace) {
CONF_HANDLE_BOOL(opt_utrace, "utrace")
}
- if (config_valgrind) {
- CONF_HANDLE_BOOL(opt_valgrind, "valgrind")
- }
if (config_xmalloc) {
CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc")
}
- if (config_tcache) {
- CONF_HANDLE_BOOL(opt_tcache, "tcache")
- CONF_HANDLE_SSIZE_T(opt_lg_tcache_max,
- "lg_tcache_max", -1,
- (sizeof(size_t) << 3) - 1)
+ CONF_HANDLE_BOOL(opt_tcache, "tcache")
+ CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit,
+ "lg_extent_max_active_fit", 0,
+ (sizeof(size_t) << 3), yes, yes, false)
+ CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max",
+ -1, (sizeof(size_t) << 3) - 1)
+ if (strncmp("percpu_arena", k, klen) == 0) {
+ bool match = false;
+ for (int i = percpu_arena_mode_names_base; i <
+ percpu_arena_mode_names_limit; i++) {
+ if (strncmp(percpu_arena_mode_names[i],
+ v, vlen) == 0) {
+ if (!have_percpu_arena) {
+ malloc_conf_error(
+ "No getcpu support",
+ k, klen, v, vlen);
+ }
+ opt_percpu_arena = i;
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ malloc_conf_error("Invalid conf value",
+ k, klen, v, vlen);
+ }
+ continue;
}
+ CONF_HANDLE_BOOL(opt_background_thread,
+ "background_thread");
+ CONF_HANDLE_SIZE_T(opt_max_background_threads,
+ "max_background_threads", 1,
+ opt_max_background_threads, yes, yes,
+ true);
if (config_prof) {
CONF_HANDLE_BOOL(opt_prof, "prof")
CONF_HANDLE_CHAR_P(opt_prof_prefix,
"prof_prefix", "jeprof")
CONF_HANDLE_BOOL(opt_prof_active, "prof_active")
- CONF_HANDLE_SSIZE_T(opt_lg_prof_sample,
- "lg_prof_sample", 0,
- (sizeof(uint64_t) << 3) - 1)
+ CONF_HANDLE_BOOL(opt_prof_thread_active_init,
+ "prof_thread_active_init")
+ CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
+ "lg_prof_sample", 0, (sizeof(uint64_t) << 3)
+ - 1, no, yes, true)
CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
"lg_prof_interval", -1,
@@ -636,206 +1192,384 @@ malloc_conf_init(void)
CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
}
+ if (config_log) {
+ if (CONF_MATCH("log")) {
+ size_t cpylen = (
+ vlen < sizeof(log_var_names) ?
+ vlen : sizeof(log_var_names) - 1);
+ strncpy(log_var_names, v, cpylen);
+ log_var_names[cpylen] = '\0';
+ continue;
+ }
+ }
+ if (CONF_MATCH("thp")) {
+ bool match = false;
+ for (int i = 0; i < thp_mode_names_limit; i++) {
+ if (strncmp(thp_mode_names[i], v, vlen)
+ == 0) {
+ if (!have_madvise_huge) {
+ malloc_conf_error(
+ "No THP support",
+ k, klen, v, vlen);
+ }
+ opt_thp = i;
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ malloc_conf_error("Invalid conf value",
+ k, klen, v, vlen);
+ }
+ continue;
+ }
malloc_conf_error("Invalid conf pair", k, klen, v,
vlen);
+#undef CONF_MATCH
+#undef CONF_MATCH_VALUE
#undef CONF_HANDLE_BOOL
+#undef CONF_MIN_no
+#undef CONF_MIN_yes
+#undef CONF_MAX_no
+#undef CONF_MAX_yes
+#undef CONF_HANDLE_T_U
+#undef CONF_HANDLE_UNSIGNED
#undef CONF_HANDLE_SIZE_T
#undef CONF_HANDLE_SSIZE_T
#undef CONF_HANDLE_CHAR_P
}
+ if (opt_abort_conf && had_conf_error) {
+ malloc_abort_invalid_conf();
+ }
}
+ atomic_store_b(&log_init_done, true, ATOMIC_RELEASE);
}
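The i = 0..3 loop above reads four option sources in a fixed order, each able to override the one before it: the configure-time default (config_malloc_conf), the application-defined je_malloc_conf global, the /etc/malloc.conf symlink target, and finally the MALLOC_CONF environment variable. A hedged sketch of exercising the highest-priority source from an embedding program (this assumes no JEMALLOC_PREFIX, which would change the variable name):

	#include <stdlib.h>

	int
	main(void) {
		/* Must happen before the first allocation triggers
		 * malloc_conf_init(). */
		setenv("MALLOC_CONF", "abort_conf:true,narenas:4", 1);
		void *p = malloc(64);	/* options are consumed at first use */
		free(p);
		return 0;
	}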
static bool
-malloc_init_hard(void)
-{
- arena_t *init_arenas[1];
-
- malloc_mutex_lock(&init_lock);
- if (malloc_initialized || IS_INITIALIZER) {
+malloc_init_hard_needed(void) {
+ if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state ==
+ malloc_init_recursible)) {
/*
* Another thread initialized the allocator before this one
* acquired init_lock, or this thread is the initializing
* thread, and it is recursively allocating.
*/
- malloc_mutex_unlock(&init_lock);
- return (false);
+ return false;
}
#ifdef JEMALLOC_THREADED_INIT
- if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) {
+ if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) {
/* Busy-wait until the initializing thread completes. */
+ spin_t spinner = SPIN_INITIALIZER;
do {
- malloc_mutex_unlock(&init_lock);
- CPU_SPINWAIT;
- malloc_mutex_lock(&init_lock);
- } while (malloc_initialized == false);
- malloc_mutex_unlock(&init_lock);
- return (false);
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ spin_adaptive(&spinner);
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+ } while (!malloc_initialized());
+ return false;
}
#endif
+ return true;
+}
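The JEMALLOC_THREADED_INIT branch uses the classic drop-the-lock-while-spinning wait, so the initializing thread can acquire init_lock and make progress. A pthreads rendering of the same shape, with sched_yield() standing in for spin_adaptive():

	#include <pthread.h>
	#include <sched.h>
	#include <stdbool.h>

	static pthread_mutex_t init_mtx = PTHREAD_MUTEX_INITIALIZER;
	static bool initialized;	/* set by the initializing thread */

	/* Caller holds init_mtx; returns with it held once init is done. */
	static void
	wait_for_initializer(void) {
		while (!initialized) {
			pthread_mutex_unlock(&init_mtx);
			sched_yield();	/* stand-in for adaptive spinning */
			pthread_mutex_lock(&init_mtx);
		}
	}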
+
+static bool
+malloc_init_hard_a0_locked() {
malloc_initializer = INITIALIZER;
- malloc_tsd_boot();
- if (config_prof)
+ if (config_prof) {
prof_boot0();
-
+ }
malloc_conf_init();
-
if (opt_stats_print) {
/* Print statistics at exit. */
if (atexit(stats_print_atexit) != 0) {
malloc_write("<jemalloc>: Error in atexit()\n");
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
}
}
-
- if (base_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ if (pages_boot()) {
+ return true;
}
-
- if (chunk_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ if (base_boot(TSDN_NULL)) {
+ return true;
+ }
+ if (extent_boot()) {
+ return true;
}
-
if (ctl_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ return true;
}
-
- if (config_prof)
+ if (config_prof) {
prof_boot1();
-
- arena_boot();
-
- if (config_tcache && tcache_boot0()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
}
-
- if (huge_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ arena_boot();
+ if (tcache_boot(TSDN_NULL)) {
+ return true;
}
-
- if (malloc_mutex_init(&arenas_lock)) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS,
+ malloc_mutex_rank_exclusive)) {
+ return true;
}
-
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
*/
- narenas_total = narenas_auto = 1;
- arenas = init_arenas;
+ narenas_auto = 1;
memset(arenas, 0, sizeof(arena_t *) * narenas_auto);
-
/*
* Initialize one arena here. The rest are lazily created in
- * choose_arena_hard().
+ * arena_choose_hard().
*/
- arenas_extend(0);
- if (arenas[0] == NULL) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
-
- /* Initialize allocation counters before any allocations can occur. */
- if (config_stats && thread_allocated_tsd_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
-
- if (arenas_tsd_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default)
+ == NULL) {
+ return true;
}
+ a0 = arena_get(TSDN_NULL, 0, false);
+ malloc_init_state = malloc_init_a0_initialized;
- if (config_tcache && tcache_boot1()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+ return false;
+}
- if (config_fill && quarantine_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+static bool
+malloc_init_hard_a0(void) {
+ bool ret;
- if (config_prof && prof_boot2()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
- }
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+ ret = malloc_init_hard_a0_locked();
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ return ret;
+}
- malloc_mutex_unlock(&init_lock);
- /**********************************************************************/
- /* Recursive allocation may follow. */
+/* Initialize data structures which may trigger recursive allocation. */
+static bool
+malloc_init_hard_recursible(void) {
+ malloc_init_state = malloc_init_recursible;
ncpus = malloc_ncpus();
-#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \
- && !defined(_WIN32))
- /* LinuxThreads's pthread_atfork() allocates. */
+#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \
+ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \
+ !defined(__native_client__))
+ /* LinuxThreads' pthread_atfork() allocates. */
if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
jemalloc_postfork_child) != 0) {
malloc_write("<jemalloc>: Error in pthread_atfork()\n");
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
+ return true;
}
#endif
- /* Done recursively allocating. */
- /**********************************************************************/
- malloc_mutex_lock(&init_lock);
+ if (background_thread_boot0()) {
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned
+malloc_narenas_default(void) {
+ assert(ncpus > 0);
+ /*
+ * For SMP systems, create more than one arena per CPU by
+ * default.
+ */
+ if (ncpus > 1) {
+ return ncpus << 2;
+ } else {
+ return 1;
+ }
+}
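Concretely, the default quadruples the CPU count (ncpus << 2 == ncpus * 4): an 8-CPU machine gets 32 arenas by default, while a uniprocessor gets exactly one. As a standalone restatement:

	/* Standalone restatement of malloc_narenas_default(). */
	static unsigned
	narenas_default(unsigned ncpus) {
		return (ncpus > 1) ? ncpus << 2 : 1;	/* 8 CPUs -> 32 */
	}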
+
+static percpu_arena_mode_t
+percpu_arena_as_initialized(percpu_arena_mode_t mode) {
+ assert(!malloc_initialized());
+ assert(mode <= percpu_arena_disabled);
- if (mutex_boot()) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+ if (mode != percpu_arena_disabled) {
+ mode += percpu_arena_mode_enabled_base;
}
+ return mode;
+}
+
+static bool
+malloc_init_narenas(void) {
+ assert(ncpus > 0);
+
+ if (opt_percpu_arena != percpu_arena_disabled) {
+ if (!have_percpu_arena || malloc_getcpu() < 0) {
+ opt_percpu_arena = percpu_arena_disabled;
+ malloc_printf("<jemalloc>: perCPU arena getcpu() not "
+ "available. Setting narenas to %u.\n", opt_narenas ?
+ opt_narenas : malloc_narenas_default());
+ if (opt_abort) {
+ abort();
+ }
+ } else {
+ if (ncpus >= MALLOCX_ARENA_LIMIT) {
+ malloc_printf("<jemalloc>: narenas w/ percpu"
+ "arena beyond limit (%d)\n", ncpus);
+ if (opt_abort) {
+ abort();
+ }
+ return true;
+ }
+ /* NB: opt_percpu_arena isn't fully initialized yet. */
+ if (percpu_arena_as_initialized(opt_percpu_arena) ==
+ per_phycpu_arena && ncpus % 2 != 0) {
+ malloc_printf("<jemalloc>: invalid "
+ "configuration -- per physical CPU arena "
+ "with odd number (%u) of CPUs (no hyper "
+ "threading?).\n", ncpus);
+ if (opt_abort) {
+ abort();
+ }
+ }
+ unsigned n = percpu_arena_ind_limit(
+ percpu_arena_as_initialized(opt_percpu_arena));
+ if (opt_narenas < n) {
+ /*
+ * If narenas is specified with percpu_arena
+ * enabled, actual narenas is set as the greater
+ * of the two. percpu_arena_choose will be free
+ * to use any of the arenas based on CPU
+ * id. This is conservative (at a small cost)
+ * but ensures correctness.
+ *
+ * If for some reason the ncpus determined at
+ * boot is not the actual number (e.g. because
+ * of affinity setting from numactl), reserving
+ * narenas this way provides a workaround for
+ * percpu_arena.
+ */
+ opt_narenas = n;
+ }
+ }
+ }
if (opt_narenas == 0) {
- /*
- * For SMP systems, create more than one arena per CPU by
- * default.
- */
- if (ncpus > 1)
- opt_narenas = ncpus << 2;
- else
- opt_narenas = 1;
+ opt_narenas = malloc_narenas_default();
}
+ assert(opt_narenas > 0);
+
narenas_auto = opt_narenas;
/*
- * Make sure that the arenas array can be allocated. In practice, this
- * limit is enough to allow the allocator to function, but the ctl
- * machinery will fail to allocate memory at far lower limits.
+ * Limit the number of arenas to the indexing range of MALLOCX_ARENA().
*/
- if (narenas_auto > chunksize / sizeof(arena_t *)) {
- narenas_auto = chunksize / sizeof(arena_t *);
+ if (narenas_auto >= MALLOCX_ARENA_LIMIT) {
+ narenas_auto = MALLOCX_ARENA_LIMIT - 1;
malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
narenas_auto);
}
- narenas_total = narenas_auto;
+ narenas_total_set(narenas_auto);
+
+ return false;
+}
+
+static void
+malloc_init_percpu(void) {
+ opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena);
+}
- /* Allocate and initialize arenas. */
- arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total);
- if (arenas == NULL) {
- malloc_mutex_unlock(&init_lock);
- return (true);
+static bool
+malloc_init_hard_finish(void) {
+ if (malloc_mutex_boot()) {
+ return true;
}
- /*
- * Zero the array. In practice, this should always be pre-zeroed,
- * since it was just mmap()ed, but let's be sure.
- */
- memset(arenas, 0, sizeof(arena_t *) * narenas_total);
- /* Copy the pointer to the one arena that was already initialized. */
- arenas[0] = init_arenas[0];
- malloc_initialized = true;
- malloc_mutex_unlock(&init_lock);
+ malloc_init_state = malloc_init_initialized;
+ malloc_slow_flag_init();
- return (false);
+ return false;
+}
+
+static void
+malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) {
+ malloc_mutex_assert_owner(tsdn, &init_lock);
+ malloc_mutex_unlock(tsdn, &init_lock);
+ if (reentrancy_set) {
+ assert(!tsdn_null(tsdn));
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ assert(tsd_reentrancy_level_get(tsd) > 0);
+ post_reentrancy(tsd);
+ }
+}
+
+static bool
+malloc_init_hard(void) {
+ tsd_t *tsd;
+
+#if defined(_WIN32) && _WIN32_WINNT < 0x0600
+ _init_init_lock();
+#endif
+ malloc_mutex_lock(TSDN_NULL, &init_lock);
+
+#define UNLOCK_RETURN(tsdn, ret, reentrancy) \
+ malloc_init_hard_cleanup(tsdn, reentrancy); \
+ return ret;
+
+ if (!malloc_init_hard_needed()) {
+ UNLOCK_RETURN(TSDN_NULL, false, false)
+ }
+
+ if (malloc_init_state != malloc_init_a0_initialized &&
+ malloc_init_hard_a0_locked()) {
+ UNLOCK_RETURN(TSDN_NULL, true, false)
+ }
+
+ malloc_mutex_unlock(TSDN_NULL, &init_lock);
+ /* Recursive allocation relies on functional tsd. */
+ tsd = malloc_tsd_boot0();
+ if (tsd == NULL) {
+ return true;
+ }
+ if (malloc_init_hard_recursible()) {
+ return true;
+ }
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &init_lock);
+ /* Set reentrancy level to 1 during init. */
+ pre_reentrancy(tsd, NULL);
+ /* Initialize narenas before prof_boot2 (for allocation). */
+ if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+ if (config_prof && prof_boot2(tsd)) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+
+ malloc_init_percpu();
+
+ if (malloc_init_hard_finish()) {
+ UNLOCK_RETURN(tsd_tsdn(tsd), true, true)
+ }
+ post_reentrancy(tsd);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock);
+
+ witness_assert_lockless(witness_tsd_tsdn(
+ tsd_witness_tsdp_get_unsafe(tsd)));
+ malloc_tsd_boot1();
+ /* Update TSD after tsd_boot1. */
+ tsd = tsd_fetch();
+ if (opt_background_thread) {
+ assert(have_background_thread);
+ /*
+ * Need to finish init & unlock first before creating background
+ * threads (pthread_create depends on malloc). ctl_init (which
+ * sets isthreaded) needs to be called without holding any lock.
+ */
+ background_thread_ctl_init(tsd_tsdn(tsd));
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+ bool err = background_thread_create(tsd, 0);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+ if (err) {
+ return true;
+ }
+ }
+#undef UNLOCK_RETURN
+ return false;
}
/*
@@ -843,469 +1577,831 @@ malloc_init_hard(void)
*/
/******************************************************************************/
/*
- * Begin malloc(3)-compatible functions.
+ * Begin allocation-path internal functions and data structures.
*/
-static void *
-imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
-
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = imalloc(SMALL_MAXCLASS+1);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
- } else
- p = imalloc(usize);
+/*
+ * Settings determined by the documented behavior of the allocation functions.
+ */
+typedef struct static_opts_s static_opts_t;
+struct static_opts_s {
+ /* Whether or not allocation size may overflow. */
+ bool may_overflow;
+ /* Whether or not allocations of size 0 should be treated as size 1. */
+ bool bump_empty_alloc;
+ /*
+ * Whether to assert that allocations are not of size 0 (after any
+ * bumping).
+ */
+ bool assert_nonempty_alloc;
- return (p);
-}
+ /*
+ * Whether or not to modify the 'result' argument to malloc in case of
+ * error.
+ */
+ bool null_out_result_on_error;
+ /* Whether to set errno when we encounter an error condition. */
+ bool set_errno_on_error;
-JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
+ /*
+ * The minimum valid alignment for functions requesting aligned storage.
+ */
+ size_t min_alignment;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = imalloc_prof_sample(usize, cnt);
- else
- p = imalloc(usize);
- if (p == NULL)
- return (NULL);
- prof_malloc(p, usize, cnt);
+ /* The error string to use if we oom. */
+ const char *oom_string;
+ /* The error string to use if the passed-in alignment is invalid. */
+ const char *invalid_alignment_string;
- return (p);
+ /*
+ * False if we're configured to skip some time-consuming operations.
+ *
+ * This isn't really a malloc "behavior", but it acts as a useful
+ * summary of several other static (or at least, static after program
+ * initialization) options.
+ */
+ bool slow;
+};
+
+JEMALLOC_ALWAYS_INLINE void
+static_opts_init(static_opts_t *static_opts) {
+ static_opts->may_overflow = false;
+ static_opts->bump_empty_alloc = false;
+ static_opts->assert_nonempty_alloc = false;
+ static_opts->null_out_result_on_error = false;
+ static_opts->set_errno_on_error = false;
+ static_opts->min_alignment = 0;
+ static_opts->oom_string = "";
+ static_opts->invalid_alignment_string = "";
+ static_opts->slow = false;
}
/*
- * MALLOC_BODY() is a macro rather than a function because its contents are in
- * the fast path, but inlining would cause reliability issues when determining
- * how many frames to discard from heap profiling backtraces.
+ * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we
+ * should have one constant here per magic value there. Note however that the
+ * representations need not be related.
*/
-#define MALLOC_BODY(ret, size, usize) do { \
- if (malloc_init()) \
- ret = NULL; \
- else { \
- if (config_prof && opt_prof) { \
- prof_thr_cnt_t *cnt; \
- \
- usize = s2u(size); \
- /* \
- * Call PROF_ALLOC_PREP() here rather than in \
- * imalloc_prof() so that imalloc_prof() can be \
- * inlined without introducing uncertainty \
- * about the number of backtrace frames to \
- * ignore. imalloc_prof() is in the fast path \
- * when heap profiling is enabled, so inlining \
- * is critical to performance. (For \
- * consistency all callers of PROF_ALLOC_PREP() \
- * are structured similarly, even though e.g. \
- * realloc() isn't called enough for inlining \
- * to be critical.) \
- */ \
- PROF_ALLOC_PREP(1, usize, cnt); \
- ret = imalloc_prof(usize, cnt); \
- } else { \
- if (config_stats || (config_valgrind && \
- opt_valgrind)) \
- usize = s2u(size); \
- ret = imalloc(size); \
- } \
- } \
-} while (0)
-
-void *
-je_malloc(size_t size)
-{
- void *ret;
- size_t usize JEMALLOC_CC_SILENCE_INIT(0);
-
- if (size == 0)
- size = 1;
+#define TCACHE_IND_NONE ((unsigned)-1)
+#define TCACHE_IND_AUTOMATIC ((unsigned)-2)
+#define ARENA_IND_AUTOMATIC ((unsigned)-1)
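The sentinels rely on unsigned wraparound: (unsigned)-1 is UINT_MAX and (unsigned)-2 is UINT_MAX - 1, both far above any index a MALLOCX_TCACHE()/MALLOCX_ARENA() flag can encode. A two-line demonstration:

	#include <limits.h>
	#include <stdio.h>

	int
	main(void) {
		/* Prints e.g. 4294967295 4294967294 1 with 32-bit unsigned. */
		printf("%u %u %d\n", (unsigned)-1, (unsigned)-2,
		    (unsigned)-1 == UINT_MAX);
		return 0;
	}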
+
+typedef struct dynamic_opts_s dynamic_opts_t;
+struct dynamic_opts_s {
+ void **result;
+ size_t num_items;
+ size_t item_size;
+ size_t alignment;
+ bool zero;
+ unsigned tcache_ind;
+ unsigned arena_ind;
+};
+
+JEMALLOC_ALWAYS_INLINE void
+dynamic_opts_init(dynamic_opts_t *dynamic_opts) {
+ dynamic_opts->result = NULL;
+ dynamic_opts->num_items = 0;
+ dynamic_opts->item_size = 0;
+ dynamic_opts->alignment = 0;
+ dynamic_opts->zero = false;
+ dynamic_opts->tcache_ind = TCACHE_IND_AUTOMATIC;
+ dynamic_opts->arena_ind = ARENA_IND_AUTOMATIC;
+}
- MALLOC_BODY(ret, size, usize);
+/* ind is ignored if dopts->alignment > 0. */
+JEMALLOC_ALWAYS_INLINE void *
+imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
+ size_t size, size_t usize, szind_t ind) {
+ tcache_t *tcache;
+ arena_t *arena;
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error in malloc(): "
- "out of memory\n");
- abort();
+ /* Fill in the tcache. */
+ if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) {
+ if (likely(!sopts->slow)) {
+ /* Getting tcache ptr unconditionally. */
+ tcache = tsd_tcachep_get(tsd);
+ assert(tcache == tcache_get(tsd));
+ } else {
+ tcache = tcache_get(tsd);
}
- set_errno(ENOMEM);
- }
- if (config_stats && ret != NULL) {
- assert(usize == isalloc(ret, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ } else if (dopts->tcache_ind == TCACHE_IND_NONE) {
+ tcache = NULL;
+ } else {
+ tcache = tcaches_get(tsd, dopts->tcache_ind);
}
- UTRACE(0, size, ret);
- JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
- return (ret);
-}
-static void *
-imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
+ /* Fill in the arena. */
+ if (dopts->arena_ind == ARENA_IND_AUTOMATIC) {
+ /*
+ * In case of automatic arena management, we defer arena
+ * computation until as late as we can, hoping to fill the
+ * allocation out of the tcache.
+ */
+ arena = NULL;
+ } else {
+ arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true);
+ }
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
- p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
- false);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
- } else
- p = ipalloc(usize, alignment, false);
+ if (unlikely(dopts->alignment != 0)) {
+ return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment,
+ dopts->zero, tcache, arena);
+ }
- return (p);
+ return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false,
+ arena, sopts->slow);
}
-JEMALLOC_ALWAYS_INLINE_C void *
-imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
+JEMALLOC_ALWAYS_INLINE void *
+imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd,
+ size_t usize, szind_t ind) {
+ void *ret;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = imemalign_prof_sample(alignment, usize, cnt);
- else
- p = ipalloc(usize, alignment, false);
- if (p == NULL)
- return (NULL);
- prof_malloc(p, usize, cnt);
+ /*
+ * For small allocations, sampling bumps the usize. If so, we allocate
+ * from the ind_large bucket.
+ */
+ szind_t ind_large;
+ size_t bumped_usize = usize;
+
+ if (usize <= SMALL_MAXCLASS) {
+ assert(((dopts->alignment == 0) ? sz_s2u(LARGE_MINCLASS) :
+ sz_sa2u(LARGE_MINCLASS, dopts->alignment))
+ == LARGE_MINCLASS);
+ ind_large = sz_size2index(LARGE_MINCLASS);
+ bumped_usize = sz_s2u(LARGE_MINCLASS);
+ ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize,
+ bumped_usize, ind_large);
+ if (unlikely(ret == NULL)) {
+ return NULL;
+ }
+ arena_prof_promote(tsd_tsdn(tsd), ret, usize);
+ } else {
+ ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind);
+ }
- return (p);
+ return ret;
}
-JEMALLOC_ATTR(nonnull(1))
-#ifdef JEMALLOC_PROF
/*
- * Avoid any uncertainty as to how many backtrace frames to ignore in
- * PROF_ALLOC_PREP().
+ * Returns true if the allocation will overflow, and false otherwise. Sets
+ * *size to the product either way.
*/
-JEMALLOC_NOINLINE
-#endif
-static int
-imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
-{
- int ret;
- size_t usize;
- void *result;
+JEMALLOC_ALWAYS_INLINE bool
+compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts,
+ size_t *size) {
+ /*
+ * This function is just num_items * item_size, except that we may have
+ * to check for overflow.
+ */
+
+ if (!may_overflow) {
+ assert(dopts->num_items == 1);
+ *size = dopts->item_size;
+ return false;
+ }
+
+ /* A size_t with its high-half bits all set to 1. */
+ static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2);
- assert(min_alignment != 0);
+ *size = dopts->item_size * dopts->num_items;
- if (malloc_init()) {
- result = NULL;
+ if (unlikely(*size == 0)) {
+ return (dopts->num_items != 0 && dopts->item_size != 0);
+ }
+
+ /*
+ * We got a non-zero size, but we don't know if we overflowed to get
+ * there. To avoid having to do a divide, we'll be clever and note that
+ * if both A and B can be represented in N/2 bits, then their product
+ * can be represented in N bits (without the possibility of overflow).
+ */
+ if (likely((high_bits & (dopts->num_items | dopts->item_size)) == 0)) {
+ return false;
+ }
+ if (likely(*size / dopts->item_size == dopts->num_items)) {
+ return false;
+ }
+ return true;
+}
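The half-width trick above avoids a divide in the common case: if both operands fit in the low half of a size_t, their product fits in a full size_t, so only mixed cases need the division check. A self-contained version of the same predicate (a sketch, not the jemalloc-internal API):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>

	static bool
	mul_overflows(size_t a, size_t b, size_t *prod) {
		/* A size_t with its high-half bits set, as above. */
		static const size_t high_bits =
		    SIZE_MAX << (sizeof(size_t) * 8 / 2);

		*prod = a * b;
		if (*prod == 0) {
			/* A zero product overflows only if neither factor
			 * is zero. */
			return a != 0 && b != 0;
		}
		if ((high_bits & (a | b)) == 0) {
			return false;	/* both factors fit in the low half */
		}
		return *prod / b != a;	/* mixed case: fall back to divide */
	}
	/* On 64-bit: mul_overflows(1 << 20, 1 << 20, &p) == false;
	 * mul_overflows(SIZE_MAX, 2, &p) == true. */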
+
+JEMALLOC_ALWAYS_INLINE int
+imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) {
+ /* Where the actual allocated memory will live. */
+ void *allocation = NULL;
+ /* Filled in by compute_size_with_overflow below. */
+ size_t size = 0;
+ /*
+ * For unaligned allocations, we need only ind. For aligned
+ * allocations, or in case of stats or profiling we need usize.
+ *
+ * These are actually dead stores, in that their values are reset before
+ * any branch on their value is taken. Sometimes though, it's
+ * convenient to pass them as arguments before this point. To avoid
+ * undefined behavior then, we initialize them with dummy stores.
+ */
+ szind_t ind = 0;
+ size_t usize = 0;
+
+ /* Reentrancy is only checked on slow path. */
+ int8_t reentrancy_level;
+
+ /* Compute the amount of memory the user wants. */
+ if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts,
+ &size))) {
goto label_oom;
- } else {
- if (size == 0)
- size = 1;
+ }
- /* Make sure that alignment is a large enough power of 2. */
- if (((alignment - 1) & alignment) != 0
- || (alignment < min_alignment)) {
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error allocating "
- "aligned memory: invalid alignment\n");
- abort();
- }
- result = NULL;
- ret = EINVAL;
- goto label_return;
+ /* Validate the user input. */
+ if (sopts->bump_empty_alloc) {
+ if (unlikely(size == 0)) {
+ size = 1;
}
+ }
+
+ if (sopts->assert_nonempty_alloc) {
+ assert(size != 0);
+ }
- usize = sa2u(size, alignment);
- if (usize == 0) {
- result = NULL;
+ if (unlikely(dopts->alignment < sopts->min_alignment
+ || (dopts->alignment & (dopts->alignment - 1)) != 0)) {
+ goto label_invalid_alignment;
+ }
+
+ /* This is the beginning of the "core" algorithm. */
+
+ if (dopts->alignment == 0) {
+ ind = sz_size2index(size);
+ if (unlikely(ind >= NSIZES)) {
goto label_oom;
}
+ if (config_stats || (config_prof && opt_prof)) {
+ usize = sz_index2size(ind);
+ assert(usize > 0 && usize <= LARGE_MAXCLASS);
+ }
+ } else {
+ usize = sz_sa2u(size, dopts->alignment);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ goto label_oom;
+ }
+ }
- if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ /*
+ * If we need to handle reentrancy, we can do it out of a
+ * known-initialized arena (i.e. arena 0).
+ */
+ reentrancy_level = tsd_reentrancy_level_get(tsd);
+ if (sopts->slow && unlikely(reentrancy_level > 0)) {
+ /*
+ * We should never specify particular arenas or tcaches from
+ * within our internal allocations.
+ */
+ assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC ||
+ dopts->tcache_ind == TCACHE_IND_NONE);
+ assert(dopts->arena_ind == ARENA_IND_AUTOMATIC);
+ dopts->tcache_ind = TCACHE_IND_NONE;
+ /* We know that arena 0 has already been initialized. */
+ dopts->arena_ind = 0;
+ }
+
+ /* If profiling is on, get our profiling context. */
+ if (config_prof && opt_prof) {
+ /*
+ * Note that if we're going down this path, usize must have been
+ * initialized in the previous if statement.
+ */
+ prof_tctx_t *tctx = prof_alloc_prep(
+ tsd, usize, prof_active_get_unlocked(), true);
+
+ alloc_ctx_t alloc_ctx;
+ if (likely((uintptr_t)tctx == (uintptr_t)1U)) {
+ alloc_ctx.slab = (usize <= SMALL_MAXCLASS);
+ allocation = imalloc_no_sample(
+ sopts, dopts, tsd, usize, usize, ind);
+ } else if ((uintptr_t)tctx > (uintptr_t)1U) {
+ /*
+ * Note that ind might still be 0 here. This is fine;
+ * imalloc_sample ignores ind if dopts->alignment > 0.
+ */
+ allocation = imalloc_sample(
+ sopts, dopts, tsd, usize, ind);
+ alloc_ctx.slab = false;
+ } else {
+ allocation = NULL;
+ }
- PROF_ALLOC_PREP(2, usize, cnt);
- result = imemalign_prof(alignment, usize, cnt);
- } else
- result = ipalloc(usize, alignment, false);
- if (result == NULL)
+ if (unlikely(allocation == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
goto label_oom;
+ }
+ prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx);
+ } else {
+ /*
+ * If dopts->alignment > 0, then ind is still 0, but usize was
+ * computed in the previous if statement. Down the positive
+ * alignment path, imalloc_no_sample ignores ind and size
+ * (relying only on usize).
+ */
+ allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize,
+ ind);
+ if (unlikely(allocation == NULL)) {
+ goto label_oom;
+ }
}
- *memptr = result;
- ret = 0;
-label_return:
- if (config_stats && result != NULL) {
- assert(usize == isalloc(result, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
+ /*
+ * Allocation has been done at this point. We still have some
+ * post-allocation work to do though.
+ */
+ assert(dopts->alignment == 0
+ || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0));
+
+ if (config_stats) {
+ assert(usize == isalloc(tsd_tsdn(tsd), allocation));
+ *tsd_thread_allocatedp_get(tsd) += usize;
}
- UTRACE(0, size, result);
- return (ret);
+
+ if (sopts->slow) {
+ UTRACE(0, size, allocation);
+ }
+
+ /* Success! */
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ *dopts->result = allocation;
+ return 0;
+
label_oom:
- assert(result == NULL);
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error allocating aligned memory: "
- "out of memory\n");
+ if (unlikely(sopts->slow) && config_xmalloc && unlikely(opt_xmalloc)) {
+ malloc_write(sopts->oom_string);
+ abort();
+ }
+
+ if (sopts->slow) {
+ UTRACE(NULL, size, NULL);
+ }
+
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ if (sopts->set_errno_on_error) {
+ set_errno(ENOMEM);
+ }
+
+ if (sopts->null_out_result_on_error) {
+ *dopts->result = NULL;
+ }
+
+ return ENOMEM;
+
+ /*
+ * This label is only jumped to by one goto; we move it out of line
+ * anyway to avoid obscuring the non-error paths, and for symmetry with
+ * the oom case.
+ */
+label_invalid_alignment:
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
+ malloc_write(sopts->invalid_alignment_string);
abort();
}
- ret = ENOMEM;
- goto label_return;
+
+ if (sopts->set_errno_on_error) {
+ set_errno(EINVAL);
+ }
+
+ if (sopts->slow) {
+ UTRACE(NULL, size, NULL);
+ }
+
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ if (sopts->null_out_result_on_error) {
+ *dopts->result = NULL;
+ }
+
+ return EINVAL;
}
-int
-je_posix_memalign(void **memptr, size_t alignment, size_t size)
-{
- int ret = imemalign(memptr, alignment, size, sizeof(void *));
- JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr,
- config_prof), false);
- return (ret);
+/* Returns the errno-style error code of the allocation. */
+JEMALLOC_ALWAYS_INLINE int
+imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) {
+ if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
+ malloc_write(sopts->oom_string);
+ abort();
+ }
+ UTRACE(NULL, dopts->num_items * dopts->item_size, NULL);
+ set_errno(ENOMEM);
+ *dopts->result = NULL;
+
+ return ENOMEM;
+ }
+
+ /* We always need the tsd. Let's grab it right away. */
+ tsd_t *tsd = tsd_fetch();
+ assert(tsd);
+ if (likely(tsd_fast(tsd))) {
+ /* Fast and common path. */
+ tsd_assert_fast(tsd);
+ sopts->slow = false;
+ return imalloc_body(sopts, dopts, tsd);
+ } else {
+ sopts->slow = true;
+ return imalloc_body(sopts, dopts, tsd);
+ }
}
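The two seemingly redundant calls exist so that sopts->slow is a compile-time constant inside each inlined copy of imalloc_body(), letting the compiler fold every if (sopts->slow) branch. The same specialization pattern in miniature:

	#include <stdbool.h>

	static int cheap(void) { return 1; }
	static int expensive(void) { return 2; }

	static inline int
	body(bool slow) {
		/* Folds to one call once `slow` is constant at the site. */
		return slow ? expensive() : cheap();
	}

	int fast_path(void) { return body(false); }	/* -> cheap() */
	int slow_path(void) { return body(true); }	/* -> expensive() */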
+/******************************************************************************/
+/*
+ * Begin malloc(3)-compatible functions.
+ */
-void *
-je_aligned_alloc(size_t alignment, size_t size)
-{
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
+je_malloc(size_t size) {
void *ret;
- int err;
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
- if ((err = imemalign(&ret, alignment, size, 1)) != 0) {
- ret = NULL;
- set_errno(err);
- }
- JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof),
- false);
- return (ret);
+ LOG("core.malloc.entry", "size: %zu", size);
+
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
+
+ sopts.bump_empty_alloc = true;
+ sopts.null_out_result_on_error = true;
+ sopts.set_errno_on_error = true;
+ sopts.oom_string = "<jemalloc>: Error in malloc(): out of memory\n";
+
+ dopts.result = &ret;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+
+ imalloc(&sopts, &dopts);
+
+ LOG("core.malloc.exit", "result: %p", ret);
+
+ return ret;
}
-static void *
-icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+JEMALLOC_ATTR(nonnull(1))
+je_posix_memalign(void **memptr, size_t alignment, size_t size) {
+ int ret;
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
+
+ LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, "
+ "size: %zu", memptr, alignment, size);
+
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = icalloc(SMALL_MAXCLASS+1);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
- } else
- p = icalloc(usize);
+ sopts.bump_empty_alloc = true;
+ sopts.min_alignment = sizeof(void *);
+ sopts.oom_string =
+ "<jemalloc>: Error allocating aligned memory: out of memory\n";
+ sopts.invalid_alignment_string =
+ "<jemalloc>: Error allocating aligned memory: invalid alignment\n";
- return (p);
+ dopts.result = memptr;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+ dopts.alignment = alignment;
+
+ ret = imalloc(&sopts, &dopts);
+
+ LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret,
+ *memptr);
+
+ return ret;
}
-JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(size_t usize, prof_thr_cnt_t *cnt)
-{
- void *p;
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2)
+je_aligned_alloc(size_t alignment, size_t size) {
+ void *ret;
+
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = icalloc_prof_sample(usize, cnt);
- else
- p = icalloc(usize);
- if (p == NULL)
- return (NULL);
- prof_malloc(p, usize, cnt);
+ LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n",
+ alignment, size);
- return (p);
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
+
+ sopts.bump_empty_alloc = true;
+ sopts.null_out_result_on_error = true;
+ sopts.set_errno_on_error = true;
+ sopts.min_alignment = 1;
+ sopts.oom_string =
+ "<jemalloc>: Error allocating aligned memory: out of memory\n";
+ sopts.invalid_alignment_string =
+ "<jemalloc>: Error allocating aligned memory: invalid alignment\n";
+
+ dopts.result = &ret;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+ dopts.alignment = alignment;
+
+ imalloc(&sopts, &dopts);
+
+ LOG("core.aligned_alloc.exit", "result: %p", ret);
+
+ return ret;
}
-void *
-je_calloc(size_t num, size_t size)
-{
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2)
+je_calloc(size_t num, size_t size) {
void *ret;
- size_t num_size;
- size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
- if (malloc_init()) {
- num_size = 0;
- ret = NULL;
- goto label_return;
- }
+ LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size);
- num_size = num * size;
- if (num_size == 0) {
- if (num == 0 || size == 0)
- num_size = 1;
- else {
- ret = NULL;
- goto label_return;
- }
- /*
- * Try to avoid division here. We know that it isn't possible to
- * overflow during multiplication if neither operand uses any of the
- * most significant half of the bits in a size_t.
- */
- } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
- && (num_size / size != num)) {
- /* size_t overflow. */
- ret = NULL;
- goto label_return;
- }
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
- if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
+ sopts.may_overflow = true;
+ sopts.bump_empty_alloc = true;
+ sopts.null_out_result_on_error = true;
+ sopts.set_errno_on_error = true;
+ sopts.oom_string = "<jemalloc>: Error in calloc(): out of memory\n";
- usize = s2u(num_size);
- PROF_ALLOC_PREP(1, usize, cnt);
- ret = icalloc_prof(usize, cnt);
- } else {
- if (config_stats || (config_valgrind && opt_valgrind))
- usize = s2u(num_size);
- ret = icalloc(num_size);
- }
+ dopts.result = &ret;
+ dopts.num_items = num;
+ dopts.item_size = size;
+ dopts.zero = true;
-label_return:
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error in calloc(): out of "
- "memory\n");
- abort();
- }
- set_errno(ENOMEM);
- }
- if (config_stats && ret != NULL) {
- assert(usize == isalloc(ret, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
- }
- UTRACE(0, num_size, ret);
- JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true);
- return (ret);
+ imalloc(&sopts, &dopts);
+
+ LOG("core.calloc.exit", "result: %p", ret);
+
+ return ret;
}
static void *
-irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt)
-{
+irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
+ prof_tctx_t *tctx) {
void *p;
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
- } else
- p = iralloc(oldptr, usize, 0, 0, false);
+ if (tctx == NULL) {
+ return NULL;
+ }
+ if (usize <= SMALL_MAXCLASS) {
+ p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false);
+ if (p == NULL) {
+ return NULL;
+ }
+ arena_prof_promote(tsd_tsdn(tsd), p, usize);
+ } else {
+ p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
+ }
- return (p);
+ return p;
}
-JEMALLOC_ALWAYS_INLINE_C void *
-irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt)
-{
+JEMALLOC_ALWAYS_INLINE void *
+irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize,
+ alloc_ctx_t *alloc_ctx) {
void *p;
- prof_ctx_t *old_ctx;
-
- old_ctx = prof_ctx_get(oldptr);
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = irealloc_prof_sample(oldptr, usize, cnt);
- else
- p = iralloc(oldptr, usize, 0, 0, false);
- if (p == NULL)
- return (NULL);
- prof_realloc(p, usize, cnt, old_usize, old_ctx);
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
+
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx);
+ tctx = prof_alloc_prep(tsd, usize, prof_active, true);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+ p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx);
+ } else {
+ p = iralloc(tsd, old_ptr, old_usize, usize, 0, false);
+ }
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, true);
+ return NULL;
+ }
+ prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize,
+ old_tctx);
- return (p);
+ return p;
}
-JEMALLOC_INLINE_C void
-ifree(void *ptr)
-{
+JEMALLOC_ALWAYS_INLINE void
+ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
+ if (!slow_path) {
+ tsd_assert_fast(tsd);
+ }
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ if (tsd_reentrancy_level_get(tsd) != 0) {
+ assert(slow_path);
+ }
+
+ assert(ptr != NULL);
+ assert(malloc_initialized() || IS_INITIALIZER);
+
+ alloc_ctx_t alloc_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind != NSIZES);
+
size_t usize;
- UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
+ if (config_prof && opt_prof) {
+ usize = sz_index2size(alloc_ctx.szind);
+ prof_free(tsd, ptr, usize, &alloc_ctx);
+ } else if (config_stats) {
+ usize = sz_index2size(alloc_ctx.szind);
+ }
+ if (config_stats) {
+ *tsd_thread_deallocatedp_get(tsd) += usize;
+ }
+
+ if (likely(!slow_path)) {
+ idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false,
+ false);
+ } else {
+ idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false,
+ true);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
+ if (!slow_path) {
+ tsd_assert_fast(tsd);
+ }
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ if (tsd_reentrancy_level_get(tsd) != 0) {
+ assert(slow_path);
+ }
assert(ptr != NULL);
- assert(malloc_initialized || IS_INITIALIZER);
+ assert(malloc_initialized() || IS_INITIALIZER);
+
+ alloc_ctx_t alloc_ctx, *ctx;
+ if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) {
+ /*
+ * When cache_oblivious is disabled and ptr is not page aligned,
+ * the allocation was not sampled -- usize can be used to
+ * determine szind directly.
+ */
+ alloc_ctx.szind = sz_size2index(usize);
+ alloc_ctx.slab = true;
+ ctx = &alloc_ctx;
+ if (config_debug) {
+ alloc_ctx_t dbg_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree,
+ rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind,
+ &dbg_ctx.slab);
+ assert(dbg_ctx.szind == alloc_ctx.szind);
+ assert(dbg_ctx.slab == alloc_ctx.slab);
+ }
+ } else if (config_prof && opt_prof) {
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind == sz_size2index(usize));
+ ctx = &alloc_ctx;
+ } else {
+ ctx = NULL;
+ }
if (config_prof && opt_prof) {
- usize = isalloc(ptr, config_prof);
- prof_free(ptr, usize);
- } else if (config_stats || config_valgrind)
- usize = isalloc(ptr, config_prof);
- if (config_stats)
- thread_allocated_tsd_get()->deallocated += usize;
- if (config_valgrind && opt_valgrind)
- rzsize = p2rz(ptr);
- iqalloc(ptr);
- JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+ prof_free(tsd, ptr, usize, ctx);
+ }
+ if (config_stats) {
+ *tsd_thread_deallocatedp_get(tsd) += usize;
+ }
+
+ if (likely(!slow_path)) {
+ isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false);
+ } else {
+ isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true);
+ }
}
-void *
-je_realloc(void *ptr, size_t size)
-{
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2)
+je_realloc(void *ptr, size_t size) {
void *ret;
+ tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL);
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
size_t old_usize = 0;
- UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- if (size == 0) {
+ LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size);
+
+ if (unlikely(size == 0)) {
if (ptr != NULL) {
/* realloc(ptr, 0) is equivalent to free(ptr). */
UTRACE(ptr, 0, 0);
- ifree(ptr);
- return (NULL);
+ tcache_t *tcache;
+ tsd_t *tsd = tsd_fetch();
+ if (tsd_reentrancy_level_get(tsd) == 0) {
+ tcache = tcache_get(tsd);
+ } else {
+ tcache = NULL;
+ }
+ ifree(tsd, ptr, tcache, true);
+
+ LOG("core.realloc.exit", "result: %p", NULL);
+ return NULL;
}
size = 1;
}
- if (ptr != NULL) {
- assert(malloc_initialized || IS_INITIALIZER);
- malloc_thread_init();
+ if (likely(ptr != NULL)) {
+ assert(malloc_initialized() || IS_INITIALIZER);
+ tsd_t *tsd = tsd_fetch();
- if ((config_prof && opt_prof) || config_stats ||
- (config_valgrind && opt_valgrind))
- old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
- old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ alloc_ctx_t alloc_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind != NSIZES);
+ old_usize = sz_index2size(alloc_ctx.szind);
+ assert(old_usize == isalloc(tsd_tsdn(tsd), ptr));
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
- usize = s2u(size);
- PROF_ALLOC_PREP(1, usize, cnt);
- ret = irealloc_prof(ptr, old_usize, usize, cnt);
+ usize = sz_s2u(size);
+ ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ?
+ NULL : irealloc_prof(tsd, ptr, old_usize, usize,
+ &alloc_ctx);
} else {
- if (config_stats || (config_valgrind && opt_valgrind))
- usize = s2u(size);
- ret = iralloc(ptr, size, 0, 0, false);
+ if (config_stats) {
+ usize = sz_s2u(size);
+ }
+ ret = iralloc(tsd, ptr, old_usize, size, 0, false);
}
+ tsdn = tsd_tsdn(tsd);
} else {
/* realloc(NULL, size) is equivalent to malloc(size). */
- MALLOC_BODY(ret, size, usize);
+ void *ret = je_malloc(size);
+ LOG("core.realloc.exit", "result: %p", ret);
+ return ret;
}
- if (ret == NULL) {
- if (config_xmalloc && opt_xmalloc) {
+ if (unlikely(ret == NULL)) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in realloc(): "
"out of memory\n");
abort();
}
set_errno(ENOMEM);
}
- if (config_stats && ret != NULL) {
- thread_allocated_t *ta;
- assert(usize == isalloc(ret, config_prof));
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ if (config_stats && likely(ret != NULL)) {
+ tsd_t *tsd;
+
+ assert(usize == isalloc(tsdn, ret));
+ tsd = tsdn_tsd(tsdn);
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
UTRACE(ptr, size, ret);
- JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize,
- false);
- return (ret);
+ check_entry_exit_locking(tsdn);
+
+ LOG("core.realloc.exit", "result: %p", ret);
+ return ret;
}
-void
-je_free(void *ptr)
-{
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_free(void *ptr) {
+ LOG("core.free.entry", "ptr: %p", ptr);
UTRACE(ptr, 0, 0);
- if (ptr != NULL)
- ifree(ptr);
+ if (likely(ptr != NULL)) {
+ /*
+ * We avoid setting up tsd fully (e.g. tcache, arena binding)
+ * based on only free() calls -- other activities trigger the
+ * minimal to full transition. This is because free() may
+ * happen during thread shutdown after tls deallocation: if a
+ * thread never had any malloc activities until then, a
+ * fully-setup tsd won't be destructed properly.
+ */
+ tsd_t *tsd = tsd_fetch_min();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ tcache_t *tcache;
+ if (likely(tsd_fast(tsd))) {
+ tsd_assert_fast(tsd);
+ /* Unconditionally get tcache ptr on fast path. */
+ tcache = tsd_tcachep_get(tsd);
+ ifree(tsd, ptr, tcache, false);
+ } else {
+ if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
+ tcache = tcache_get(tsd);
+ } else {
+ tcache = NULL;
+ }
+ ifree(tsd, ptr, tcache, true);
+ }
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ }
+ LOG("core.free.exit", "");
}
/*
@@ -1317,36 +2413,76 @@ je_free(void *ptr)
*/
#ifdef JEMALLOC_OVERRIDE_MEMALIGN
-void *
-je_memalign(size_t alignment, size_t size)
-{
- void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
- imemalign(&ret, alignment, size, 1);
- JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
- return (ret);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc)
+je_memalign(size_t alignment, size_t size) {
+ void *ret;
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
+
+ LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment,
+ size);
+
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
+
+ sopts.bump_empty_alloc = true;
+ sopts.min_alignment = 1;
+ sopts.oom_string =
+ "<jemalloc>: Error allocating aligned memory: out of memory\n";
+ sopts.invalid_alignment_string =
+ "<jemalloc>: Error allocating aligned memory: invalid alignment\n";
+ sopts.null_out_result_on_error = true;
+
+ dopts.result = &ret;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+ dopts.alignment = alignment;
+
+ imalloc(&sopts, &dopts);
+
+ LOG("core.memalign.exit", "result: %p", ret);
+ return ret;
}
#endif
#ifdef JEMALLOC_OVERRIDE_VALLOC
-void *
-je_valloc(size_t size)
-{
- void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
- imemalign(&ret, PAGE, size, 1);
- JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
- return (ret);
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc)
+je_valloc(size_t size) {
+ void *ret;
+
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
+
+ LOG("core.valloc.entry", "size: %zu\n", size);
+
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
+
+ sopts.bump_empty_alloc = true;
+ sopts.null_out_result_on_error = true;
+ sopts.min_alignment = PAGE;
+ sopts.oom_string =
+ "<jemalloc>: Error allocating aligned memory: out of memory\n";
+ sopts.invalid_alignment_string =
+ "<jemalloc>: Error allocating aligned memory: invalid alignment\n";
+
+ dopts.result = &ret;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+ dopts.alignment = PAGE;
+
+ imalloc(&sopts, &dopts);
+
+ LOG("core.valloc.exit", "result: %p\n", ret);
+ return ret;
}
#endif
-/*
- * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has
- * #define je_malloc malloc
- */
-#define malloc_is_malloc 1
-#define is_malloc_(a) malloc_is_ ## a
-#define is_malloc(a) is_malloc_(a)
-
-#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__))
+#if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)
/*
* glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
* to inconsistently reference libc's malloc(3)-compatible functions
@@ -1356,11 +2492,47 @@ je_valloc(size_t size)
* passed an extra argument for the caller return address, which will be
* ignored.
*/
-JEMALLOC_EXPORT void (* __free_hook)(void *ptr) = je_free;
-JEMALLOC_EXPORT void *(* __malloc_hook)(size_t size) = je_malloc;
-JEMALLOC_EXPORT void *(* __realloc_hook)(void *ptr, size_t size) = je_realloc;
-JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) =
+JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free;
+JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc;
+JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc;
+# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK
+JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
je_memalign;
+# endif
+
+# ifdef CPU_COUNT
+/*
+ * To enable static linking with glibc, the libc specific malloc interface must
+ * be implemented also, so none of glibc's malloc.o functions are added to the
+ * link.
+ */
+# define ALIAS(je_fn) __attribute__((alias (#je_fn), used))
+/* To force macro expansion of je_ prefix before stringification. */
+# define PREALIAS(je_fn) ALIAS(je_fn)
+# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC
+void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc);
+# endif
+# ifdef JEMALLOC_OVERRIDE___LIBC_FREE
+void __libc_free(void* ptr) PREALIAS(je_free);
+# endif
+# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC
+void *__libc_malloc(size_t size) PREALIAS(je_malloc);
+# endif
+# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN
+void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign);
+# endif
+# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC
+void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc);
+# endif
+# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC
+void *__libc_valloc(size_t size) PREALIAS(je_valloc);
+# endif
+# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN
+int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign);
+# endif
+# undef PREALIAS
+# undef ALIAS
+# endif
#endif
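
The PREALIAS() indirection above is the usual two-step expand-then-stringify
trick: the # operator suppresses macro expansion of its operand, so one extra
macro layer is needed for the je_ prefix to resolve first. A minimal sketch
(hypothetical names bad/good, mirroring the macros above):

	#define ALIAS(fn)	__attribute__((alias (#fn), used))
	#define PREALIAS(fn)	ALIAS(fn)	/* fn expands before # sees it */
	#define je_malloc	malloc		/* as when je_malloc is mangled */

	void *bad(size_t)  ALIAS(je_malloc);	/* emits alias("je_malloc") */
	void *good(size_t) PREALIAS(je_malloc);	/* emits alias("malloc")    */
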
/*
@@ -1371,162 +2543,102 @@ JEMALLOC_EXPORT void *(* __memalign_hook)(size_t alignment, size_t size) =
* Begin non-standard functions.
*/
-JEMALLOC_ALWAYS_INLINE_C void *
-imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena)
-{
-
- assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
- alignment)));
-
- if (alignment != 0)
- return (ipalloct(usize, alignment, zero, try_tcache, arena));
- else if (zero)
- return (icalloct(usize, try_tcache, arena));
- else
- return (imalloct(usize, try_tcache, arena));
-}
-
-static void *
-imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_thr_cnt_t *cnt)
-{
- void *p;
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1)
+je_mallocx(size_t size, int flags) {
+ void *ret;
+ static_opts_t sopts;
+ dynamic_opts_t dopts;
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- size_t usize_promoted = (alignment == 0) ?
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
- assert(usize_promoted != 0);
- p = imallocx(usize_promoted, alignment, zero, try_tcache,
- arena);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
-
- return (p);
-}
-
-JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_thr_cnt_t *cnt)
-{
- void *p;
+ LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags);
- if ((uintptr_t)cnt != (uintptr_t)1U) {
- p = imallocx_prof_sample(usize, alignment, zero, try_tcache,
- arena, cnt);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
- if (p == NULL)
- return (NULL);
- prof_malloc(p, usize, cnt);
+ static_opts_init(&sopts);
+ dynamic_opts_init(&dopts);
- return (p);
-}
+ sopts.assert_nonempty_alloc = true;
+ sopts.null_out_result_on_error = true;
+ sopts.oom_string = "<jemalloc>: Error in mallocx(): out of memory\n";
-void *
-je_mallocx(size_t size, int flags)
-{
- void *p;
- size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
- bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- arena_t *arena;
- bool try_tcache;
+ dopts.result = &ret;
+ dopts.num_items = 1;
+ dopts.item_size = size;
+ if (unlikely(flags != 0)) {
+ if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) {
+ dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+ }
- assert(size != 0);
+ dopts.zero = MALLOCX_ZERO_GET(flags);
- if (malloc_init())
- goto label_oom;
+ if ((flags & MALLOCX_TCACHE_MASK) != 0) {
+ if ((flags & MALLOCX_TCACHE_MASK)
+ == MALLOCX_TCACHE_NONE) {
+ dopts.tcache_ind = TCACHE_IND_NONE;
+ } else {
+ dopts.tcache_ind = MALLOCX_TCACHE_GET(flags);
+ }
+ } else {
+ dopts.tcache_ind = TCACHE_IND_AUTOMATIC;
+ }
- if (arena_ind != UINT_MAX) {
- arena = arenas[arena_ind];
- try_tcache = false;
- } else {
- arena = NULL;
- try_tcache = true;
+ if ((flags & MALLOCX_ARENA_MASK) != 0)
+ dopts.arena_ind = MALLOCX_ARENA_GET(flags);
}
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
+ imalloc(&sopts, &dopts);
- if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
- PROF_ALLOC_PREP(1, usize, cnt);
- p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
- cnt);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
- if (p == NULL)
- goto label_oom;
-
- if (config_stats) {
- assert(usize == isalloc(p, config_prof));
- thread_allocated_tsd_get()->allocated += usize;
- }
- UTRACE(0, size, p);
- JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero);
- return (p);
-label_oom:
- if (config_xmalloc && opt_xmalloc) {
- malloc_write("<jemalloc>: Error in mallocx(): out of memory\n");
- abort();
- }
- UTRACE(0, size, 0);
- return (NULL);
+ LOG("core.mallocx.exit", "result: %p", ret);
+ return ret;
}
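
The flag decoding above is the kernel of the mallocx() API; callers compose
the same bits with the public macros. A minimal sketch (symbols are
je_-prefixed in this vendored build, unprefixed in a default one):

	#include <jemalloc/jemalloc.h>

	/* 4 KiB, 64-byte aligned, zero-filled, bypassing the thread cache. */
	void *p = mallocx(4096,
	    MALLOCX_ALIGN(64) | MALLOCX_ZERO | MALLOCX_TCACHE_NONE);
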
static void *
-irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
- bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena,
- prof_thr_cnt_t *cnt)
-{
+irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize,
+ size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
+ prof_tctx_t *tctx) {
void *p;
- if (cnt == NULL)
- return (NULL);
- if (prof_promote && usize <= SMALL_MAXCLASS) {
- p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
- size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
- if (p == NULL)
- return (NULL);
- arena_prof_promoted(p, usize);
+ if (tctx == NULL) {
+ return NULL;
+ }
+ if (usize <= SMALL_MAXCLASS) {
+ p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS,
+ alignment, zero, tcache, arena);
+ if (p == NULL) {
+ return NULL;
+ }
+ arena_prof_promote(tsdn, p, usize);
} else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero,
+ tcache, arena);
}
- return (p);
+ return p;
}
-JEMALLOC_ALWAYS_INLINE_C void *
-irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
- size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
- arena_t *arena, prof_thr_cnt_t *cnt)
-{
+JEMALLOC_ALWAYS_INLINE void *
+irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size,
+ size_t alignment, size_t *usize, bool zero, tcache_t *tcache,
+ arena_t *arena, alloc_ctx_t *alloc_ctx) {
void *p;
- prof_ctx_t *old_ctx;
-
- old_ctx = prof_ctx_get(oldptr);
- if ((uintptr_t)cnt != (uintptr_t)1U)
- p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
- try_tcache_alloc, try_tcache_dalloc, arena, cnt);
- else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
+
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx);
+ tctx = prof_alloc_prep(tsd, *usize, prof_active, false);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+ p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize,
+ *usize, alignment, zero, tcache, arena, tctx);
+ } else {
+ p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment,
+ zero, tcache, arena);
+ }
+ if (unlikely(p == NULL)) {
+ prof_alloc_rollback(tsd, tctx, false);
+ return NULL;
}
- if (p == NULL)
- return (NULL);
- if (p == oldptr && alignment != 0) {
+ if (p == old_ptr && alignment != 0) {
/*
* The allocation did not move, so it is possible that the size
* class is smaller than would guarantee the requested
@@ -1535,421 +2647,516 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
* be the same as the current usize because of in-place large
* reallocation. Therefore, query the actual value of usize.
*/
- *usize = isalloc(p, config_prof);
+ *usize = isalloc(tsd_tsdn(tsd), p);
}
- prof_realloc(p, *usize, cnt, old_usize, old_ctx);
+ prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr,
+ old_usize, old_tctx);
- return (p);
+ return p;
}
-void *
-je_rallocx(void *ptr, size_t size, int flags)
-{
+JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
+void JEMALLOC_NOTHROW *
+JEMALLOC_ALLOC_SIZE(2)
+je_rallocx(void *ptr, size_t size, int flags) {
void *p;
- size_t usize, old_usize;
- UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ tsd_t *tsd;
+ size_t usize;
+ size_t old_usize;
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- bool try_tcache_alloc, try_tcache_dalloc;
arena_t *arena;
+ tcache_t *tcache;
+
+ LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
+ size, flags);
+
assert(ptr != NULL);
assert(size != 0);
- assert(malloc_initialized || IS_INITIALIZER);
- malloc_thread_init();
-
- if (arena_ind != UINT_MAX) {
- arena_chunk_t *chunk;
- try_tcache_alloc = false;
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache_dalloc = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
- arena = arenas[arena_ind];
+ assert(malloc_initialized() || IS_INITIALIZER);
+ tsd = tsd_fetch();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ arena = arena_get(tsd_tsdn(tsd), arena_ind, true);
+ if (unlikely(arena == NULL)) {
+ goto label_oom;
+ }
} else {
- try_tcache_alloc = true;
- try_tcache_dalloc = true;
arena = NULL;
}
- if ((config_prof && opt_prof) || config_stats ||
- (config_valgrind && opt_valgrind))
- old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_usize);
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) {
+ tcache = NULL;
+ } else {
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ }
+ } else {
+ tcache = tcache_get(tsd);
+ }
+ alloc_ctx_t alloc_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind != NSIZES);
+ old_usize = sz_index2size(alloc_ctx.szind);
+ assert(old_usize == isalloc(tsd_tsdn(tsd), ptr));
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
-
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
- PROF_ALLOC_PREP(1, usize, cnt);
- p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
- try_tcache_alloc, try_tcache_dalloc, arena, cnt);
- if (p == NULL)
+ usize = (alignment == 0) ?
+ sz_s2u(size) : sz_sa2u(size, alignment);
+ if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+ goto label_oom;
+ }
+ p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize,
+ zero, tcache, arena, &alloc_ctx);
+ if (unlikely(p == NULL)) {
goto label_oom;
+ }
} else {
- p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc,
- try_tcache_dalloc, arena);
- if (p == NULL)
+ p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment,
+ zero, tcache, arena);
+ if (unlikely(p == NULL)) {
goto label_oom;
- if (config_stats || (config_valgrind && opt_valgrind))
- usize = isalloc(p, config_prof);
+ }
+ if (config_stats) {
+ usize = isalloc(tsd_tsdn(tsd), p);
+ }
}
+ assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0));
if (config_stats) {
- thread_allocated_t *ta;
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
UTRACE(ptr, size, p);
- JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero);
- return (p);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ LOG("core.rallocx.exit", "result: %p", p);
+ return p;
label_oom:
- if (config_xmalloc && opt_xmalloc) {
+ if (config_xmalloc && unlikely(opt_xmalloc)) {
malloc_write("<jemalloc>: Error in rallocx(): out of memory\n");
abort();
}
UTRACE(ptr, size, 0);
- return (NULL);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ LOG("core.rallocx.exit", "result: %p", NULL);
+ return NULL;
}
-JEMALLOC_ALWAYS_INLINE_C size_t
-ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, bool zero, arena_t *arena)
-{
+JEMALLOC_ALWAYS_INLINE size_t
+ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
+ size_t extra, size_t alignment, bool zero) {
size_t usize;
- if (ixalloc(ptr, size, extra, alignment, zero))
- return (old_usize);
- usize = isalloc(ptr, config_prof);
+ if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) {
+ return old_usize;
+ }
+ usize = isalloc(tsdn, ptr);
- return (usize);
+ return usize;
}
static size_t
-ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, size_t max_usize, bool zero, arena_t *arena,
- prof_thr_cnt_t *cnt)
-{
+ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size,
+ size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) {
size_t usize;
- if (cnt == NULL)
- return (old_usize);
- /* Use minimum usize to determine whether promotion may happen. */
- if (prof_promote && ((alignment == 0) ? s2u(size) : sa2u(size,
- alignment)) <= SMALL_MAXCLASS) {
- if (ixalloc(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
- size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
- alignment, zero))
- return (old_usize);
- usize = isalloc(ptr, config_prof);
- if (max_usize < PAGE)
- arena_prof_promoted(ptr, usize);
- } else {
- usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
+ if (tctx == NULL) {
+ return old_usize;
}
+ usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment,
+ zero);
- return (usize);
+ return usize;
}
-JEMALLOC_ALWAYS_INLINE_C size_t
-ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
- size_t alignment, size_t max_usize, bool zero, arena_t *arena,
- prof_thr_cnt_t *cnt)
-{
- size_t usize;
- prof_ctx_t *old_ctx;
+JEMALLOC_ALWAYS_INLINE size_t
+ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size,
+ size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) {
+ size_t usize_max, usize;
+ bool prof_active;
+ prof_tctx_t *old_tctx, *tctx;
- old_ctx = prof_ctx_get(ptr);
- if ((uintptr_t)cnt != (uintptr_t)1U) {
- usize = ixallocx_prof_sample(ptr, old_usize, size, extra,
- alignment, zero, max_usize, arena, cnt);
+ prof_active = prof_active_get_unlocked();
+ old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+ /*
+ * usize isn't knowable before ixalloc() returns when extra is non-zero.
+ * Therefore, compute its maximum possible value and use that in
+ * prof_alloc_prep() to decide whether to capture a backtrace.
+ * prof_realloc() will use the actual usize to decide whether to sample.
+ */
+ if (alignment == 0) {
+ usize_max = sz_s2u(size+extra);
+ assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS);
} else {
- usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
+ usize_max = sz_sa2u(size+extra, alignment);
+ if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) {
+ /*
+ * usize_max is out of range, and chances are that
+ * allocation will fail, but use the maximum possible
+ * value and carry on with prof_alloc_prep(), just in
+ * case allocation succeeds.
+ */
+ usize_max = LARGE_MAXCLASS;
+ }
}
- if (usize == old_usize)
- return (usize);
- prof_realloc(ptr, usize, cnt, old_usize, old_ctx);
+ tctx = prof_alloc_prep(tsd, usize_max, prof_active, false);
- return (usize);
+ if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) {
+ usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize,
+ size, extra, alignment, zero, tctx);
+ } else {
+ usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+ extra, alignment, zero);
+ }
+ if (usize == old_usize) {
+ prof_alloc_rollback(tsd, tctx, false);
+ return usize;
+ }
+ prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize,
+ old_tctx);
+
+ return usize;
}
-size_t
-je_xallocx(void *ptr, size_t size, size_t extra, int flags)
-{
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+je_xallocx(void *ptr, size_t size, size_t extra, int flags) {
+ tsd_t *tsd;
size_t usize, old_usize;
- UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- arena_t *arena;
+
+ LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, "
+ "flags: %d", ptr, size, extra, flags);
assert(ptr != NULL);
assert(size != 0);
assert(SIZE_T_MAX - size >= extra);
- assert(malloc_initialized || IS_INITIALIZER);
- malloc_thread_init();
-
- if (arena_ind != UINT_MAX)
- arena = arenas[arena_ind];
- else
- arena = NULL;
-
- old_usize = isalloc(ptr, config_prof);
- if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_usize);
+ assert(malloc_initialized() || IS_INITIALIZER);
+ tsd = tsd_fetch();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ alloc_ctx_t alloc_ctx;
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+ rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab);
+ assert(alloc_ctx.szind != NSIZES);
+ old_usize = sz_index2size(alloc_ctx.szind);
+ assert(old_usize == isalloc(tsd_tsdn(tsd), ptr));
+ /*
+ * The API explicitly absolves itself of protecting against (size +
+ * extra) numerical overflow, but we may need to clamp extra to avoid
+ * exceeding LARGE_MAXCLASS.
+ *
+ * Ordinarily, size limit checking is handled deeper down, but here we
+ * have to check as part of (size + extra) clamping, since we need the
+ * clamped value in the above helper functions.
+ */
+ if (unlikely(size > LARGE_MAXCLASS)) {
+ usize = old_usize;
+ goto label_not_resized;
+ }
+ if (unlikely(LARGE_MAXCLASS - size < extra)) {
+ extra = LARGE_MAXCLASS - size;
+ }
if (config_prof && opt_prof) {
- prof_thr_cnt_t *cnt;
- /*
- * usize isn't knowable before ixalloc() returns when extra is
- * non-zero. Therefore, compute its maximum possible value and
- * use that in PROF_ALLOC_PREP() to decide whether to capture a
- * backtrace. prof_realloc() will use the actual usize to
- * decide whether to sample.
- */
- size_t max_usize = (alignment == 0) ? s2u(size+extra) :
- sa2u(size+extra, alignment);
- PROF_ALLOC_PREP(1, max_usize, cnt);
- usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
- max_usize, zero, arena, cnt);
+ usize = ixallocx_prof(tsd, ptr, old_usize, size, extra,
+ alignment, zero, &alloc_ctx);
} else {
- usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
- zero, arena);
+ usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size,
+ extra, alignment, zero);
}
- if (usize == old_usize)
+ if (unlikely(usize == old_usize)) {
goto label_not_resized;
+ }
if (config_stats) {
- thread_allocated_t *ta;
- ta = thread_allocated_tsd_get();
- ta->allocated += usize;
- ta->deallocated += old_usize;
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ *tsd_thread_deallocatedp_get(tsd) += old_usize;
}
- JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero);
label_not_resized:
UTRACE(ptr, size, ptr);
- return (usize);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ LOG("core.xallocx.exit", "result: %zu", usize);
+ return usize;
}
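
Note that je_xallocx() never moves the allocation: it returns the resulting
usable size, and callers detect a failed resize by comparing that against the
request. A caller-side sketch (p and want as in the caller):

	size_t usable = xallocx(p, want, 0, 0);
	if (usable < want) {
		/* Could not grow in place; rallocx() may move the data. */
		void *q = rallocx(p, want, 0);
		if (q != NULL) {
			p = q;
		}
	}
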
-size_t
-je_sallocx(const void *ptr, int flags)
-{
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+JEMALLOC_ATTR(pure)
+je_sallocx(const void *ptr, UNUSED int flags) {
size_t usize;
+ tsdn_t *tsdn;
- assert(malloc_initialized || IS_INITIALIZER);
- malloc_thread_init();
+ LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags);
- if (config_ivsalloc)
- usize = ivsalloc(ptr, config_prof);
- else {
- assert(ptr != NULL);
- usize = isalloc(ptr, config_prof);
+ assert(malloc_initialized() || IS_INITIALIZER);
+ assert(ptr != NULL);
+
+ tsdn = tsdn_fetch();
+ check_entry_exit_locking(tsdn);
+
+ if (config_debug || force_ivsalloc) {
+ usize = ivsalloc(tsdn, ptr);
+ assert(force_ivsalloc || usize != 0);
+ } else {
+ usize = isalloc(tsdn, ptr);
}
- return (usize);
+ check_entry_exit_locking(tsdn);
+
+ LOG("core.sallocx.exit", "result: %zu", usize);
+ return usize;
}
-void
-je_dallocx(void *ptr, int flags)
-{
- size_t usize;
- UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- bool try_tcache;
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_dallocx(void *ptr, int flags) {
+ LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags);
assert(ptr != NULL);
- assert(malloc_initialized || IS_INITIALIZER);
-
- if (arena_ind != UINT_MAX) {
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- try_tcache = (chunk == ptr || chunk->arena !=
- arenas[arena_ind]);
- } else
- try_tcache = true;
+ assert(malloc_initialized() || IS_INITIALIZER);
+
+ tsd_t *tsd = tsd_fetch();
+ bool fast = tsd_fast(tsd);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ tcache_t *tcache;
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ /* Not allowed to be reentrant and specify a custom tcache. */
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) {
+ tcache = NULL;
+ } else {
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ }
+ } else {
+ if (likely(fast)) {
+ tcache = tsd_tcachep_get(tsd);
+ assert(tcache == tcache_get(tsd));
+ } else {
+ if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
+ tcache = tcache_get(tsd);
+ } else {
+ tcache = NULL;
+ }
+ }
+ }
UTRACE(ptr, 0, 0);
- if (config_stats || config_valgrind)
- usize = isalloc(ptr, config_prof);
- if (config_prof && opt_prof) {
- if (config_stats == false && config_valgrind == false)
- usize = isalloc(ptr, config_prof);
- prof_free(ptr, usize);
+ if (likely(fast)) {
+ tsd_assert_fast(tsd);
+ ifree(tsd, ptr, tcache, false);
+ } else {
+ ifree(tsd, ptr, tcache, true);
}
- if (config_stats)
- thread_allocated_tsd_get()->deallocated += usize;
- if (config_valgrind && opt_valgrind)
- rzsize = p2rz(ptr);
- iqalloct(ptr, try_tcache);
- JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ LOG("core.dallocx.exit", "");
}
-size_t
-je_nallocx(size_t size, int flags)
-{
+JEMALLOC_ALWAYS_INLINE size_t
+inallocx(tsdn_t *tsdn, size_t size, int flags) {
+ check_entry_exit_locking(tsdn);
+
size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) {
+ usize = sz_s2u(size);
+ } else {
+ usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
+ }
+ check_entry_exit_locking(tsdn);
+ return usize;
+}
- assert(size != 0);
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_sdallocx(void *ptr, size_t size, int flags) {
+ assert(ptr != NULL);
+ assert(malloc_initialized() || IS_INITIALIZER);
+
+ LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr,
+ size, flags);
+
+ tsd_t *tsd = tsd_fetch();
+ bool fast = tsd_fast(tsd);
+ size_t usize = inallocx(tsd_tsdn(tsd), size, flags);
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+ check_entry_exit_locking(tsd_tsdn(tsd));
+
+ tcache_t *tcache;
+ if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) {
+ /* Not allowed to be reentrant and specify a custom tcache. */
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+ if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) {
+ tcache = NULL;
+ } else {
+ tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags));
+ }
+ } else {
+ if (likely(fast)) {
+ tcache = tsd_tcachep_get(tsd);
+ assert(tcache == tcache_get(tsd));
+ } else {
+ if (likely(tsd_reentrancy_level_get(tsd) == 0)) {
+ tcache = tcache_get(tsd);
+ } else {
+ tcache = NULL;
+ }
+ }
+ }
- if (malloc_init())
- return (0);
+ UTRACE(ptr, 0, 0);
+ if (likely(fast)) {
+ tsd_assert_fast(tsd);
+ isfree(tsd, ptr, usize, tcache, false);
+ } else {
+ isfree(tsd, ptr, usize, tcache, true);
+ }
+ check_entry_exit_locking(tsd_tsdn(tsd));
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
- return (usize);
+ LOG("core.sdallocx.exit", "");
}
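
The assert above pins down the sized-free contract: the (size, flags) pair
must map to the same size class as the live allocation, which lets jemalloc
skip the usable-size lookup entirely. Caller-side sketch:

	void *p = mallocx(100, 0);
	/* ... */
	sdallocx(p, 100, 0);	/* sized free: same size/flags as allocation */
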
-int
-je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen)
-{
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+JEMALLOC_ATTR(pure)
+je_nallocx(size_t size, int flags) {
+ size_t usize;
+ tsdn_t *tsdn;
- if (malloc_init())
- return (EAGAIN);
+ assert(size != 0);
- return (ctl_byname(name, oldp, oldlenp, newp, newlen));
-}
+ if (unlikely(malloc_init())) {
+ LOG("core.nallocx.exit", "result: %zu", ZU(0));
+ return 0;
+ }
-int
-je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
-{
+ tsdn = tsdn_fetch();
+ check_entry_exit_locking(tsdn);
- if (malloc_init())
- return (EAGAIN);
+ usize = inallocx(tsdn, size, flags);
+ if (unlikely(usize > LARGE_MAXCLASS)) {
+ LOG("core.nallocx.exit", "result: %zu", ZU(0));
+ return 0;
+ }
- return (ctl_nametomib(name, mibp, miblenp));
+ check_entry_exit_locking(tsdn);
+ LOG("core.nallocx.exit", "result: %zu", usize);
+ return usize;
}
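
je_nallocx() thus runs only the size-class mapping and never allocates; e.g.:

	size_t usable = nallocx(100, MALLOCX_ALIGN(64));
	/* usable >= 100; 0 means the request could never succeed. */
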
-int
-je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
-{
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen) {
+ int ret;
+ tsd_t *tsd;
- if (malloc_init())
- return (EAGAIN);
+ LOG("core.mallctl.entry", "name: %s", name);
- return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
-}
+ if (unlikely(malloc_init())) {
+ LOG("core.mallctl.exit", "result: %d", EAGAIN);
+ return EAGAIN;
+ }
-void
-je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *opts)
-{
+ tsd = tsd_fetch();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen);
+ check_entry_exit_locking(tsd_tsdn(tsd));
- stats_print(write_cb, cbopaque, opts);
+ LOG("core.mallctl.exit", "result: %d", ret);
+ return ret;
}
-size_t
-je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
-{
- size_t ret;
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) {
+ int ret;
+
+ LOG("core.mallctlnametomib.entry", "name: %s", name);
- assert(malloc_initialized || IS_INITIALIZER);
- malloc_thread_init();
+ if (unlikely(malloc_init())) {
+ LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN);
+ return EAGAIN;
+ }
- if (config_ivsalloc)
- ret = ivsalloc(ptr, config_prof);
- else
- ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0;
+ tsd_t *tsd = tsd_fetch();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ ret = ctl_nametomib(tsd, name, mibp, miblenp);
+ check_entry_exit_locking(tsd_tsdn(tsd));
- return (ret);
+ LOG("core.mallctlnametomib.exit", "result: %d", ret);
+ return ret;
}
-/*
- * End non-standard functions.
- */
-/******************************************************************************/
-/*
- * Begin experimental functions.
- */
-#ifdef JEMALLOC_EXPERIMENTAL
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen) {
+ int ret;
+ tsd_t *tsd;
-int
-je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
-{
- void *p;
+ LOG("core.mallctlbymib.entry", "");
- assert(ptr != NULL);
+ if (unlikely(malloc_init())) {
+ LOG("core.mallctlbymib.exit", "result: %d", EAGAIN);
+ return EAGAIN;
+ }
- p = je_mallocx(size, flags);
- if (p == NULL)
- return (ALLOCM_ERR_OOM);
- if (rsize != NULL)
- *rsize = isalloc(p, config_prof);
- *ptr = p;
- return (ALLOCM_SUCCESS);
+ tsd = tsd_fetch();
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen);
+ check_entry_exit_locking(tsd_tsdn(tsd));
+ LOG("core.mallctlbymib.exit", "result: %d", ret);
+ return ret;
}
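
The nametomib/bymib split exists so hot paths can translate a name once and
then issue repeated queries through the cached MIB, patching in indices as
needed. The standard pattern from the mallctl*() documentation (nbins is
assumed to have been fetched beforehand):

	size_t mib[4];
	size_t miblen = sizeof(mib) / sizeof(mib[0]);
	mallctlnametomib("arenas.bin.0.size", mib, &miblen);
	for (unsigned i = 0; i < nbins; i++) {
		size_t bin_size, len = sizeof(bin_size);
		mib[2] = i;	/* overwrite the bin index in the MIB */
		mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
		/* ... use bin_size ... */
	}
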
-int
-je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
-{
- int ret;
- bool no_move = flags & ALLOCM_NO_MOVE;
+JEMALLOC_EXPORT void JEMALLOC_NOTHROW
+je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
+ const char *opts) {
+ tsdn_t *tsdn;
- assert(ptr != NULL);
- assert(*ptr != NULL);
- assert(size != 0);
- assert(SIZE_T_MAX - size >= extra);
+ LOG("core.malloc_stats_print.entry", "");
- if (no_move) {
- size_t usize = je_xallocx(*ptr, size, extra, flags);
- ret = (usize >= size) ? ALLOCM_SUCCESS : ALLOCM_ERR_NOT_MOVED;
- if (rsize != NULL)
- *rsize = usize;
- } else {
- void *p = je_rallocx(*ptr, size+extra, flags);
- if (p != NULL) {
- *ptr = p;
- ret = ALLOCM_SUCCESS;
- } else
- ret = ALLOCM_ERR_OOM;
- if (rsize != NULL)
- *rsize = isalloc(*ptr, config_prof);
- }
- return (ret);
+ tsdn = tsdn_fetch();
+ check_entry_exit_locking(tsdn);
+ stats_print(write_cb, cbopaque, opts);
+ check_entry_exit_locking(tsdn);
+ LOG("core.malloc_stats_print.exit", "");
}
-int
-je_sallocm(const void *ptr, size_t *rsize, int flags)
-{
+JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW
+je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) {
+ size_t ret;
+ tsdn_t *tsdn;
- assert(rsize != NULL);
- *rsize = je_sallocx(ptr, flags);
- return (ALLOCM_SUCCESS);
-}
+ LOG("core.malloc_usable_size.entry", "ptr: %p", ptr);
-int
-je_dallocm(void *ptr, int flags)
-{
+ assert(malloc_initialized() || IS_INITIALIZER);
- je_dallocx(ptr, flags);
- return (ALLOCM_SUCCESS);
-}
+ tsdn = tsdn_fetch();
+ check_entry_exit_locking(tsdn);
-int
-je_nallocm(size_t *rsize, size_t size, int flags)
-{
- size_t usize;
+ if (unlikely(ptr == NULL)) {
+ ret = 0;
+ } else {
+ if (config_debug || force_ivsalloc) {
+ ret = ivsalloc(tsdn, ptr);
+ assert(force_ivsalloc || ret != 0);
+ } else {
+ ret = isalloc(tsdn, ptr);
+ }
+ }
- usize = je_nallocx(size, flags);
- if (usize == 0)
- return (ALLOCM_ERR_OOM);
- if (rsize != NULL)
- *rsize = usize;
- return (ALLOCM_SUCCESS);
+ check_entry_exit_locking(tsdn);
+ LOG("core.malloc_usable_size.exit", "result: %zu", ret);
+ return ret;
}
-#endif
/*
- * End experimental functions.
+ * End non-standard functions.
*/
/******************************************************************************/
/*
@@ -1966,17 +3173,17 @@ je_nallocm(size_t *rsize, size_t size, int flags)
* fork/malloc races via the following functions it registers during
* initialization using pthread_atfork(), but of course that does no good if
* the allocator isn't fully initialized at fork time. The following library
- * constructor is a partial solution to this problem. It may still possible to
- * trigger the deadlock described above, but doing so would involve forking via
- * a library constructor that runs before jemalloc's runs.
+ * constructor is a partial solution to this problem. It may still be possible
+ * to trigger the deadlock described above, but doing so would involve forking
+ * via a library constructor that runs before jemalloc's runs.
*/
+#ifndef JEMALLOC_JET
JEMALLOC_ATTR(constructor)
static void
-jemalloc_constructor(void)
-{
-
+jemalloc_constructor(void) {
malloc_init();
}
+#endif
#ifndef JEMALLOC_MUTEX_INIT_CB
void
@@ -1986,25 +3193,69 @@ JEMALLOC_EXPORT void
_malloc_prefork(void)
#endif
{
- unsigned i;
+ tsd_t *tsd;
+ unsigned i, j, narenas;
+ arena_t *arena;
#ifdef JEMALLOC_MUTEX_INIT_CB
- if (malloc_initialized == false)
+ if (!malloc_initialized()) {
return;
+ }
#endif
- assert(malloc_initialized);
+ assert(malloc_initialized());
+
+ tsd = tsd_fetch();
+ narenas = narenas_total_get();
+
+ witness_prefork(tsd_witness_tsdp_get(tsd));
/* Acquire all mutexes in a safe order. */
- ctl_prefork();
- prof_prefork();
- malloc_mutex_prefork(&arenas_lock);
- for (i = 0; i < narenas_total; i++) {
- if (arenas[i] != NULL)
- arena_prefork(arenas[i]);
+ ctl_prefork(tsd_tsdn(tsd));
+ tcache_prefork(tsd_tsdn(tsd));
+ malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock);
+ if (have_background_thread) {
+ background_thread_prefork0(tsd_tsdn(tsd));
+ }
+ prof_prefork0(tsd_tsdn(tsd));
+ if (have_background_thread) {
+ background_thread_prefork1(tsd_tsdn(tsd));
}
- chunk_prefork();
- base_prefork();
- huge_prefork();
+ /* Break arena prefork into stages to preserve lock order. */
+ for (i = 0; i < 8; i++) {
+ for (j = 0; j < narenas; j++) {
+ if ((arena = arena_get(tsd_tsdn(tsd), j, false)) !=
+ NULL) {
+ switch (i) {
+ case 0:
+ arena_prefork0(tsd_tsdn(tsd), arena);
+ break;
+ case 1:
+ arena_prefork1(tsd_tsdn(tsd), arena);
+ break;
+ case 2:
+ arena_prefork2(tsd_tsdn(tsd), arena);
+ break;
+ case 3:
+ arena_prefork3(tsd_tsdn(tsd), arena);
+ break;
+ case 4:
+ arena_prefork4(tsd_tsdn(tsd), arena);
+ break;
+ case 5:
+ arena_prefork5(tsd_tsdn(tsd), arena);
+ break;
+ case 6:
+ arena_prefork6(tsd_tsdn(tsd), arena);
+ break;
+ case 7:
+ arena_prefork7(tsd_tsdn(tsd), arena);
+ break;
+ default: not_reached();
+ }
+ }
+ }
+ }
+ prof_prefork1(tsd_tsdn(tsd));
}
#ifndef JEMALLOC_MUTEX_INIT_CB
@@ -2015,97 +3266,72 @@ JEMALLOC_EXPORT void
_malloc_postfork(void)
#endif
{
- unsigned i;
+ tsd_t *tsd;
+ unsigned i, narenas;
#ifdef JEMALLOC_MUTEX_INIT_CB
- if (malloc_initialized == false)
+ if (!malloc_initialized()) {
return;
+ }
#endif
- assert(malloc_initialized);
+ assert(malloc_initialized());
+
+ tsd = tsd_fetch();
+ witness_postfork_parent(tsd_witness_tsdp_get(tsd));
/* Release all mutexes, now that fork() has completed. */
- huge_postfork_parent();
- base_postfork_parent();
- chunk_postfork_parent();
- for (i = 0; i < narenas_total; i++) {
- if (arenas[i] != NULL)
- arena_postfork_parent(arenas[i]);
+ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
+ arena_t *arena;
+
+ if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) {
+ arena_postfork_parent(tsd_tsdn(tsd), arena);
+ }
+ }
+ prof_postfork_parent(tsd_tsdn(tsd));
+ if (have_background_thread) {
+ background_thread_postfork_parent(tsd_tsdn(tsd));
}
- malloc_mutex_postfork_parent(&arenas_lock);
- prof_postfork_parent();
- ctl_postfork_parent();
+ malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock);
+ tcache_postfork_parent(tsd_tsdn(tsd));
+ ctl_postfork_parent(tsd_tsdn(tsd));
}
void
-jemalloc_postfork_child(void)
-{
- unsigned i;
+jemalloc_postfork_child(void) {
+ tsd_t *tsd;
+ unsigned i, narenas;
- assert(malloc_initialized);
+ assert(malloc_initialized());
+ tsd = tsd_fetch();
+
+ witness_postfork_child(tsd_witness_tsdp_get(tsd));
/* Release all mutexes, now that fork() has completed. */
- huge_postfork_child();
- base_postfork_child();
- chunk_postfork_child();
- for (i = 0; i < narenas_total; i++) {
- if (arenas[i] != NULL)
- arena_postfork_child(arenas[i]);
+ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) {
+ arena_t *arena;
+
+ if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) {
+ arena_postfork_child(tsd_tsdn(tsd), arena);
+ }
}
- malloc_mutex_postfork_child(&arenas_lock);
- prof_postfork_child();
- ctl_postfork_child();
+ prof_postfork_child(tsd_tsdn(tsd));
+ if (have_background_thread) {
+ background_thread_postfork_child(tsd_tsdn(tsd));
+ }
+ malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock);
+ tcache_postfork_child(tsd_tsdn(tsd));
+ ctl_postfork_child(tsd_tsdn(tsd));
}
/******************************************************************************/
-/*
- * The following functions are used for TLS allocation/deallocation in static
- * binaries on FreeBSD. The primary difference between these and i[mcd]alloc()
- * is that these avoid accessing TLS variables.
- */
-
-static void *
-a0alloc(size_t size, bool zero)
-{
- if (malloc_init())
- return (NULL);
-
- if (size == 0)
- size = 1;
-
- if (size <= arena_maxclass)
- return (arena_malloc(arenas[0], size, zero, false));
- else
- return (huge_malloc(size, zero, huge_dss_prec_get(arenas[0])));
-}
-
-void *
-a0malloc(size_t size)
-{
-
- return (a0alloc(size, false));
-}
-
-void *
-a0calloc(size_t num, size_t size)
-{
-
- return (a0alloc(num * size, true));
-}
-
-void
-a0free(void *ptr)
-{
- arena_chunk_t *chunk;
-
- if (ptr == NULL)
- return;
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr)
- arena_dalloc(chunk->arena, chunk, ptr, false);
- else
- huge_dalloc(ptr, true);
+/* Helps the application decide whether a pointer is worth re-allocating in
+ * order to reduce fragmentation.
+ * Returns 0 if the allocation is in the currently active run, or if it cannot
+ * cause fragmentation (a large or huge allocation).
+ * Otherwise returns the bin utilization and run utilization, both as 16:16
+ * fixed-point values.
+ * If the application decides to re-allocate, it should use MALLOCX_TCACHE_NONE
+ * when doing so. */
+JEMALLOC_EXPORT int JEMALLOC_NOTHROW
+get_defrag_hint(void* ptr, int *bin_util, int *run_util) {
+ assert(ptr != NULL);
+ return iget_defrag_hint(TSDN_NULL, ptr, bin_util, run_util);
}
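
A hedged sketch of the intended caller-side use (modeled on an active-defrag
pass; the move policy shown is illustrative, not part of this API):

	int bin_util, run_util;
	if (get_defrag_hint(ptr, &bin_util, &run_util) &&
	    run_util < bin_util && run_util != (1 << 16)) {
		/* The run is under-utilized and not full: copy the data out,
		 * bypassing the tcache so the copy lands in a denser run. */
		size_t size = malloc_usable_size(ptr);
		void *newptr = mallocx(size, MALLOCX_TCACHE_NONE);
		if (newptr != NULL) {
			memcpy(newptr, ptr, size);
			dallocx(ptr, MALLOCX_TCACHE_NONE);
			ptr = newptr;
		}
	}
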
-
-/******************************************************************************/
diff --git a/deps/jemalloc/src/jemalloc_cpp.cpp b/deps/jemalloc/src/jemalloc_cpp.cpp
new file mode 100644
index 000000000..f0ceddae3
--- /dev/null
+++ b/deps/jemalloc/src/jemalloc_cpp.cpp
@@ -0,0 +1,141 @@
+#include <mutex>
+#include <new>
+
+#define JEMALLOC_CPP_CPP_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+// All operators in this file are exported.
+
+// Possibly alias hidden versions of malloc and sdallocx to avoid an extra plt
+// thunk?
+//
+// extern __typeof (sdallocx) sdallocx_int
+// __attribute ((alias ("sdallocx"),
+// visibility ("hidden")));
+//
+// ... but it needs to work with jemalloc namespaces.
+
+void *operator new(std::size_t size);
+void *operator new[](std::size_t size);
+void *operator new(std::size_t size, const std::nothrow_t &) noexcept;
+void *operator new[](std::size_t size, const std::nothrow_t &) noexcept;
+void operator delete(void *ptr) noexcept;
+void operator delete[](void *ptr) noexcept;
+void operator delete(void *ptr, const std::nothrow_t &) noexcept;
+void operator delete[](void *ptr, const std::nothrow_t &) noexcept;
+
+#if __cpp_sized_deallocation >= 201309
+/* C++14's sized-delete operators. */
+void operator delete(void *ptr, std::size_t size) noexcept;
+void operator delete[](void *ptr, std::size_t size) noexcept;
+#endif
+
+JEMALLOC_NOINLINE
+static void *
+handleOOM(std::size_t size, bool nothrow) {
+ void *ptr = nullptr;
+
+ while (ptr == nullptr) {
+ std::new_handler handler;
+ // GCC-4.8 and clang 4.0 do not have std::get_new_handler.
+ {
+ static std::mutex mtx;
+ std::lock_guard<std::mutex> lock(mtx);
+
+ handler = std::set_new_handler(nullptr);
+ std::set_new_handler(handler);
+ }
+ if (handler == nullptr)
+ break;
+
+ try {
+ handler();
+ } catch (const std::bad_alloc &) {
+ break;
+ }
+
+ ptr = je_malloc(size);
+ }
+
+ if (ptr == nullptr && !nothrow)
+ std::__throw_bad_alloc();
+ return ptr;
+}
+
+template <bool IsNoExcept>
+JEMALLOC_ALWAYS_INLINE
+void *
+newImpl(std::size_t size) noexcept(IsNoExcept) {
+ void *ptr = je_malloc(size);
+ if (likely(ptr != nullptr))
+ return ptr;
+
+ return handleOOM(size, IsNoExcept);
+}
+
+void *
+operator new(std::size_t size) {
+ return newImpl<false>(size);
+}
+
+void *
+operator new[](std::size_t size) {
+ return newImpl<false>(size);
+}
+
+void *
+operator new(std::size_t size, const std::nothrow_t &) noexcept {
+ return newImpl<true>(size);
+}
+
+void *
+operator new[](std::size_t size, const std::nothrow_t &) noexcept {
+ return newImpl<true>(size);
+}
+
+void
+operator delete(void *ptr) noexcept {
+ je_free(ptr);
+}
+
+void
+operator delete[](void *ptr) noexcept {
+ je_free(ptr);
+}
+
+void
+operator delete(void *ptr, const std::nothrow_t &) noexcept {
+ je_free(ptr);
+}
+
+void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
+ je_free(ptr);
+}
+
+#if __cpp_sized_deallocation >= 201309
+
+void
+operator delete(void *ptr, std::size_t size) noexcept {
+ if (unlikely(ptr == nullptr)) {
+ return;
+ }
+ je_sdallocx(ptr, size, /*flags=*/0);
+}
+
+void operator delete[](void *ptr, std::size_t size) noexcept {
+ if (unlikely(ptr == nullptr)) {
+ return;
+ }
+ je_sdallocx(ptr, size, /*flags=*/0);
+}
+
+#endif // __cpp_sized_deallocation
diff --git a/deps/jemalloc/src/large.c b/deps/jemalloc/src/large.c
new file mode 100644
index 000000000..27a2c6798
--- /dev/null
+++ b/deps/jemalloc/src/large.c
@@ -0,0 +1,371 @@
+#define JEMALLOC_LARGE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/util.h"
+
+/******************************************************************************/
+
+void *
+large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) {
+ assert(usize == sz_s2u(usize));
+
+ return large_palloc(tsdn, arena, usize, CACHELINE, zero);
+}
+
+void *
+large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+ bool zero) {
+ size_t ausize;
+ extent_t *extent;
+ bool is_zeroed;
+ UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false);
+
+ assert(!tsdn_null(tsdn) || arena != NULL);
+
+ ausize = sz_sa2u(usize, alignment);
+ if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) {
+ return NULL;
+ }
+
+ if (config_fill && unlikely(opt_zero)) {
+ zero = true;
+ }
+ /*
+ * Copy zero into is_zeroed and pass the copy when allocating the
+ * extent, so that it is possible to make correct junk/zero fill
+ * decisions below, even if is_zeroed ends up true when zero is false.
+ */
+ is_zeroed = zero;
+ if (likely(!tsdn_null(tsdn))) {
+ arena = arena_choose(tsdn_tsd(tsdn), arena);
+ }
+ if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn,
+ arena, usize, alignment, &is_zeroed)) == NULL) {
+ return NULL;
+ }
+
+ /* See comments in arena_bin_slabs_full_insert(). */
+ if (!arena_is_auto(arena)) {
+ /* Insert extent into large. */
+ malloc_mutex_lock(tsdn, &arena->large_mtx);
+ extent_list_append(&arena->large, extent);
+ malloc_mutex_unlock(tsdn, &arena->large_mtx);
+ }
+ if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
+ prof_idump(tsdn);
+ }
+
+ if (zero) {
+ assert(is_zeroed);
+ } else if (config_fill && unlikely(opt_junk_alloc)) {
+ memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK,
+ extent_usize_get(extent));
+ }
+
+ arena_decay_tick(tsdn, arena);
+ return extent_addr_get(extent);
+}
+
+static void
+large_dalloc_junk_impl(void *ptr, size_t size) {
+ memset(ptr, JEMALLOC_FREE_JUNK, size);
+}
+large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk = large_dalloc_junk_impl;
+
+static void
+large_dalloc_maybe_junk_impl(void *ptr, size_t size) {
+ if (config_fill && have_dss && unlikely(opt_junk_free)) {
+ /*
+ * Only bother junk filling if the extent isn't about to be
+ * unmapped.
+ */
+ if (opt_retain || (have_dss && extent_in_dss(ptr))) {
+ large_dalloc_junk(ptr, size);
+ }
+ }
+}
+large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk =
+ large_dalloc_maybe_junk_impl;
+
+static bool
+large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) {
+ arena_t *arena = extent_arena_get(extent);
+ size_t oldusize = extent_usize_get(extent);
+ extent_hooks_t *extent_hooks = extent_hooks_get(arena);
+ size_t diff = extent_size_get(extent) - (usize + sz_large_pad);
+
+ assert(oldusize > usize);
+
+ if (extent_hooks->split == NULL) {
+ return true;
+ }
+
+ /* Split excess pages. */
+ if (diff != 0) {
+ extent_t *trail = extent_split_wrapper(tsdn, arena,
+ &extent_hooks, extent, usize + sz_large_pad,
+ sz_size2index(usize), false, diff, NSIZES, false);
+ if (trail == NULL) {
+ return true;
+ }
+
+ if (config_fill && unlikely(opt_junk_free)) {
+ large_dalloc_maybe_junk(extent_addr_get(trail),
+ extent_size_get(trail));
+ }
+
+ arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, trail);
+ }
+
+ arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize);
+
+ return false;
+}
+
+static bool
+large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize,
+ bool zero) {
+ arena_t *arena = extent_arena_get(extent);
+ size_t oldusize = extent_usize_get(extent);
+ extent_hooks_t *extent_hooks = extent_hooks_get(arena);
+ size_t trailsize = usize - oldusize;
+
+ if (extent_hooks->merge == NULL) {
+ return true;
+ }
+
+ if (config_fill && unlikely(opt_zero)) {
+ zero = true;
+ }
+ /*
+ * Copy zero into is_zeroed_trail and pass the copy when allocating the
+ * extent, so that it is possible to make correct junk/zero fill
+ * decisions below, even if is_zeroed_trail ends up true when zero is
+ * false.
+ */
+ bool is_zeroed_trail = zero;
+ bool commit = true;
+ extent_t *trail;
+ bool new_mapping;
+ if ((trail = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_dirty, extent_past_get(extent), trailsize, 0,
+ CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL
+ || (trail = extents_alloc(tsdn, arena, &extent_hooks,
+ &arena->extents_muzzy, extent_past_get(extent), trailsize, 0,
+ CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) {
+ if (config_stats) {
+ new_mapping = false;
+ }
+ } else {
+ if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks,
+ extent_past_get(extent), trailsize, 0, CACHELINE, false,
+ NSIZES, &is_zeroed_trail, &commit)) == NULL) {
+ return true;
+ }
+ if (config_stats) {
+ new_mapping = true;
+ }
+ }
+
+ if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) {
+ extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail);
+ return true;
+ }
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+ szind_t szind = sz_size2index(usize);
+ extent_szind_set(extent, szind);
+ rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)extent_addr_get(extent), szind, false);
+
+ if (config_stats && new_mapping) {
+ arena_stats_mapped_add(tsdn, &arena->stats, trailsize);
+ }
+
+ if (zero) {
+ if (config_cache_oblivious) {
+ /*
+ * Zero the trailing bytes of the original allocation's
+ * last page, since they are in an indeterminate state.
+ * There will always be trailing bytes, because ptr's
+ * offset from the beginning of the extent is a multiple
+ * of CACHELINE in [0 .. PAGE).
+ */
+ void *zbase = (void *)
+ ((uintptr_t)extent_addr_get(extent) + oldusize);
+ void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase +
+ PAGE));
+ size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase;
+ assert(nzero > 0);
+ memset(zbase, 0, nzero);
+ }
+ assert(is_zeroed_trail);
+ } else if (config_fill && unlikely(opt_junk_alloc)) {
+ memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize),
+ JEMALLOC_ALLOC_JUNK, usize - oldusize);
+ }
+
+ arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize);
+
+ return false;
+}
+
+bool
+large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
+ size_t usize_max, bool zero) {
+ size_t oldusize = extent_usize_get(extent);
+
+ /* The following should have been caught by callers. */
+ assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS);
+ /* Both allocation sizes must be large to avoid a move. */
+ assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS);
+
+ if (usize_max > oldusize) {
+ /* Attempt to expand the allocation in-place. */
+ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max,
+ zero)) {
+ arena_decay_tick(tsdn, extent_arena_get(extent));
+ return false;
+ }
+ /* Try again, this time with usize_min. */
+ if (usize_min < usize_max && usize_min > oldusize &&
+ large_ralloc_no_move_expand(tsdn, extent, usize_min,
+ zero)) {
+ arena_decay_tick(tsdn, extent_arena_get(extent));
+ return false;
+ }
+ }
+
+ /*
+ * Avoid moving the allocation if the existing extent size accommodates
+ * the new size.
+ */
+ if (oldusize >= usize_min && oldusize <= usize_max) {
+ arena_decay_tick(tsdn, extent_arena_get(extent));
+ return false;
+ }
+
+ /* Attempt to shrink the allocation in-place. */
+ if (oldusize > usize_max) {
+ if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) {
+ arena_decay_tick(tsdn, extent_arena_get(extent));
+ return false;
+ }
+ }
+ return true;
+}
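
A worked example of the policy above, with illustrative sizes: for
oldusize = 16K, usize_min = 12K, usize_max = 20K, in-place expansion to 20K
is attempted first; if it fails, the usize_min retry is skipped (12K is not
greater than 16K), but 16K already lies in [12K, 20K], so the extent is kept
and the function returns false (success without moving). Shrinking is
attempted only when oldusize exceeds usize_max.
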
+
+static void *
+large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
+ size_t alignment, bool zero) {
+ if (alignment <= CACHELINE) {
+ return large_malloc(tsdn, arena, usize, zero);
+ }
+ return large_palloc(tsdn, arena, usize, alignment, zero);
+}
+
+void *
+large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
+ size_t alignment, bool zero, tcache_t *tcache) {
+ size_t oldusize = extent_usize_get(extent);
+
+ /* The following should have been caught by callers. */
+ assert(usize > 0 && usize <= LARGE_MAXCLASS);
+ /* Both allocation sizes must be large to avoid a move. */
+ assert(oldusize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS);
+
+ /* Try to avoid moving the allocation. */
+ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) {
+ return extent_addr_get(extent);
+ }
+
+ /*
+ * usize and old size are different enough that we need to use a
+ * different size class. In that case, fall back to allocating new
+ * space and copying.
+ */
+ void *ret = large_ralloc_move_helper(tsdn, arena, usize, alignment,
+ zero);
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ size_t copysize = (usize < oldusize) ? usize : oldusize;
+ memcpy(ret, extent_addr_get(extent), copysize);
+ isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true);
+ return ret;
+}
+
+/*
+ * junked_locked indicates whether the extent's data have been junk-filled, and
+ * whether the arena's large_mtx is currently held.
+ */
+static void
+large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
+ bool junked_locked) {
+ if (!junked_locked) {
+ /* See comments in arena_bin_slabs_full_insert(). */
+ if (!arena_is_auto(arena)) {
+ malloc_mutex_lock(tsdn, &arena->large_mtx);
+ extent_list_remove(&arena->large, extent);
+ malloc_mutex_unlock(tsdn, &arena->large_mtx);
+ }
+ large_dalloc_maybe_junk(extent_addr_get(extent),
+ extent_usize_get(extent));
+ } else {
+ malloc_mutex_assert_owner(tsdn, &arena->large_mtx);
+ if (!arena_is_auto(arena)) {
+ extent_list_remove(&arena->large, extent);
+ }
+ }
+ arena_extent_dalloc_large_prep(tsdn, arena, extent);
+}
+
+static void
+large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
+ extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+ arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, extent);
+}
+
+void
+large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) {
+ large_dalloc_prep_impl(tsdn, extent_arena_get(extent), extent, true);
+}
+
+void
+large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) {
+ large_dalloc_finish_impl(tsdn, extent_arena_get(extent), extent);
+}
+
+void
+large_dalloc(tsdn_t *tsdn, extent_t *extent) {
+ arena_t *arena = extent_arena_get(extent);
+ large_dalloc_prep_impl(tsdn, arena, extent, false);
+ large_dalloc_finish_impl(tsdn, arena, extent);
+ arena_decay_tick(tsdn, arena);
+}
+
+size_t
+large_salloc(tsdn_t *tsdn, const extent_t *extent) {
+ return extent_usize_get(extent);
+}
+
+prof_tctx_t *
+large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) {
+ return extent_prof_tctx_get(extent);
+}
+
+void
+large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) {
+ extent_prof_tctx_set(extent, tctx);
+}
+
+void
+large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) {
+ large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U);
+}
diff --git a/deps/jemalloc/src/log.c b/deps/jemalloc/src/log.c
new file mode 100644
index 000000000..778902fb9
--- /dev/null
+++ b/deps/jemalloc/src/log.c
@@ -0,0 +1,78 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/log.h"
+
+char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
+atomic_b_t log_init_done = ATOMIC_INIT(false);
+
+/*
+ * Returns a pointer to the first character past the end of the current
+ * segment, i.e. the next '|' delimiter or the string's terminating NUL.
+ */
+static const char *
+log_var_extract_segment(const char* segment_begin) {
+ const char *end;
+ for (end = segment_begin; *end != '\0' && *end != '|'; end++) {
+ }
+ return end;
+}
+
+static bool
+log_var_matches_segment(const char *segment_begin, const char *segment_end,
+ const char *log_var_begin, const char *log_var_end) {
+ assert(segment_begin <= segment_end);
+ assert(log_var_begin < log_var_end);
+
+ ptrdiff_t segment_len = segment_end - segment_begin;
+ ptrdiff_t log_var_len = log_var_end - log_var_begin;
+ /* The special '.' segment matches everything. */
+ if (segment_len == 1 && *segment_begin == '.') {
+ return true;
+ }
+ if (segment_len == log_var_len) {
+ return strncmp(segment_begin, log_var_begin, segment_len) == 0;
+ } else if (segment_len < log_var_len) {
+ return strncmp(segment_begin, log_var_begin, segment_len) == 0
+ && log_var_begin[segment_len] == '.';
+ } else {
+ return false;
+ }
+}
+
+unsigned
+log_var_update_state(log_var_t *log_var) {
+ const char *log_var_begin = log_var->name;
+ const char *log_var_end = log_var->name + strlen(log_var->name);
+
+	/* Pointer to the beginning of the current segment. */
+ const char *segment_begin = log_var_names;
+
+ /*
+	 * If log_init_done is false, we haven't parsed the malloc conf yet. To
+ * avoid log-spew, we default to not displaying anything.
+ */
+ if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) {
+ return LOG_INITIALIZED_NOT_ENABLED;
+ }
+
+ while (true) {
+ const char *segment_end = log_var_extract_segment(
+ segment_begin);
+ assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE);
+ if (log_var_matches_segment(segment_begin, segment_end,
+ log_var_begin, log_var_end)) {
+ atomic_store_u(&log_var->state, LOG_ENABLED,
+ ATOMIC_RELAXED);
+ return LOG_ENABLED;
+ }
+ if (*segment_end == '\0') {
+ /* Hit the end of the segment string with no match. */
+ atomic_store_u(&log_var->state,
+ LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED);
+ return LOG_INITIALIZED_NOT_ENABLED;
+ }
+ /* Otherwise, skip the delimiter and continue. */
+ segment_begin = segment_end + 1;
+ }
+}
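
Concretely, a log var such as "core.mallocx.entry" is enabled when any
'|'-separated segment of the configured pattern matches it exactly, as a
dotted prefix, or as the "." wildcard (the patterns come from the log option
in the malloc conf, available in builds configured with --enable-log):

	pattern			vs "core.mallocx.entry"
	"."			matches (wildcard)
	"core.mallocx"		matches (prefix ending at a '.')
	"core.mallocx.entry"	matches (exact)
	"core.mall"		no match (prefix not at a '.' boundary)
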
diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/malloc_io.c
index 93a19fd16..7bdc13f95 100644
--- a/deps/jemalloc/src/util.c
+++ b/deps/jemalloc/src/malloc_io.c
@@ -1,59 +1,76 @@
-#define assert(e) do { \
+#define JEMALLOC_MALLOC_IO_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/util.h"
+
+#ifdef assert
+# undef assert
+#endif
+#ifdef not_reached
+# undef not_reached
+#endif
+#ifdef not_implemented
+# undef not_implemented
+#endif
+#ifdef assert_not_implemented
+# undef assert_not_implemented
+#endif
+
+/*
+ * Define simple versions of assertion macros that won't recurse in case
+ * of assertion failures in malloc_*printf().
+ */
+#define assert(e) do { \
if (config_debug && !(e)) { \
malloc_write("<jemalloc>: Failed assertion\n"); \
abort(); \
} \
} while (0)
-#define not_reached() do { \
+#define not_reached() do { \
if (config_debug) { \
malloc_write("<jemalloc>: Unreachable code reached\n"); \
abort(); \
} \
+ unreachable(); \
} while (0)
-#define not_implemented() do { \
+#define not_implemented() do { \
if (config_debug) { \
malloc_write("<jemalloc>: Not implemented\n"); \
abort(); \
} \
} while (0)
-#define JEMALLOC_UTIL_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define assert_not_implemented(e) do { \
+ if (unlikely(config_debug && !(e))) { \
+ not_implemented(); \
+ } \
+} while (0)
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-static void wrtmessage(void *cbopaque, const char *s);
-#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
-static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
+static void wrtmessage(void *cbopaque, const char *s);
+#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
+static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
size_t *slen_p);
-#define D2S_BUFSIZE (1 + U2S_BUFSIZE)
-static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p);
-#define O2S_BUFSIZE (1 + U2S_BUFSIZE)
-static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
-#define X2S_BUFSIZE (2 + U2S_BUFSIZE)
-static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
+#define D2S_BUFSIZE (1 + U2S_BUFSIZE)
+static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p);
+#define O2S_BUFSIZE (1 + U2S_BUFSIZE)
+static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
+#define X2S_BUFSIZE (2 + U2S_BUFSIZE)
+static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
size_t *slen_p);
/******************************************************************************/
/* malloc_message() setup. */
static void
-wrtmessage(void *cbopaque, const char *s)
-{
-
-#ifdef SYS_write
- /*
- * Use syscall(2) rather than write(2) when possible in order to avoid
- * the possibility of memory allocation within libc. This is necessary
- * on FreeBSD; most operating systems do not have this problem though.
- */
- UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s));
-#else
- UNUSED int result = write(STDERR_FILENO, s, strlen(s));
-#endif
+wrtmessage(void *cbopaque, const char *s) {
+ malloc_write_fd(STDERR_FILENO, s, strlen(s));
}
JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s);
@@ -63,13 +80,12 @@ JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s);
* je_malloc_message(...) throughout the code.
*/
void
-malloc_write(const char *s)
-{
-
- if (je_malloc_message != NULL)
+malloc_write(const char *s) {
+ if (je_malloc_message != NULL) {
je_malloc_message(NULL, s);
- else
+ } else {
wrtmessage(NULL, s);
+ }
}
/*
@@ -77,30 +93,27 @@ malloc_write(const char *s)
* provide a wrapper.
*/
int
-buferror(int err, char *buf, size_t buflen)
-{
-
+buferror(int err, char *buf, size_t buflen) {
#ifdef _WIN32
- FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0,
- (LPSTR)buf, buflen, NULL);
- return (0);
-#elif defined(_GNU_SOURCE)
+ FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
+ (LPSTR)buf, (DWORD)buflen, NULL);
+ return 0;
+#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE)
char *b = strerror_r(err, buf, buflen);
if (b != buf) {
strncpy(buf, b, buflen);
buf[buflen-1] = '\0';
}
- return (0);
+ return 0;
#else
- return (strerror_r(err, buf, buflen));
+ return strerror_r(err, buf, buflen);
#endif
}
uintmax_t
-malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
-{
+malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
uintmax_t ret, digit;
- int b;
+ unsigned b;
bool neg;
const char *p, *ns;
@@ -143,10 +156,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
switch (p[1]) {
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7':
- if (b == 0)
+ if (b == 0) {
b = 8;
- if (b == 8)
+ }
+ if (b == 8) {
p++;
+ }
break;
case 'X': case 'x':
switch (p[2]) {
@@ -156,10 +171,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
case 'F':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f':
- if (b == 0)
+ if (b == 0) {
b = 16;
- if (b == 16)
+ }
+ if (b == 16) {
p += 2;
+ }
break;
default:
break;
@@ -171,8 +188,9 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
goto label_return;
}
}
- if (b == 0)
+ if (b == 0) {
b = 10;
+ }
/* Convert. */
ret = 0;
@@ -190,8 +208,9 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base)
}
p++;
}
- if (neg)
- ret = -ret;
+ if (neg) {
+ ret = (uintmax_t)(-((intmax_t)ret));
+ }
if (p == ns) {
/* No conversion performed. */
@@ -205,15 +224,15 @@ label_return:
if (p == ns) {
/* No characters were converted. */
*endptr = (char *)nptr;
- } else
+ } else {
*endptr = (char *)p;
+ }
}
- return (ret);
+ return ret;
}
static char *
-u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p)
-{
+u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) {
unsigned i;
i = U2S_BUFSIZE - 1;
@@ -251,23 +270,25 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p)
}}
*slen_p = U2S_BUFSIZE - 1 - i;
- return (&s[i]);
+ return &s[i];
}
static char *
-d2s(intmax_t x, char sign, char *s, size_t *slen_p)
-{
+d2s(intmax_t x, char sign, char *s, size_t *slen_p) {
bool neg;
- if ((neg = (x < 0)))
+ if ((neg = (x < 0))) {
x = -x;
+ }
s = u2s(x, 10, false, s, slen_p);
- if (neg)
+ if (neg) {
sign = '-';
+ }
switch (sign) {
case '-':
- if (neg == false)
+ if (!neg) {
break;
+ }
/* Fall through. */
case ' ':
case '+':
@@ -277,73 +298,70 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p)
break;
default: not_reached();
}
- return (s);
+ return s;
}
static char *
-o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p)
-{
-
+o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) {
s = u2s(x, 8, false, s, slen_p);
if (alt_form && *s != '0') {
s--;
(*slen_p)++;
*s = '0';
}
- return (s);
+ return s;
}
static char *
-x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p)
-{
-
+x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) {
s = u2s(x, 16, uppercase, s, slen_p);
if (alt_form) {
s -= 2;
(*slen_p) += 2;
memcpy(s, uppercase ? "0X" : "0x", 2);
}
- return (s);
+ return s;
}
-int
-malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
-{
- int ret;
+size_t
+malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
size_t i;
const char *f;
-#define APPEND_C(c) do { \
- if (i < size) \
+#define APPEND_C(c) do { \
+ if (i < size) { \
str[i] = (c); \
+ } \
i++; \
} while (0)
-#define APPEND_S(s, slen) do { \
+#define APPEND_S(s, slen) do { \
if (i < size) { \
size_t cpylen = (slen <= size - i) ? slen : size - i; \
memcpy(&str[i], s, cpylen); \
} \
i += slen; \
} while (0)
-#define APPEND_PADDED_S(s, slen, width, left_justify) do { \
+#define APPEND_PADDED_S(s, slen, width, left_justify) do { \
/* Left padding. */ \
size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \
(size_t)width - slen : 0); \
- if (left_justify == false && pad_len != 0) { \
+ if (!left_justify && pad_len != 0) { \
size_t j; \
- for (j = 0; j < pad_len; j++) \
+ for (j = 0; j < pad_len; j++) { \
APPEND_C(' '); \
+ } \
} \
/* Value. */ \
APPEND_S(s, slen); \
/* Right padding. */ \
if (left_justify && pad_len != 0) { \
size_t j; \
- for (j = 0; j < pad_len; j++) \
+ for (j = 0; j < pad_len; j++) { \
APPEND_C(' '); \
+ } \
} \
} while (0)
-#define GET_ARG_NUMERIC(val, len) do { \
+#define GET_ARG_NUMERIC(val, len) do { \
switch (len) { \
case '?': \
val = va_arg(ap, int); \
@@ -381,7 +399,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
case 'p': /* Synthetic; used for %p. */ \
val = va_arg(ap, uintptr_t); \
break; \
- default: not_reached(); \
+ default: \
+ not_reached(); \
+ val = 0; \
} \
} while (0)
@@ -398,25 +418,27 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
int prec = -1;
int width = -1;
unsigned char len = '?';
+ char *s;
+ size_t slen;
f++;
/* Flags. */
while (true) {
switch (*f) {
case '#':
- assert(alt_form == false);
+ assert(!alt_form);
alt_form = true;
break;
case '-':
- assert(left_justify == false);
+ assert(!left_justify);
left_justify = true;
break;
case ' ':
- assert(plus_space == false);
+ assert(!plus_space);
plus_space = true;
break;
case '+':
- assert(plus_plus == false);
+ assert(!plus_plus);
plus_plus = true;
break;
default: goto label_width;
@@ -447,10 +469,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
break;
}
/* Width/precision separator. */
- if (*f == '.')
+ if (*f == '.') {
f++;
- else
+ } else {
goto label_length;
+ }
/* Precision. */
switch (*f) {
case '*':
@@ -477,8 +500,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
if (*f == 'l') {
len = 'q';
f++;
- } else
+ } else {
len = 'l';
+ }
break;
case 'q': case 'j': case 't': case 'z':
len = *f;
@@ -488,8 +512,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
}
/* Conversion specifier. */
switch (*f) {
- char *s;
- size_t slen;
case '%':
/* %% */
APPEND_C(*f);
@@ -548,7 +570,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
assert(len == '?' || len == 'l');
assert_not_implemented(len != 'l');
s = va_arg(ap, char *);
- slen = (prec < 0) ? strlen(s) : prec;
+ slen = (prec < 0) ? strlen(s) : (size_t)prec;
APPEND_PADDED_S(s, slen, width, left_justify);
f++;
break;
@@ -571,37 +593,35 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
}}
}
label_out:
- if (i < size)
+ if (i < size) {
str[i] = '\0';
- else
+ } else {
str[size - 1] = '\0';
- ret = i;
+ }
#undef APPEND_C
#undef APPEND_S
#undef APPEND_PADDED_S
#undef GET_ARG_NUMERIC
- return (ret);
+ return i;
}
-JEMALLOC_ATTR(format(printf, 3, 4))
-int
-malloc_snprintf(char *str, size_t size, const char *format, ...)
-{
- int ret;
+JEMALLOC_FORMAT_PRINTF(3, 4)
+size_t
+malloc_snprintf(char *str, size_t size, const char *format, ...) {
+ size_t ret;
va_list ap;
va_start(ap, format);
ret = malloc_vsnprintf(str, size, format, ap);
va_end(ap);
- return (ret);
+ return ret;
}
void
malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *format, va_list ap)
-{
+ const char *format, va_list ap) {
char buf[MALLOC_PRINTF_BUFSIZE];
if (write_cb == NULL) {
@@ -623,11 +643,10 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
* Print to a callback function in such a way as to (hopefully) avoid memory
* allocation.
*/
-JEMALLOC_ATTR(format(printf, 3, 4))
+JEMALLOC_FORMAT_PRINTF(3, 4)
void
malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *format, ...)
-{
+ const char *format, ...) {
va_list ap;
va_start(ap, format);
@@ -636,13 +655,22 @@ malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
}
/* Print to stderr in such a way as to avoid memory allocation. */
-JEMALLOC_ATTR(format(printf, 1, 2))
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
-malloc_printf(const char *format, ...)
-{
+malloc_printf(const char *format, ...) {
va_list ap;
va_start(ap, format);
malloc_vcprintf(NULL, NULL, format, ap);
va_end(ap);
}
+
+/*
+ * Restore normal assertion macros, in order to make it possible to compile all
+ * C files as a single concatenation.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#undef assert_not_implemented
+#include "jemalloc/internal/assert.h"
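Two behavioral points in the hunk above are easy to miss: malloc_vsnprintf()/malloc_snprintf() now return size_t instead of int, and, as with C99 snprintf(), the return value is the length the output would have had even when `size` forces truncation (the buffer is still NUL-terminated when size > 0). A small usage sketch under those assumptions; format_bytes() is a hypothetical caller, not part of the patch:

    #include "jemalloc/internal/malloc_io.h"

    static size_t
    format_bytes(char *buf, size_t size, size_t n) {
    	/* Assumes size > 0; malloc_vsnprintf() always terminates the buffer. */
    	size_t need = malloc_snprintf(buf, size, "%zu bytes", n);
    	if (need >= size) {
    		/* Truncated: `need` is the untruncated length, so a buffer
    		 * of need + 1 bytes would hold the whole string. */
    	}
    	return need;
    }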
diff --git a/deps/jemalloc/src/mb.c b/deps/jemalloc/src/mb.c
deleted file mode 100644
index dc2c0a256..000000000
--- a/deps/jemalloc/src/mb.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define JEMALLOC_MB_C_
-#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c
index 788eca387..30222b3e5 100644
--- a/deps/jemalloc/src/mutex.c
+++ b/deps/jemalloc/src/mutex.c
@@ -1,12 +1,13 @@
-#define JEMALLOC_MUTEX_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_MUTEX_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
-#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-#include <dlfcn.h>
-#endif
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/spin.h"
#ifndef _CRT_SPINCOUNT
-#define _CRT_SPINCOUNT 4000
+#define _CRT_SPINCOUNT 4000
#endif
/******************************************************************************/
@@ -20,10 +21,6 @@ static bool postpone_init = true;
static malloc_mutex_t *postponed_mutexes = NULL;
#endif
-#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-static void pthread_create_once(void);
-#endif
-
/******************************************************************************/
/*
* We intercept pthread_create() calls in order to toggle isthreaded if the
@@ -31,33 +28,11 @@ static void pthread_create_once(void);
*/
#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
- void *(*)(void *), void *__restrict);
-
-static void
-pthread_create_once(void)
-{
-
- pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
- if (pthread_create_fptr == NULL) {
- malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
- "\"pthread_create\")\n");
- abort();
- }
-
- isthreaded = true;
-}
-
JEMALLOC_EXPORT int
pthread_create(pthread_t *__restrict thread,
const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
- void *__restrict arg)
-{
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-
- pthread_once(&once_control, pthread_create_once);
-
- return (pthread_create_fptr(thread, attr, start_routine, arg));
+ void *__restrict arg) {
+ return pthread_create_wrapper(thread, attr, start_routine, arg);
}
#endif
@@ -68,14 +43,108 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
void *(calloc_cb)(size_t, size_t));
#endif
-bool
-malloc_mutex_init(malloc_mutex_t *mutex)
-{
+void
+malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
+ mutex_prof_data_t *data = &mutex->prof_data;
+ UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER;
+
+ if (ncpus == 1) {
+ goto label_spin_done;
+ }
+
+ int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
+ do {
+ spin_cpu_spinwait();
+ if (!malloc_mutex_trylock_final(mutex)) {
+ data->n_spin_acquired++;
+ return;
+ }
+ } while (cnt++ < max_cnt);
+
+ if (!config_stats) {
+		/* When stats are off, spinning is the only useful optimization. */
+ malloc_mutex_lock_final(mutex);
+ return;
+ }
+label_spin_done:
+ nstime_update(&before);
+ /* Copy before to after to avoid clock skews. */
+ nstime_t after;
+ nstime_copy(&after, &before);
+ uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1,
+ ATOMIC_RELAXED) + 1;
+ /* One last try as above two calls may take quite some cycles. */
+ if (!malloc_mutex_trylock_final(mutex)) {
+ atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
+ data->n_spin_acquired++;
+ return;
+ }
+
+ /* True slow path. */
+ malloc_mutex_lock_final(mutex);
+ /* Update more slow-path only counters. */
+ atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
+ nstime_update(&after);
+
+ nstime_t delta;
+ nstime_copy(&delta, &after);
+ nstime_subtract(&delta, &before);
+ data->n_wait_times++;
+ nstime_add(&data->tot_wait_time, &delta);
+ if (nstime_compare(&data->max_wait_time, &delta) < 0) {
+ nstime_copy(&data->max_wait_time, &delta);
+ }
+ if (n_thds > data->max_n_thds) {
+ data->max_n_thds = n_thds;
+ }
+}
+
+static void
+mutex_prof_data_init(mutex_prof_data_t *data) {
+ memset(data, 0, sizeof(mutex_prof_data_t));
+ nstime_init(&data->max_wait_time, 0);
+ nstime_init(&data->tot_wait_time, 0);
+ data->prev_owner = NULL;
+}
+
+void
+malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ malloc_mutex_assert_owner(tsdn, mutex);
+ mutex_prof_data_init(&mutex->prof_data);
+}
+
+static int
+mutex_addr_comp(const witness_t *witness1, void *mutex1,
+ const witness_t *witness2, void *mutex2) {
+ assert(mutex1 != NULL);
+ assert(mutex2 != NULL);
+ uintptr_t mu1int = (uintptr_t)mutex1;
+ uintptr_t mu2int = (uintptr_t)mutex2;
+ if (mu1int < mu2int) {
+ return -1;
+ } else if (mu1int == mu2int) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+
+bool
+malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+ witness_rank_t rank, malloc_mutex_lock_order_t lock_order) {
+ mutex_prof_data_init(&mutex->prof_data);
#ifdef _WIN32
+# if _WIN32_WINNT >= 0x0600
+ InitializeSRWLock(&mutex->lock);
+# else
if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
- _CRT_SPINCOUNT))
- return (true);
+ _CRT_SPINCOUNT)) {
+ return true;
+ }
+# endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ mutex->lock = OS_UNFAIR_LOCK_INIT;
#elif (defined(JEMALLOC_OSSPIN))
mutex->lock = 0;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
@@ -83,67 +152,73 @@ malloc_mutex_init(malloc_mutex_t *mutex)
mutex->postponed_next = postponed_mutexes;
postponed_mutexes = mutex;
} else {
- if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
- 0)
- return (true);
+ if (_pthread_mutex_init_calloc_cb(&mutex->lock,
+ bootstrap_calloc) != 0) {
+ return true;
+ }
}
#else
pthread_mutexattr_t attr;
- if (pthread_mutexattr_init(&attr) != 0)
- return (true);
+ if (pthread_mutexattr_init(&attr) != 0) {
+ return true;
+ }
pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
- return (true);
+ return true;
}
pthread_mutexattr_destroy(&attr);
#endif
- return (false);
+ if (config_debug) {
+ mutex->lock_order = lock_order;
+ if (lock_order == malloc_mutex_address_ordered) {
+ witness_init(&mutex->witness, name, rank,
+ mutex_addr_comp, mutex);
+ } else {
+ witness_init(&mutex->witness, name, rank, NULL, NULL);
+ }
+ }
+ return false;
}
void
-malloc_mutex_prefork(malloc_mutex_t *mutex)
-{
-
- malloc_mutex_lock(mutex);
+malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ malloc_mutex_lock(tsdn, mutex);
}
void
-malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
-{
-
- malloc_mutex_unlock(mutex);
+malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+ malloc_mutex_unlock(tsdn, mutex);
}
void
-malloc_mutex_postfork_child(malloc_mutex_t *mutex)
-{
-
+malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) {
#ifdef JEMALLOC_MUTEX_INIT_CB
- malloc_mutex_unlock(mutex);
+ malloc_mutex_unlock(tsdn, mutex);
#else
- if (malloc_mutex_init(mutex)) {
+ if (malloc_mutex_init(mutex, mutex->witness.name,
+ mutex->witness.rank, mutex->lock_order)) {
malloc_printf("<jemalloc>: Error re-initializing mutex in "
"child\n");
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
}
#endif
}
bool
-mutex_boot(void)
-{
-
+malloc_mutex_boot(void) {
#ifdef JEMALLOC_MUTEX_INIT_CB
postpone_init = false;
while (postponed_mutexes != NULL) {
if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
- base_calloc) != 0)
- return (true);
+ bootstrap_calloc) != 0) {
+ return true;
+ }
postponed_mutexes = postponed_mutexes->postponed_next;
}
#endif
- return (false);
+ return false;
}
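malloc_mutex_lock_slow() above is a two-phase acquire: spin briefly on a trylock (cheap when the owner is about to release), then fall back to a blocking lock, with the wait-time statistics recorded only around the blocking phase and the spin skipped outright on a single CPU. Stripped of the profiling counters, the shape is roughly the following, using pthread primitives as portable stand-ins for the malloc_mutex_* internals:

    #include <pthread.h>
    #include <sched.h>

    #define MAX_SPIN 250	/* stand-in for MALLOC_MUTEX_MAX_SPIN */

    static void
    lock_slow(pthread_mutex_t *m) {
    	for (int cnt = 0; cnt <= MAX_SPIN; cnt++) {
    		/* jemalloc issues a CPU pause here (spin_cpu_spinwait());
    		 * sched_yield() is only a portable approximation. */
    		sched_yield();
    		if (pthread_mutex_trylock(m) == 0) {
    			return;	/* acquired while spinning */
    		}
    	}
    	pthread_mutex_lock(m);	/* true slow path: block */
    }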
diff --git a/deps/jemalloc/src/mutex_pool.c b/deps/jemalloc/src/mutex_pool.c
new file mode 100644
index 000000000..f24d10e44
--- /dev/null
+++ b/deps/jemalloc/src/mutex_pool.c
@@ -0,0 +1,18 @@
+#define JEMALLOC_MUTEX_POOL_C_
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+
+bool
+mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) {
+ for (int i = 0; i < MUTEX_POOL_SIZE; ++i) {
+ if (malloc_mutex_init(&pool->mutexes[i], name, rank,
+ malloc_mutex_address_ordered)) {
+ return true;
+ }
+ }
+ return false;
+}
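mutex_pool.c trades a per-object mutex for a fixed table of locks: an object's address is hashed to one of MUTEX_POOL_SIZE slots, so unrelated objects may share a lock but no object needs its own. The address-ordered initialization matters because two objects can hash to different slots that must be held at once; taking them in address order avoids deadlock. A simplified sketch of the address-to-slot mapping (the pool size and hash below are illustrative, not jemalloc's):

    #include <pthread.h>
    #include <stdint.h>

    #define POOL_SIZE 256	/* assumed power of two, like MUTEX_POOL_SIZE */
    static pthread_mutex_t pool[POOL_SIZE];	/* pthread_mutex_init() each slot at boot */

    static pthread_mutex_t *
    pool_mutex(const void *key) {
    	uintptr_t h = (uintptr_t)key;
    	h ^= h >> 16;	/* cheap mix; jemalloc uses a real pointer hash */
    	return &pool[h & (POOL_SIZE - 1)];
    }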
diff --git a/deps/jemalloc/src/nstime.c b/deps/jemalloc/src/nstime.c
new file mode 100644
index 000000000..71db35396
--- /dev/null
+++ b/deps/jemalloc/src/nstime.c
@@ -0,0 +1,170 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/nstime.h"
+
+#include "jemalloc/internal/assert.h"
+
+#define BILLION UINT64_C(1000000000)
+#define MILLION UINT64_C(1000000)
+
+void
+nstime_init(nstime_t *time, uint64_t ns) {
+ time->ns = ns;
+}
+
+void
+nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) {
+ time->ns = sec * BILLION + nsec;
+}
+
+uint64_t
+nstime_ns(const nstime_t *time) {
+ return time->ns;
+}
+
+uint64_t
+nstime_msec(const nstime_t *time) {
+ return time->ns / MILLION;
+}
+
+uint64_t
+nstime_sec(const nstime_t *time) {
+ return time->ns / BILLION;
+}
+
+uint64_t
+nstime_nsec(const nstime_t *time) {
+ return time->ns % BILLION;
+}
+
+void
+nstime_copy(nstime_t *time, const nstime_t *source) {
+ *time = *source;
+}
+
+int
+nstime_compare(const nstime_t *a, const nstime_t *b) {
+ return (a->ns > b->ns) - (a->ns < b->ns);
+}
+
+void
+nstime_add(nstime_t *time, const nstime_t *addend) {
+ assert(UINT64_MAX - time->ns >= addend->ns);
+
+ time->ns += addend->ns;
+}
+
+void
+nstime_iadd(nstime_t *time, uint64_t addend) {
+ assert(UINT64_MAX - time->ns >= addend);
+
+ time->ns += addend;
+}
+
+void
+nstime_subtract(nstime_t *time, const nstime_t *subtrahend) {
+ assert(nstime_compare(time, subtrahend) >= 0);
+
+ time->ns -= subtrahend->ns;
+}
+
+void
+nstime_isubtract(nstime_t *time, uint64_t subtrahend) {
+ assert(time->ns >= subtrahend);
+
+ time->ns -= subtrahend;
+}
+
+void
+nstime_imultiply(nstime_t *time, uint64_t multiplier) {
+ assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) <<
+ 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns));
+
+ time->ns *= multiplier;
+}
+
+void
+nstime_idivide(nstime_t *time, uint64_t divisor) {
+ assert(divisor != 0);
+
+ time->ns /= divisor;
+}
+
+uint64_t
+nstime_divide(const nstime_t *time, const nstime_t *divisor) {
+ assert(divisor->ns != 0);
+
+ return time->ns / divisor->ns;
+}
+
+#ifdef _WIN32
+# define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time) {
+ FILETIME ft;
+ uint64_t ticks_100ns;
+
+ GetSystemTimeAsFileTime(&ft);
+ ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
+
+ nstime_init(time, ticks_100ns * 100);
+}
+#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
+# define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time) {
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+ nstime_init2(time, ts.tv_sec, ts.tv_nsec);
+}
+#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
+# define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time) {
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ nstime_init2(time, ts.tv_sec, ts.tv_nsec);
+}
+#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME)
+# define NSTIME_MONOTONIC true
+static void
+nstime_get(nstime_t *time) {
+ nstime_init(time, mach_absolute_time());
+}
+#else
+# define NSTIME_MONOTONIC false
+static void
+nstime_get(nstime_t *time) {
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000);
+}
+#endif
+
+static bool
+nstime_monotonic_impl(void) {
+ return NSTIME_MONOTONIC;
+#undef NSTIME_MONOTONIC
+}
+nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl;
+
+static bool
+nstime_update_impl(nstime_t *time) {
+ nstime_t old_time;
+
+ nstime_copy(&old_time, time);
+ nstime_get(time);
+
+ /* Handle non-monotonic clocks. */
+ if (unlikely(nstime_compare(&old_time, time) > 0)) {
+ nstime_copy(time, &old_time);
+ return true;
+ }
+
+ return false;
+}
+nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl;
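nstime_update() above guards against non-monotonic clocks: if the freshly read time is earlier than the stored one, it keeps the old value and returns true. Interval measurement therefore seeds the end timestamp from the start one, exactly as malloc_mutex_lock_slow() does with `before`/`after`, so a backwards clock yields an elapsed time of zero rather than garbage. A sketch of that idiom (do_work is a placeholder for the timed region):

    #include "jemalloc/internal/nstime.h"

    static uint64_t
    time_work(void (*do_work)(void)) {
    	nstime_t before = NSTIME_ZERO_INITIALIZER;
    	nstime_update(&before);		/* start timestamp */
    	do_work();
    	nstime_t after;
    	nstime_copy(&after, &before);	/* seed from `before` to absorb skew */
    	nstime_update(&after);		/* never moves backwards past `before` */
    	nstime_t delta;
    	nstime_copy(&delta, &after);
    	nstime_subtract(&delta, &before);
    	return nstime_ns(&delta);	/* elapsed nanoseconds */
    }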
diff --git a/deps/jemalloc/src/pages.c b/deps/jemalloc/src/pages.c
new file mode 100644
index 000000000..26002692d
--- /dev/null
+++ b/deps/jemalloc/src/pages.c
@@ -0,0 +1,606 @@
+#define JEMALLOC_PAGES_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/pages.h"
+
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#include <sys/sysctl.h>
+#ifdef __FreeBSD__
+#include <vm/vm_param.h>
+#endif
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+/* Actual operating system page size, detected during bootstrap, <= PAGE. */
+static size_t os_page;
+
+#ifndef _WIN32
+# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
+# define PAGES_PROT_DECOMMIT (PROT_NONE)
+static int mmap_flags;
+#endif
+static bool os_overcommits;
+
+const char *thp_mode_names[] = {
+ "default",
+ "always",
+ "never",
+ "not supported"
+};
+thp_mode_t opt_thp = THP_MODE_DEFAULT;
+thp_mode_t init_system_thp_mode;
+
+/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
+static bool pages_can_purge_lazy_runtime = true;
+
+/******************************************************************************/
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
+
+static void os_pages_unmap(void *addr, size_t size);
+
+/******************************************************************************/
+
+static void *
+os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
+ assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
+ assert(ALIGNMENT_CEILING(size, os_page) == size);
+ assert(size != 0);
+
+ if (os_overcommits) {
+ *commit = true;
+ }
+
+ void *ret;
+#ifdef _WIN32
+ /*
+ * If VirtualAlloc can't allocate at the given address when one is
+ * given, it fails and returns NULL.
+ */
+ ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
+ PAGE_READWRITE);
+#else
+ /*
+ * We don't use MAP_FIXED here, because it can cause the *replacement*
+ * of existing mappings, and we only want to create new mappings.
+ */
+ {
+ int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+
+ ret = mmap(addr, size, prot, mmap_flags, -1, 0);
+ }
+ assert(ret != NULL);
+
+ if (ret == MAP_FAILED) {
+ ret = NULL;
+ } else if (addr != NULL && ret != addr) {
+ /*
+ * We succeeded in mapping memory, but not in the right place.
+ */
+ os_pages_unmap(ret, size);
+ ret = NULL;
+ }
+#endif
+ assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
+ ret == addr));
+ return ret;
+}
+
+static void *
+os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
+ bool *commit) {
+ void *ret = (void *)((uintptr_t)addr + leadsize);
+
+ assert(alloc_size >= leadsize + size);
+#ifdef _WIN32
+ os_pages_unmap(addr, alloc_size);
+ void *new_addr = os_pages_map(ret, size, PAGE, commit);
+ if (new_addr == ret) {
+ return ret;
+ }
+ if (new_addr != NULL) {
+ os_pages_unmap(new_addr, size);
+ }
+ return NULL;
+#else
+ size_t trailsize = alloc_size - leadsize - size;
+
+ if (leadsize != 0) {
+ os_pages_unmap(addr, leadsize);
+ }
+ if (trailsize != 0) {
+ os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
+ }
+ return ret;
+#endif
+}
+
+static void
+os_pages_unmap(void *addr, size_t size) {
+ assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
+ assert(ALIGNMENT_CEILING(size, os_page) == size);
+
+#ifdef _WIN32
+ if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
+#else
+ if (munmap(addr, size) == -1)
+#endif
+ {
+ char buf[BUFERROR_BUF];
+
+ buferror(get_errno(), buf, sizeof(buf));
+ malloc_printf("<jemalloc>: Error in "
+#ifdef _WIN32
+ "VirtualFree"
+#else
+ "munmap"
+#endif
+ "(): %s\n", buf);
+ if (opt_abort) {
+ abort();
+ }
+ }
+}
+
+static void *
+pages_map_slow(size_t size, size_t alignment, bool *commit) {
+ size_t alloc_size = size + alignment - os_page;
+ /* Beware size_t wrap-around. */
+ if (alloc_size < size) {
+ return NULL;
+ }
+
+ void *ret;
+ do {
+ void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
+ if (pages == NULL) {
+ return NULL;
+ }
+ size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
+ - (uintptr_t)pages;
+ ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
+ } while (ret == NULL);
+
+ assert(ret != NULL);
+ assert(PAGE_ADDR2BASE(ret) == ret);
+ return ret;
+}
+
+void *
+pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
+ assert(alignment >= PAGE);
+ assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
+
+ /*
+ * Ideally, there would be a way to specify alignment to mmap() (like
+ * NetBSD has), but in the absence of such a feature, we have to work
+ * hard to efficiently create aligned mappings. The reliable, but
+ * slow method is to create a mapping that is over-sized, then trim the
+ * excess. However, that always results in one or two calls to
+ * os_pages_unmap(), and it can leave holes in the process's virtual
+ * memory map if memory grows downward.
+ *
+ * Optimistically try mapping precisely the right amount before falling
+ * back to the slow method, with the expectation that the optimistic
+ * approach works most of the time.
+ */
+
+ void *ret = os_pages_map(addr, size, os_page, commit);
+ if (ret == NULL || ret == addr) {
+ return ret;
+ }
+ assert(addr == NULL);
+ if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
+ os_pages_unmap(ret, size);
+ return pages_map_slow(size, alignment, commit);
+ }
+
+ assert(PAGE_ADDR2BASE(ret) == ret);
+ return ret;
+}
+
+void
+pages_unmap(void *addr, size_t size) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+
+ os_pages_unmap(addr, size);
+}
+
+static bool
+pages_commit_impl(void *addr, size_t size, bool commit) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+
+ if (os_overcommits) {
+ return true;
+ }
+
+#ifdef _WIN32
+ return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
+ PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
+#else
+ {
+ int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+ void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
+ -1, 0);
+ if (result == MAP_FAILED) {
+ return true;
+ }
+ if (result != addr) {
+ /*
+ * We succeeded in mapping memory, but not in the right
+ * place.
+ */
+ os_pages_unmap(result, size);
+ return true;
+ }
+ return false;
+ }
+#endif
+}
+
+bool
+pages_commit(void *addr, size_t size) {
+ return pages_commit_impl(addr, size, true);
+}
+
+bool
+pages_decommit(void *addr, size_t size) {
+ return pages_commit_impl(addr, size, false);
+}
+
+bool
+pages_purge_lazy(void *addr, size_t size) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+
+ if (!pages_can_purge_lazy) {
+ return true;
+ }
+ if (!pages_can_purge_lazy_runtime) {
+ /*
+ * Built with lazy purge enabled, but detected it was not
+ * supported on the current system.
+ */
+ return true;
+ }
+
+#ifdef _WIN32
+ VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
+ return false;
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+ return (madvise(addr, size,
+# ifdef MADV_FREE
+ MADV_FREE
+# else
+ JEMALLOC_MADV_FREE
+# endif
+ ) != 0);
+#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+ return (madvise(addr, size, MADV_DONTNEED) != 0);
+#else
+ not_reached();
+#endif
+}
+
+bool
+pages_purge_forced(void *addr, size_t size) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+
+ if (!pages_can_purge_forced) {
+ return true;
+ }
+
+#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+ return (madvise(addr, size, MADV_DONTNEED) != 0);
+#elif defined(JEMALLOC_MAPS_COALESCE)
+ /* Try to overlay a new demand-zeroed mapping. */
+ return pages_commit(addr, size);
+#else
+ not_reached();
+#endif
+}
+
+static bool
+pages_huge_impl(void *addr, size_t size, bool aligned) {
+ if (aligned) {
+ assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+ assert(HUGEPAGE_CEILING(size) == size);
+ }
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
+ return (madvise(addr, size, MADV_HUGEPAGE) != 0);
+#else
+ return true;
+#endif
+}
+
+bool
+pages_huge(void *addr, size_t size) {
+ return pages_huge_impl(addr, size, true);
+}
+
+static bool
+pages_huge_unaligned(void *addr, size_t size) {
+ return pages_huge_impl(addr, size, false);
+}
+
+static bool
+pages_nohuge_impl(void *addr, size_t size, bool aligned) {
+ if (aligned) {
+ assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+ assert(HUGEPAGE_CEILING(size) == size);
+ }
+
+#ifdef JEMALLOC_HAVE_MADVISE_HUGE
+ return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
+#else
+ return false;
+#endif
+}
+
+bool
+pages_nohuge(void *addr, size_t size) {
+ return pages_nohuge_impl(addr, size, true);
+}
+
+static bool
+pages_nohuge_unaligned(void *addr, size_t size) {
+ return pages_nohuge_impl(addr, size, false);
+}
+
+bool
+pages_dontdump(void *addr, size_t size) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+#ifdef JEMALLOC_MADVISE_DONTDUMP
+ return madvise(addr, size, MADV_DONTDUMP) != 0;
+#else
+ return false;
+#endif
+}
+
+bool
+pages_dodump(void *addr, size_t size) {
+ assert(PAGE_ADDR2BASE(addr) == addr);
+ assert(PAGE_CEILING(size) == size);
+#ifdef JEMALLOC_MADVISE_DONTDUMP
+ return madvise(addr, size, MADV_DODUMP) != 0;
+#else
+ return false;
+#endif
+}
+
+static size_t
+os_page_detect(void) {
+#ifdef _WIN32
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+ return si.dwPageSize;
+#elif defined(__FreeBSD__)
+ return getpagesize();
+#else
+ long result = sysconf(_SC_PAGESIZE);
+ if (result == -1) {
+ return LG_PAGE;
+ }
+ return (size_t)result;
+#endif
+}
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+static bool
+os_overcommits_sysctl(void) {
+ int vm_overcommit;
+ size_t sz;
+
+ sz = sizeof(vm_overcommit);
+#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
+ int mib[2];
+
+ mib[0] = CTL_VM;
+ mib[1] = VM_OVERCOMMIT;
+ if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
+ return false; /* Error. */
+ }
+#else
+ if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
+ return false; /* Error. */
+ }
+#endif
+
+ return ((vm_overcommit & 0x3) == 0);
+}
+#endif
+
+#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+/*
+ * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
+ * reentry during bootstrapping if another library has interposed system call
+ * wrappers.
+ */
+static bool
+os_overcommits_proc(void) {
+ int fd;
+ char buf[1];
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
+ #if defined(O_CLOEXEC)
+ fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
+ O_CLOEXEC);
+ #else
+ fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
+ if (fd != -1) {
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+ }
+ #endif
+#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
+ #if defined(O_CLOEXEC)
+ fd = (int)syscall(SYS_openat,
+ AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
+ #else
+ fd = (int)syscall(SYS_openat,
+ AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
+ if (fd != -1) {
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+ }
+ #endif
+#else
+ #if defined(O_CLOEXEC)
+ fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
+ #else
+ fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
+ if (fd != -1) {
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+ }
+ #endif
+#endif
+
+ if (fd == -1) {
+ return false; /* Error. */
+ }
+
+ ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
+ syscall(SYS_close, fd);
+#else
+ close(fd);
+#endif
+
+ if (nread < 1) {
+ return false; /* Error. */
+ }
+ /*
+ * /proc/sys/vm/overcommit_memory meanings:
+ * 0: Heuristic overcommit.
+ * 1: Always overcommit.
+ * 2: Never overcommit.
+ */
+ return (buf[0] == '0' || buf[0] == '1');
+}
+#endif
+
+void
+pages_set_thp_state(void *ptr, size_t size) {
+ if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
+ return;
+ }
+ assert(opt_thp != thp_mode_not_supported &&
+ init_system_thp_mode != thp_mode_not_supported);
+
+ if (opt_thp == thp_mode_always
+ && init_system_thp_mode != thp_mode_never) {
+ assert(init_system_thp_mode == thp_mode_default);
+ pages_huge_unaligned(ptr, size);
+ } else if (opt_thp == thp_mode_never) {
+ assert(init_system_thp_mode == thp_mode_default ||
+ init_system_thp_mode == thp_mode_always);
+ pages_nohuge_unaligned(ptr, size);
+ }
+}
+
+static void
+init_thp_state(void) {
+ if (!have_madvise_huge) {
+ if (metadata_thp_enabled() && opt_abort) {
+ malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
+ abort();
+ }
+ goto label_error;
+ }
+
+ static const char sys_state_madvise[] = "always [madvise] never\n";
+ static const char sys_state_always[] = "[always] madvise never\n";
+ static const char sys_state_never[] = "always madvise [never]\n";
+ char buf[sizeof(sys_state_madvise)];
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
+ int fd = (int)syscall(SYS_open,
+ "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
+#else
+ int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
+#endif
+ if (fd == -1) {
+ goto label_error;
+ }
+
+ ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
+ syscall(SYS_close, fd);
+#else
+ close(fd);
+#endif
+
+ if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
+ init_system_thp_mode = thp_mode_default;
+ } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
+ init_system_thp_mode = thp_mode_always;
+ } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
+ init_system_thp_mode = thp_mode_never;
+ } else {
+ goto label_error;
+ }
+ return;
+label_error:
+ opt_thp = init_system_thp_mode = thp_mode_not_supported;
+}
+
+bool
+pages_boot(void) {
+ os_page = os_page_detect();
+ if (os_page > PAGE) {
+ malloc_write("<jemalloc>: Unsupported system page size\n");
+ if (opt_abort) {
+ abort();
+ }
+ return true;
+ }
+
+#ifndef _WIN32
+ mmap_flags = MAP_PRIVATE | MAP_ANON;
+#endif
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+ os_overcommits = os_overcommits_sysctl();
+#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
+ os_overcommits = os_overcommits_proc();
+# ifdef MAP_NORESERVE
+ if (os_overcommits) {
+ mmap_flags |= MAP_NORESERVE;
+ }
+# endif
+#else
+ os_overcommits = false;
+#endif
+
+ init_thp_state();
+
+ /* Detect lazy purge runtime support. */
+ if (pages_can_purge_lazy) {
+ bool committed = false;
+ void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
+ if (madv_free_page == NULL) {
+ return true;
+ }
+ assert(pages_can_purge_lazy_runtime);
+ if (pages_purge_lazy(madv_free_page, PAGE)) {
+ pages_can_purge_lazy_runtime = false;
+ }
+ os_pages_unmap(madv_free_page, PAGE);
+ }
+
+ return false;
+}
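The over-allocate-and-trim fallback in pages_map_slow() above rests on one piece of arithmetic: a mapping of size + alignment - page bytes must contain an `alignment`-aligned region of `size` bytes, and the lead and trail slack can simply be unmapped. That arithmetic in isolation (power-of-two alignment and a page-aligned base assumed, as mmap() guarantees; no actual mapping happens here, so the sketch runs standalone):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Given a page-aligned mapping at `pages` of size + alignment - page
     * bytes, return the aligned start to keep; lead/trail would be unmapped. */
    static uintptr_t
    aligned_start(uintptr_t pages, size_t size, size_t alignment, size_t page) {
    	size_t alloc_size = size + alignment - page;	/* as in pages_map_slow() */
    	uintptr_t ret = (pages + alignment - 1) & ~((uintptr_t)alignment - 1);
    	size_t leadsize = ret - pages;		/* multiple of page, < alignment */
    	size_t trailsize = alloc_size - leadsize - size;
    	assert((ret & (alignment - 1)) == 0);
    	assert(trailsize <= alignment - page);	/* trim never underflows */
    	(void)trailsize;
    	return ret;
    }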
diff --git a/deps/jemalloc/src/prng.c b/deps/jemalloc/src/prng.c
new file mode 100644
index 000000000..83c04bf9b
--- /dev/null
+++ b/deps/jemalloc/src/prng.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_PRNG_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c
index 7722b7b43..13df641a0 100644
--- a/deps/jemalloc/src/prof.c
+++ b/deps/jemalloc/src/prof.c
@@ -1,27 +1,41 @@
-#define JEMALLOC_PROF_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_PROF_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/mutex.h"
+
/******************************************************************************/
#ifdef JEMALLOC_PROF_LIBUNWIND
-#define UNW_LOCAL_ONLY
+#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif
#ifdef JEMALLOC_PROF_LIBGCC
+/*
+ * We have a circular dependency -- jemalloc_internal.h tells us if we should
+ * use libgcc's unwinding functionality, but after we've included that, we've
+ * already hooked _Unwind_Backtrace. We'll temporarily disable hooking.
+ */
+#undef _Unwind_Backtrace
#include <unwind.h>
+#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
#endif
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
-
bool opt_prof = false;
bool opt_prof_active = true;
+bool opt_prof_thread_active_init = true;
size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool opt_prof_gdump = false;
-bool opt_prof_final = true;
+bool opt_prof_final = false;
bool opt_prof_leak = false;
bool opt_prof_accum = false;
char opt_prof_prefix[
@@ -31,25 +45,66 @@ char opt_prof_prefix[
#endif
1];
+/*
+ * Initialized as opt_prof_active, and accessed via
+ * prof_active_[gs]et{_unlocked,}().
+ */
+bool prof_active;
+static malloc_mutex_t prof_active_mtx;
+
+/*
+ * Initialized as opt_prof_thread_active_init, and accessed via
+ * prof_thread_active_init_[gs]et().
+ */
+static bool prof_thread_active_init;
+static malloc_mutex_t prof_thread_active_init_mtx;
+
+/*
+ * Initialized as opt_prof_gdump, and accessed via
+ * prof_gdump_[gs]et{_unlocked,}().
+ */
+bool prof_gdump_val;
+static malloc_mutex_t prof_gdump_mtx;
+
uint64_t prof_interval = 0;
-bool prof_promote;
+
+size_t lg_prof_sample;
/*
- * Table of mutexes that are shared among ctx's. These are leaf locks, so
- * there is no problem with using them for more than one ctx at the same time.
- * The primary motivation for this sharing though is that ctx's are ephemeral,
+ * Table of mutexes that are shared among gctx's. These are leaf locks, so
+ * there is no problem with using them for more than one gctx at the same time.
+ * The primary motivation for this sharing though is that gctx's are ephemeral,
* and destroying mutexes causes complications for systems that allocate when
* creating/destroying mutexes.
*/
-static malloc_mutex_t *ctx_locks;
-static unsigned cum_ctxs; /* Atomic counter. */
+static malloc_mutex_t *gctx_locks;
+static atomic_u_t cum_gctxs; /* Atomic counter. */
/*
- * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
+ * Table of mutexes that are shared among tdata's. No operations require
+ * holding multiple tdata locks, so there is no problem with using them for more
+ * than one tdata at the same time, even though a gctx lock may be acquired
+ * while holding a tdata lock.
+ */
+static malloc_mutex_t *tdata_locks;
+
+/*
+ * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data
* structure that knows about all backtraces currently captured.
*/
-static ckh_t bt2ctx;
-static malloc_mutex_t bt2ctx_mtx;
+static ckh_t bt2gctx;
+/* Non-static to enable profiling. */
+malloc_mutex_t bt2gctx_mtx;
+
+/*
+ * Tree of all extant prof_tdata_t structures, regardless of state,
+ * {attached,detached,expired}.
+ */
+static prof_tdata_tree_t tdatas;
+static malloc_mutex_t tdatas_mtx;
+
+static uint64_t next_thr_uid;
+static malloc_mutex_t next_thr_uid_mtx;
static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
@@ -70,161 +125,242 @@ static char prof_dump_buf[
1
#endif
];
-static unsigned prof_dump_buf_end;
+static size_t prof_dump_buf_end;
static int prof_dump_fd;
/* Do not dump any profiles until bootstrapping is complete. */
static bool prof_booted = false;
/******************************************************************************/
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
-void
-bt_init(prof_bt_t *bt, void **vec)
-{
+static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
+static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
+static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
+ bool even_if_attached);
+static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
+ bool even_if_attached);
+static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
- cassert(config_prof);
+/******************************************************************************/
+/* Red-black trees. */
- bt->vec = vec;
- bt->len = 0;
+static int
+prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
+ uint64_t a_thr_uid = a->thr_uid;
+ uint64_t b_thr_uid = b->thr_uid;
+ int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
+ if (ret == 0) {
+ uint64_t a_thr_discrim = a->thr_discrim;
+ uint64_t b_thr_discrim = b->thr_discrim;
+ ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
+ b_thr_discrim);
+ if (ret == 0) {
+ uint64_t a_tctx_uid = a->tctx_uid;
+ uint64_t b_tctx_uid = b->tctx_uid;
+ ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
+ b_tctx_uid);
+ }
+ }
+ return ret;
}
-static void
-bt_destroy(prof_bt_t *bt)
-{
+rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
+ tctx_link, prof_tctx_comp)
- cassert(config_prof);
+static int
+prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
+ unsigned a_len = a->bt.len;
+ unsigned b_len = b->bt.len;
+ unsigned comp_len = (a_len < b_len) ? a_len : b_len;
+ int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
+ if (ret == 0) {
+ ret = (a_len > b_len) - (a_len < b_len);
+ }
+ return ret;
+}
+
+rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
+ prof_gctx_comp)
- idalloc(bt);
+static int
+prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
+ int ret;
+ uint64_t a_uid = a->thr_uid;
+ uint64_t b_uid = b->thr_uid;
+
+ ret = ((a_uid > b_uid) - (a_uid < b_uid));
+ if (ret == 0) {
+ uint64_t a_discrim = a->thr_discrim;
+ uint64_t b_discrim = b->thr_discrim;
+
+ ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
+ }
+ return ret;
}
-static prof_bt_t *
-bt_dup(prof_bt_t *bt)
-{
- prof_bt_t *ret;
+rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
+ prof_tdata_comp)
+
+/******************************************************************************/
+
+void
+prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
+ prof_tdata_t *tdata;
cassert(config_prof);
- /*
- * Create a single allocation that has space for vec immediately
- * following the prof_bt_t structure. The backtraces that get
- * stored in the backtrace caches are copied from stack-allocated
- * temporary variables, so size is known at creation time. Making this
- * a contiguous object improves cache locality.
- */
- ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
- (bt->len * sizeof(void *)));
- if (ret == NULL)
- return (NULL);
- ret->vec = (void **)((uintptr_t)ret +
- QUANTUM_CEILING(sizeof(prof_bt_t)));
- memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
- ret->len = bt->len;
+ if (updated) {
+ /*
+ * Compute a new sample threshold. This isn't very important in
+ * practice, because this function is rarely executed, so the
+ * potential for sample bias is minimal except in contrived
+ * programs.
+ */
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata != NULL) {
+ prof_sample_threshold_update(tdata);
+ }
+ }
- return (ret);
+ if ((uintptr_t)tctx > (uintptr_t)1U) {
+ malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
+ tctx->prepared = false;
+ if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
+ prof_tctx_destroy(tsd, tctx);
+ } else {
+ malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
+ }
+ }
}
-static inline void
-prof_enter(prof_tdata_t *prof_tdata)
-{
+void
+prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
+ prof_tctx_t *tctx) {
+ prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
- cassert(config_prof);
+ malloc_mutex_lock(tsdn, tctx->tdata->lock);
+ tctx->cnts.curobjs++;
+ tctx->cnts.curbytes += usize;
+ if (opt_prof_accum) {
+ tctx->cnts.accumobjs++;
+ tctx->cnts.accumbytes += usize;
+ }
+ tctx->prepared = false;
+ malloc_mutex_unlock(tsdn, tctx->tdata->lock);
+}
+
+void
+prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) {
+ malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
+ assert(tctx->cnts.curobjs > 0);
+ assert(tctx->cnts.curbytes >= usize);
+ tctx->cnts.curobjs--;
+ tctx->cnts.curbytes -= usize;
+
+ if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
+ prof_tctx_destroy(tsd, tctx);
+ } else {
+ malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
+ }
+}
- assert(prof_tdata->enq == false);
- prof_tdata->enq = true;
+void
+bt_init(prof_bt_t *bt, void **vec) {
+ cassert(config_prof);
- malloc_mutex_lock(&bt2ctx_mtx);
+ bt->vec = vec;
+ bt->len = 0;
}
-static inline void
-prof_leave(prof_tdata_t *prof_tdata)
-{
- bool idump, gdump;
+static void
+prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
+ cassert(config_prof);
+ assert(tdata == prof_tdata_get(tsd, false));
+
+ if (tdata != NULL) {
+ assert(!tdata->enq);
+ tdata->enq = true;
+ }
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
+}
+static void
+prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
cassert(config_prof);
+ assert(tdata == prof_tdata_get(tsd, false));
+
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
- malloc_mutex_unlock(&bt2ctx_mtx);
+ if (tdata != NULL) {
+ bool idump, gdump;
- assert(prof_tdata->enq);
- prof_tdata->enq = false;
- idump = prof_tdata->enq_idump;
- prof_tdata->enq_idump = false;
- gdump = prof_tdata->enq_gdump;
- prof_tdata->enq_gdump = false;
+ assert(tdata->enq);
+ tdata->enq = false;
+ idump = tdata->enq_idump;
+ tdata->enq_idump = false;
+ gdump = tdata->enq_gdump;
+ tdata->enq_gdump = false;
- if (idump)
- prof_idump();
- if (gdump)
- prof_gdump();
+ if (idump) {
+ prof_idump(tsd_tsdn(tsd));
+ }
+ if (gdump) {
+ prof_gdump(tsd_tsdn(tsd));
+ }
+ }
}
#ifdef JEMALLOC_PROF_LIBUNWIND
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
-{
- unw_context_t uc;
- unw_cursor_t cursor;
- unsigned i;
- int err;
+prof_backtrace(prof_bt_t *bt) {
+ int nframes;
cassert(config_prof);
assert(bt->len == 0);
assert(bt->vec != NULL);
- unw_getcontext(&uc);
- unw_init_local(&cursor, &uc);
-
- /* Throw away (nignore+1) stack frames, if that many exist. */
- for (i = 0; i < nignore + 1; i++) {
- err = unw_step(&cursor);
- if (err <= 0)
- return;
- }
-
- /*
- * Iterate over stack frames until there are no more, or until no space
- * remains in bt.
- */
- for (i = 0; i < PROF_BT_MAX; i++) {
- unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
- bt->len++;
- err = unw_step(&cursor);
- if (err <= 0)
- break;
+ nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
+ if (nframes <= 0) {
+ return;
}
+ bt->len = nframes;
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
-prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
-{
-
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
cassert(config_prof);
- return (_URC_NO_REASON);
+ return _URC_NO_REASON;
}
static _Unwind_Reason_Code
-prof_unwind_callback(struct _Unwind_Context *context, void *arg)
-{
+prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+ void *ip;
cassert(config_prof);
- if (data->nignore > 0)
- data->nignore--;
- else {
- data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
- data->bt->len++;
- if (data->bt->len == data->max)
- return (_URC_END_OF_STACK);
+ ip = (void *)_Unwind_GetIP(context);
+ if (ip == NULL) {
+ return _URC_END_OF_STACK;
+ }
+ data->bt->vec[data->bt->len] = ip;
+ data->bt->len++;
+ if (data->bt->len == data->max) {
+ return _URC_END_OF_STACK;
}
- return (_URC_NO_REASON);
+ return _URC_NO_REASON;
}
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
-{
- prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
+prof_backtrace(prof_bt_t *bt) {
+ prof_unwind_data_t data = {bt, PROF_BT_MAX};
cassert(config_prof);
@@ -232,25 +368,24 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
}
#elif (defined(JEMALLOC_PROF_GCC))
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
-{
-#define BT_FRAME(i) \
- if ((i) < nignore + PROF_BT_MAX) { \
+prof_backtrace(prof_bt_t *bt) {
+#define BT_FRAME(i) \
+ if ((i) < PROF_BT_MAX) { \
void *p; \
- if (__builtin_frame_address(i) == 0) \
+ if (__builtin_frame_address(i) == 0) { \
return; \
+ } \
p = __builtin_return_address(i); \
- if (p == NULL) \
+ if (p == NULL) { \
return; \
- if (i >= nignore) { \
- bt->vec[(i) - nignore] = p; \
- bt->len = (i) - nignore + 1; \
} \
- } else \
- return;
+ bt->vec[(i)] = p; \
+ bt->len = (i) + 1; \
+ } else { \
+ return; \
+ }
cassert(config_prof);
- assert(nignore <= 3);
BT_FRAME(0)
BT_FRAME(1)
@@ -392,330 +527,475 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore)
BT_FRAME(125)
BT_FRAME(126)
BT_FRAME(127)
-
- /* Extras to compensate for nignore. */
- BT_FRAME(128)
- BT_FRAME(129)
- BT_FRAME(130)
#undef BT_FRAME
}
#else
void
-prof_backtrace(prof_bt_t *bt, unsigned nignore)
-{
-
+prof_backtrace(prof_bt_t *bt) {
cassert(config_prof);
not_reached();
}
#endif
static malloc_mutex_t *
-prof_ctx_mutex_choose(void)
-{
- unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
+prof_gctx_mutex_choose(void) {
+ unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
- return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
+ return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
}
-static void
-prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
-{
+static malloc_mutex_t *
+prof_tdata_mutex_choose(uint64_t thr_uid) {
+ return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
+}
- ctx->bt = bt;
- ctx->lock = prof_ctx_mutex_choose();
+static prof_gctx_t *
+prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
+ /*
+ * Create a single allocation that has space for vec of length bt->len.
+ */
+ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
+ prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
+ sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
+ true);
+ if (gctx == NULL) {
+ return NULL;
+ }
+ gctx->lock = prof_gctx_mutex_choose();
/*
* Set nlimbo to 1, in order to avoid a race condition with
- * prof_ctx_merge()/prof_ctx_destroy().
+ * prof_tctx_destroy()/prof_gctx_try_destroy().
*/
- ctx->nlimbo = 1;
- ql_elm_new(ctx, dump_link);
- memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
- ql_new(&ctx->cnts_ql);
+ gctx->nlimbo = 1;
+ tctx_tree_new(&gctx->tctxs);
+ /* Duplicate bt. */
+ memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
+ gctx->bt.vec = gctx->vec;
+ gctx->bt.len = bt->len;
+ return gctx;
}
static void
-prof_ctx_destroy(prof_ctx_t *ctx)
-{
- prof_tdata_t *prof_tdata;
-
+prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
+ prof_tdata_t *tdata) {
cassert(config_prof);
/*
- * Check that ctx is still unused by any thread cache before destroying
- * it. prof_lookup() increments ctx->nlimbo in order to avoid a race
- * condition with this function, as does prof_ctx_merge() in order to
- * avoid a race between the main body of prof_ctx_merge() and entry
+ * Check that gctx is still unused by any thread cache before destroying
+ * it. prof_lookup() increments gctx->nlimbo in order to avoid a race
+ * condition with this function, as does prof_tctx_destroy() in order to
+ * avoid a race between the main body of prof_tctx_destroy() and entry
* into this function.
*/
- prof_tdata = prof_tdata_get(false);
- assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
- prof_enter(prof_tdata);
- malloc_mutex_lock(ctx->lock);
- if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
- ctx->nlimbo == 1) {
- assert(ctx->cnt_merged.curbytes == 0);
- assert(ctx->cnt_merged.accumobjs == 0);
- assert(ctx->cnt_merged.accumbytes == 0);
- /* Remove ctx from bt2ctx. */
- if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+ prof_enter(tsd, tdata_self);
+ malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
+ assert(gctx->nlimbo != 0);
+ if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
+ /* Remove gctx from bt2gctx. */
+ if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
not_reached();
- prof_leave(prof_tdata);
- /* Destroy ctx. */
- malloc_mutex_unlock(ctx->lock);
- bt_destroy(ctx->bt);
- idalloc(ctx);
+ }
+ prof_leave(tsd, tdata_self);
+ /* Destroy gctx. */
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+ idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
} else {
/*
- * Compensate for increment in prof_ctx_merge() or
+ * Compensate for increment in prof_tctx_destroy() or
* prof_lookup().
*/
- ctx->nlimbo--;
- malloc_mutex_unlock(ctx->lock);
- prof_leave(prof_tdata);
+ gctx->nlimbo--;
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+ prof_leave(tsd, tdata_self);
}
}
-static void
-prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
-{
- bool destroy;
+static bool
+prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
+ malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
- cassert(config_prof);
+ if (opt_prof_accum) {
+ return false;
+ }
+ if (tctx->cnts.curobjs != 0) {
+ return false;
+ }
+ if (tctx->prepared) {
+ return false;
+ }
+ return true;
+}
- /* Merge cnt stats and detach from ctx. */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
- ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
- ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
- ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
- ql_remove(&ctx->cnts_ql, cnt, cnts_link);
- if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
- ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
+static bool
+prof_gctx_should_destroy(prof_gctx_t *gctx) {
+ if (opt_prof_accum) {
+ return false;
+ }
+ if (!tctx_tree_empty(&gctx->tctxs)) {
+ return false;
+ }
+ if (gctx->nlimbo != 0) {
+ return false;
+ }
+ return true;
+}
+
+static void
+prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
+ prof_tdata_t *tdata = tctx->tdata;
+ prof_gctx_t *gctx = tctx->gctx;
+ bool destroy_tdata, destroy_tctx, destroy_gctx;
+
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
+ assert(tctx->cnts.curobjs == 0);
+ assert(tctx->cnts.curbytes == 0);
+ assert(!opt_prof_accum);
+ assert(tctx->cnts.accumobjs == 0);
+ assert(tctx->cnts.accumbytes == 0);
+
+ ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
+ destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
+ malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
+
+ malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ tctx_tree_remove(&gctx->tctxs, tctx);
+ destroy_tctx = true;
+ if (prof_gctx_should_destroy(gctx)) {
+ /*
+ * Increment gctx->nlimbo in order to keep another
+ * thread from winning the race to destroy gctx while
+ * this one has gctx->lock dropped. Without this, it
+ * would be possible for another thread to:
+ *
+ * 1) Sample an allocation associated with gctx.
+ * 2) Deallocate the sampled object.
+ * 3) Successfully prof_gctx_try_destroy(gctx).
+ *
+ * The result would be that gctx no longer exists by the
+ * time this thread accesses it in
+ * prof_gctx_try_destroy().
+ */
+ gctx->nlimbo++;
+ destroy_gctx = true;
+ } else {
+ destroy_gctx = false;
+ }
+ break;
+ case prof_tctx_state_dumping:
/*
- * Increment ctx->nlimbo in order to keep another thread from
- * winning the race to destroy ctx while this one has ctx->lock
- * dropped. Without this, it would be possible for another
- * thread to:
- *
- * 1) Sample an allocation associated with ctx.
- * 2) Deallocate the sampled object.
- * 3) Successfully prof_ctx_destroy(ctx).
- *
- * The result would be that ctx no longer exists by the time
- * this thread accesses it in prof_ctx_destroy().
+ * A dumping thread needs tctx to remain valid until dumping
+ * has finished. Change state such that the dumping thread will
+ * complete destruction during a late dump iteration phase.
*/
- ctx->nlimbo++;
- destroy = true;
- } else
- destroy = false;
- malloc_mutex_unlock(ctx->lock);
- if (destroy)
- prof_ctx_destroy(ctx);
+ tctx->state = prof_tctx_state_purgatory;
+ destroy_tctx = false;
+ destroy_gctx = false;
+ break;
+ default:
+ not_reached();
+ destroy_tctx = false;
+ destroy_gctx = false;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+ if (destroy_gctx) {
+ prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
+ tdata);
+ }
+
+ malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+
+ if (destroy_tdata) {
+ prof_tdata_destroy(tsd, tdata, false);
+ }
+
+ if (destroy_tctx) {
+ idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
+ }
}
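
The nlimbo handoff in prof_tctx_destroy() is a reusable shape: pin the object by bumping a limbo count under its lock, drop the lock, then let the destroy path re-check and unpin. The following is a minimal pthreads sketch of that pattern, not jemalloc code; obj_t, obj_release(), and the fields are illustrative names, and it assumes the object has already been removed from any lookup table (as ckh_remove() does above).

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct {
        pthread_mutex_t lock;
        unsigned live;    /* Live references. */
        unsigned nlimbo;  /* References pinned across an unlock for destroy. */
    } obj_t;

    static void
    obj_release(obj_t *obj) {
        bool destroy = false;

        pthread_mutex_lock(&obj->lock);
        if (--obj->live == 0 && obj->nlimbo == 0) {
            /* Pin obj so a racing thread cannot free it while unlocked. */
            obj->nlimbo++;
            destroy = true;
        }
        pthread_mutex_unlock(&obj->lock);

        if (destroy) {
            /* Re-check under the lock, as prof_gctx_try_destroy() does. */
            pthread_mutex_lock(&obj->lock);
            bool last = (obj->live == 0 && obj->nlimbo == 1);
            pthread_mutex_unlock(&obj->lock);
            if (last) {
                pthread_mutex_destroy(&obj->lock);
                free(obj);
            } else {
                pthread_mutex_lock(&obj->lock);
                obj->nlimbo--;  /* Another thread took over; just unpin. */
                pthread_mutex_unlock(&obj->lock);
            }
        }
    }
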
static bool
-prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
- prof_ctx_t **p_ctx, bool *p_new_ctx)
-{
+prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
+ void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
union {
- prof_ctx_t *p;
+ prof_gctx_t *p;
void *v;
- } ctx;
+ } gctx, tgctx;
union {
prof_bt_t *p;
void *v;
} btkey;
- bool new_ctx;
+ bool new_gctx;
- prof_enter(prof_tdata);
- if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
+ prof_enter(tsd, tdata);
+ if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
/* bt has never been seen before. Insert it. */
- ctx.v = imalloc(sizeof(prof_ctx_t));
- if (ctx.v == NULL) {
- prof_leave(prof_tdata);
- return (true);
- }
- btkey.p = bt_dup(bt);
- if (btkey.v == NULL) {
- prof_leave(prof_tdata);
- idalloc(ctx.v);
- return (true);
+ prof_leave(tsd, tdata);
+ tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
+ if (tgctx.v == NULL) {
+ return true;
}
- prof_ctx_init(ctx.p, btkey.p);
- if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
- /* OOM. */
- prof_leave(prof_tdata);
- idalloc(btkey.v);
- idalloc(ctx.v);
- return (true);
+ prof_enter(tsd, tdata);
+ if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
+ gctx.p = tgctx.p;
+ btkey.p = &gctx.p->bt;
+ if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
+ /* OOM. */
+ prof_leave(tsd, tdata);
+ idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
+ true, true);
+ return true;
+ }
+ new_gctx = true;
+ } else {
+ new_gctx = false;
}
- new_ctx = true;
} else {
+ tgctx.v = NULL;
+ new_gctx = false;
+ }
+
+ if (!new_gctx) {
/*
* Increment nlimbo, in order to avoid a race condition with
- * prof_ctx_merge()/prof_ctx_destroy().
+ * prof_tctx_destroy()/prof_gctx_try_destroy().
*/
- malloc_mutex_lock(ctx.p->lock);
- ctx.p->nlimbo++;
- malloc_mutex_unlock(ctx.p->lock);
- new_ctx = false;
+ malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
+ gctx.p->nlimbo++;
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
+ new_gctx = false;
+
+ if (tgctx.v != NULL) {
+ /* Lost race to insert. */
+ idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
+ true);
+ }
}
- prof_leave(prof_tdata);
+ prof_leave(tsd, tdata);
*p_btkey = btkey.v;
- *p_ctx = ctx.p;
- *p_new_ctx = new_ctx;
- return (false);
+ *p_gctx = gctx.p;
+ *p_new_gctx = new_gctx;
+ return false;
}
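
prof_lookup_global() drops the global lock while allocating the new gctx, re-searches after reacquiring it, and discards its copy if another thread inserted first. Below is a compact standalone sketch of that allocate-outside-the-lock / re-check / lose-gracefully shape; the toy slot map, lookup_or_insert(), and all names are hypothetical, not jemalloc's.

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    #define NSLOTS 64
    static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct { const char *key; void *val; } slots[NSLOTS];

    static void *
    map_find(const char *key) {
        for (int i = 0; i < NSLOTS; i++) {
            if (slots[i].key != NULL && strcmp(slots[i].key, key) == 0) {
                return slots[i].val;
            }
        }
        return NULL;
    }

    static int
    map_insert(const char *key, void *val) {
        for (int i = 0; i < NSLOTS; i++) {
            if (slots[i].key == NULL) {
                slots[i].key = key;
                slots[i].val = val;
                return 0;
            }
        }
        return -1;  /* Table full. */
    }

    static void *
    lookup_or_insert(const char *key) {
        pthread_mutex_lock(&map_lock);
        void *v = map_find(key);
        pthread_mutex_unlock(&map_lock);
        if (v != NULL) {
            return v;
        }
        void *fresh = malloc(16);  /* Allocate with the lock dropped. */
        if (fresh == NULL) {
            return NULL;
        }
        pthread_mutex_lock(&map_lock);
        v = map_find(key);  /* Re-check: did another thread win? */
        if (v == NULL && map_insert(key, fresh) == 0) {
            v = fresh;
            fresh = NULL;
        }
        pthread_mutex_unlock(&map_lock);
        if (fresh != NULL) {
            free(fresh);  /* Lost the race; discard our copy. */
        }
        return v;
    }
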
-prof_thr_cnt_t *
-prof_lookup(prof_bt_t *bt)
-{
+prof_tctx_t *
+prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
union {
- prof_thr_cnt_t *p;
+ prof_tctx_t *p;
void *v;
} ret;
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
+ bool not_found;
cassert(config_prof);
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- return (NULL);
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL) {
+ return NULL;
+ }
- if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
+ malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
+ not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
+ if (!not_found) { /* Note double negative! */
+ ret.p->prepared = true;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
+ if (not_found) {
void *btkey;
- prof_ctx_t *ctx;
- bool new_ctx;
+ prof_gctx_t *gctx;
+ bool new_gctx, error;
/*
* This thread's cache lacks bt. Look for it in the global
* cache.
*/
- if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
- return (NULL);
+ if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
+ &new_gctx)) {
+ return NULL;
+ }
- /* Link a prof_thd_cnt_t into ctx for this thread. */
- if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
- assert(ckh_count(&prof_tdata->bt2cnt) > 0);
- /*
- * Flush the least recently used cnt in order to keep
- * bt2cnt from becoming too large.
- */
- ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
- assert(ret.v != NULL);
- if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
- NULL, NULL))
- not_reached();
- ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
- prof_ctx_merge(ret.p->ctx, ret.p);
- /* ret can now be re-used. */
- } else {
- assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
- /* Allocate and partially initialize a new cnt. */
- ret.v = imalloc(sizeof(prof_thr_cnt_t));
- if (ret.p == NULL) {
- if (new_ctx)
- prof_ctx_destroy(ctx);
- return (NULL);
+ /* Link a prof_tctx_t into gctx for this thread. */
+ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
+ sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
+ arena_ichoose(tsd, NULL), true);
+ if (ret.p == NULL) {
+ if (new_gctx) {
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
}
- ql_elm_new(ret.p, cnts_link);
- ql_elm_new(ret.p, lru_link);
+ return NULL;
}
- /* Finish initializing ret. */
- ret.p->ctx = ctx;
- ret.p->epoch = 0;
+ ret.p->tdata = tdata;
+ ret.p->thr_uid = tdata->thr_uid;
+ ret.p->thr_discrim = tdata->thr_discrim;
memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
- if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
- if (new_ctx)
- prof_ctx_destroy(ctx);
- idalloc(ret.v);
- return (NULL);
+ ret.p->gctx = gctx;
+ ret.p->tctx_uid = tdata->tctx_uid_next++;
+ ret.p->prepared = true;
+ ret.p->state = prof_tctx_state_initializing;
+ malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
+ error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
+ malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
+ if (error) {
+ if (new_gctx) {
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
+ }
+ idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
+ return NULL;
}
- ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
- malloc_mutex_lock(ctx->lock);
- ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
- ctx->nlimbo--;
- malloc_mutex_unlock(ctx->lock);
- } else {
- /* Move ret to the front of the LRU. */
- ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
- ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+ malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
+ ret.p->state = prof_tctx_state_nominal;
+ tctx_tree_insert(&gctx->tctxs, ret.p);
+ gctx->nlimbo--;
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
}
- return (ret.p);
+ return ret.p;
+}
+
+/*
+ * The bodies of this function and prof_leakcheck() are compiled out unless heap
+ * profiling is enabled, so that it is possible to compile jemalloc with
+ * floating point support completely disabled. Avoiding floating point code is
+ * important on memory-constrained systems, but it also enables a workaround for
+ * versions of glibc that don't properly save/restore floating point registers
+ * during dynamic lazy symbol loading (which internally calls into whatever
+ * malloc implementation happens to be integrated into the application). Note
+ * that some compilers (e.g. gcc 4.8) may use floating point registers for fast
+ * memory moves, so jemalloc must be compiled with such optimizations disabled
+ * (e.g. -mno-sse) in order for the workaround to be complete.
+ */
+void
+prof_sample_threshold_update(prof_tdata_t *tdata) {
+#ifdef JEMALLOC_PROF
+ uint64_t r;
+ double u;
+
+ if (!config_prof) {
+ return;
+ }
+
+ if (lg_prof_sample == 0) {
+ tdata->bytes_until_sample = 0;
+ return;
+ }
+
+ /*
+ * Compute sample interval as a geometrically distributed random
+ * variable with mean (2^lg_prof_sample).
+ *
+ * __ __
+ * | log(u) | 1
+ * tdata->bytes_until_sample = | -------- |, where p = ---------------
+ * | log(1-p) | lg_prof_sample
+ * 2
+ *
+ * For more information on the math, see:
+ *
+ * Non-Uniform Random Variate Generation
+ * Luc Devroye
+ * Springer-Verlag, New York, 1986
+ * pp 500
+ * (http://luc.devroye.org/rnbookindex.html)
+ */
+ r = prng_lg_range_u64(&tdata->prng_state, 53);
+ u = (double)r * (1.0/9007199254740992.0L);
+ tdata->bytes_until_sample = (uint64_t)(log(u) /
+ log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+ + (uint64_t)1U;
+#endif
}
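
For reference, here is the same inverse-transform computation with the PRNG factored out; sample_interval() and r53 are illustrative names, and the sketch assumes r53 is a nonzero uniform 53-bit value (jemalloc draws it with prng_lg_range_u64() above, and zero would make log(u) infinite).

    #include <math.h>
    #include <stdint.h>

    static uint64_t
    sample_interval(uint64_t r53, unsigned lg_sample) {
        /* Map the 53-bit integer to u in (0, 1). */
        double u = (double)r53 * (1.0 / 9007199254740992.0);
        /* p = 1 / 2^lg_sample, so the mean interval is ~2^lg_sample bytes. */
        double p = 1.0 / (double)((uint64_t)1U << lg_sample);
        /* Geometric inverse CDF: ceil(log(u) / log(1 - p)). */
        return (uint64_t)(log(u) / log(1.0 - p)) + 1;
    }
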
#ifdef JEMALLOC_JET
+static prof_tdata_t *
+prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+ void *arg) {
+ size_t *tdata_count = (size_t *)arg;
+
+ (*tdata_count)++;
+
+ return NULL;
+}
+
+size_t
+prof_tdata_count(void) {
+ size_t tdata_count = 0;
+ tsdn_t *tsdn;
+
+ tsdn = tsdn_fetch();
+ malloc_mutex_lock(tsdn, &tdatas_mtx);
+ tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
+ (void *)&tdata_count);
+ malloc_mutex_unlock(tsdn, &tdatas_mtx);
+
+ return tdata_count;
+}
+
size_t
-prof_bt_count(void)
-{
+prof_bt_count(void) {
size_t bt_count;
- prof_tdata_t *prof_tdata;
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- return (0);
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL) {
+ return 0;
+ }
- prof_enter(prof_tdata);
- bt_count = ckh_count(&bt2ctx);
- prof_leave(prof_tdata);
+ malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
+ bt_count = ckh_count(&bt2gctx);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
- return (bt_count);
+ return bt_count;
}
#endif
-#ifdef JEMALLOC_JET
-#undef prof_dump_open
-#define prof_dump_open JEMALLOC_N(prof_dump_open_impl)
-#endif
static int
-prof_dump_open(bool propagate_err, const char *filename)
-{
+prof_dump_open_impl(bool propagate_err, const char *filename) {
int fd;
fd = creat(filename, 0644);
- if (fd == -1 && propagate_err == false) {
+ if (fd == -1 && !propagate_err) {
malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n",
filename);
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
}
- return (fd);
+ return fd;
}
-#ifdef JEMALLOC_JET
-#undef prof_dump_open
-#define prof_dump_open JEMALLOC_N(prof_dump_open)
-prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
-#endif
+prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
static bool
-prof_dump_flush(bool propagate_err)
-{
+prof_dump_flush(bool propagate_err) {
bool ret = false;
ssize_t err;
cassert(config_prof);
- err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
+ err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
if (err == -1) {
- if (propagate_err == false) {
+ if (!propagate_err) {
malloc_write("<jemalloc>: write() failed during heap "
"profile flush\n");
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
}
ret = true;
}
prof_dump_buf_end = 0;
- return (ret);
+ return ret;
}
static bool
-prof_dump_close(bool propagate_err)
-{
+prof_dump_close(bool propagate_err) {
bool ret;
assert(prof_dump_fd != -1);
@@ -723,13 +1003,12 @@ prof_dump_close(bool propagate_err)
close(prof_dump_fd);
prof_dump_fd = -1;
- return (ret);
+ return ret;
}
static bool
-prof_dump_write(bool propagate_err, const char *s)
-{
- unsigned i, slen, n;
+prof_dump_write(bool propagate_err, const char *s) {
+ size_t i, slen, n;
cassert(config_prof);
@@ -737,9 +1016,11 @@ prof_dump_write(bool propagate_err, const char *s)
slen = strlen(s);
while (i < slen) {
/* Flush the buffer if it is full. */
- if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
- if (prof_dump_flush(propagate_err) && propagate_err)
- return (true);
+ if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
+ if (prof_dump_flush(propagate_err) && propagate_err) {
+ return true;
+ }
+ }
if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
/* Finish writing. */
@@ -753,13 +1034,12 @@ prof_dump_write(bool propagate_err, const char *s)
i += n;
}
- return (false);
+ return false;
}
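
prof_dump_write() is a plain fixed-buffer writer: copy as much as fits, flush only when the buffer is exactly full, and propagate write errors upward. The following self-contained sketch shows the same loop against an arbitrary fd; BUFSIZE, flush_buf(), and buffered_write() are stand-in names, and partial writes are not retried here for brevity.

    #include <stdbool.h>
    #include <string.h>
    #include <unistd.h>

    #define BUFSIZE 65536  /* Stand-in for PROF_DUMP_BUFSIZE. */
    static char buf[BUFSIZE];
    static size_t buf_end;

    static bool
    flush_buf(int fd) {
        bool err = (write(fd, buf, buf_end) == -1);
        buf_end = 0;
        return err;
    }

    static bool
    buffered_write(int fd, const char *s) {
        size_t i = 0, slen = strlen(s);

        while (i < slen) {
            /* Flush only when the buffer is actually full. */
            if (buf_end == BUFSIZE && flush_buf(fd)) {
                return true;  /* Propagate the write error. */
            }
            size_t n = slen - i;
            if (n > BUFSIZE - buf_end) {
                n = BUFSIZE - buf_end;
            }
            memcpy(&buf[buf_end], &s[i], n);
            buf_end += n;
            i += n;
        }
        return false;
    }
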
-JEMALLOC_ATTR(format(printf, 2, 3))
+JEMALLOC_FORMAT_PRINTF(2, 3)
static bool
-prof_dump_printf(bool propagate_err, const char *format, ...)
-{
+prof_dump_printf(bool propagate_err, const char *format, ...) {
bool ret;
va_list ap;
char buf[PROF_PRINTF_BUFSIZE];
@@ -769,179 +1049,409 @@ prof_dump_printf(bool propagate_err, const char *format, ...)
va_end(ap);
ret = prof_dump_write(propagate_err, buf);
- return (ret);
+ return ret;
}
static void
-prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
- prof_ctx_list_t *ctx_ql)
-{
- prof_thr_cnt_t *thr_cnt;
- prof_cnt_t tcnt;
+prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
+ malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
+
+ malloc_mutex_lock(tsdn, tctx->gctx->lock);
+ switch (tctx->state) {
+ case prof_tctx_state_initializing:
+ malloc_mutex_unlock(tsdn, tctx->gctx->lock);
+ return;
+ case prof_tctx_state_nominal:
+ tctx->state = prof_tctx_state_dumping;
+ malloc_mutex_unlock(tsdn, tctx->gctx->lock);
+
+ memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
+
+ tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
+ tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
+ if (opt_prof_accum) {
+ tdata->cnt_summed.accumobjs +=
+ tctx->dump_cnts.accumobjs;
+ tdata->cnt_summed.accumbytes +=
+ tctx->dump_cnts.accumbytes;
+ }
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
+ not_reached();
+ }
+}
+
+static void
+prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
+ malloc_mutex_assert_owner(tsdn, gctx->lock);
+
+ gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
+ gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
+ if (opt_prof_accum) {
+ gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
+ gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
+ }
+}
+
+static prof_tctx_t *
+prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
+ tsdn_t *tsdn = (tsdn_t *)arg;
+
+ malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
+
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ /* New since dumping started; ignore. */
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
+ prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
+ break;
+ default:
+ not_reached();
+ }
+
+ return NULL;
+}
+
+struct prof_tctx_dump_iter_arg_s {
+ tsdn_t *tsdn;
+ bool propagate_err;
+};
+
+static prof_tctx_t *
+prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
+ struct prof_tctx_dump_iter_arg_s *arg =
+ (struct prof_tctx_dump_iter_arg_s *)opaque;
+
+ malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
+
+ switch (tctx->state) {
+ case prof_tctx_state_initializing:
+ case prof_tctx_state_nominal:
+ /* Not captured by this dump. */
+ break;
+ case prof_tctx_state_dumping:
+ case prof_tctx_state_purgatory:
+ if (prof_dump_printf(arg->propagate_err,
+ " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
+ "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
+ tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
+ tctx->dump_cnts.accumbytes)) {
+ return tctx;
+ }
+ break;
+ default:
+ not_reached();
+ }
+ return NULL;
+}
+
+static prof_tctx_t *
+prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
+ tsdn_t *tsdn = (tsdn_t *)arg;
+ prof_tctx_t *ret;
+
+ malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
+
+ switch (tctx->state) {
+ case prof_tctx_state_nominal:
+ /* New since dumping started; ignore. */
+ break;
+ case prof_tctx_state_dumping:
+ tctx->state = prof_tctx_state_nominal;
+ break;
+ case prof_tctx_state_purgatory:
+ ret = tctx;
+ goto label_return;
+ default:
+ not_reached();
+ }
+
+ ret = NULL;
+label_return:
+ return ret;
+}
+
+static void
+prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
cassert(config_prof);
- malloc_mutex_lock(ctx->lock);
+ malloc_mutex_lock(tsdn, gctx->lock);
/*
- * Increment nlimbo so that ctx won't go away before dump.
- * Additionally, link ctx into the dump list so that it is included in
+ * Increment nlimbo so that gctx won't go away before dump.
+ * Additionally, link gctx into the dump list so that it is included in
* prof_dump()'s second pass.
*/
- ctx->nlimbo++;
- ql_tail_insert(ctx_ql, ctx, dump_link);
+ gctx->nlimbo++;
+ gctx_tree_insert(gctxs, gctx);
- memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
- ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
- volatile unsigned *epoch = &thr_cnt->epoch;
+ memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
- while (true) {
- unsigned epoch0 = *epoch;
+ malloc_mutex_unlock(tsdn, gctx->lock);
+}
- /* Make sure epoch is even. */
- if (epoch0 & 1U)
- continue;
+struct prof_gctx_merge_iter_arg_s {
+ tsdn_t *tsdn;
+ size_t leak_ngctx;
+};
+
+static prof_gctx_t *
+prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
+ struct prof_gctx_merge_iter_arg_s *arg =
+ (struct prof_gctx_merge_iter_arg_s *)opaque;
+
+ malloc_mutex_lock(arg->tsdn, gctx->lock);
+ tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
+ (void *)arg->tsdn);
+ if (gctx->cnt_summed.curobjs != 0) {
+ arg->leak_ngctx++;
+ }
+ malloc_mutex_unlock(arg->tsdn, gctx->lock);
- memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
+ return NULL;
+}
- /* Terminate if epoch didn't change while reading. */
- if (*epoch == epoch0)
- break;
- }
+static void
+prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
+ prof_tdata_t *tdata = prof_tdata_get(tsd, false);
+ prof_gctx_t *gctx;
- ctx->cnt_summed.curobjs += tcnt.curobjs;
- ctx->cnt_summed.curbytes += tcnt.curbytes;
- if (opt_prof_accum) {
- ctx->cnt_summed.accumobjs += tcnt.accumobjs;
- ctx->cnt_summed.accumbytes += tcnt.accumbytes;
+ /*
+ * Standard tree iteration won't work here, because as soon as we
+ * decrement gctx->nlimbo and unlock gctx, another thread can
+ * concurrently destroy it, which will corrupt the tree. Therefore,
+ * tear down the tree one node at a time during iteration.
+ */
+ while ((gctx = gctx_tree_first(gctxs)) != NULL) {
+ gctx_tree_remove(gctxs, gctx);
+ malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
+ {
+ prof_tctx_t *next;
+
+ next = NULL;
+ do {
+ prof_tctx_t *to_destroy =
+ tctx_tree_iter(&gctx->tctxs, next,
+ prof_tctx_finish_iter,
+ (void *)tsd_tsdn(tsd));
+ if (to_destroy != NULL) {
+ next = tctx_tree_next(&gctx->tctxs,
+ to_destroy);
+ tctx_tree_remove(&gctx->tctxs,
+ to_destroy);
+ idalloctm(tsd_tsdn(tsd), to_destroy,
+ NULL, NULL, true, true);
+ } else {
+ next = NULL;
+ }
+ } while (next != NULL);
+ }
+ gctx->nlimbo--;
+ if (prof_gctx_should_destroy(gctx)) {
+ gctx->nlimbo++;
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
+ prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
+ } else {
+ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
}
}
+}
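
The teardown loop in prof_gctx_finish() is the standard destructive traversal: re-fetch the first node on every pass instead of holding an iterator, because the node just visited may be freed. The same shape on a singly linked list, as a minimal sketch (node_t and drain() are illustrative names):

    #include <stdlib.h>

    typedef struct node_s {
        struct node_s *next;
    } node_t;

    static void
    drain(node_t **head) {
        node_t *n;

        /* Re-fetch the head each pass; a saved iterator would dangle. */
        while ((n = *head) != NULL) {
            *head = n->next;  /* Analogue of gctx_tree_remove(). */
            free(n);
        }
    }
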
- if (ctx->cnt_summed.curobjs != 0)
- (*leak_nctx)++;
+struct prof_tdata_merge_iter_arg_s {
+ tsdn_t *tsdn;
+ prof_cnt_t cnt_all;
+};
+
+static prof_tdata_t *
+prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+ void *opaque) {
+ struct prof_tdata_merge_iter_arg_s *arg =
+ (struct prof_tdata_merge_iter_arg_s *)opaque;
+
+ malloc_mutex_lock(arg->tsdn, tdata->lock);
+ if (!tdata->expired) {
+ size_t tabind;
+ union {
+ prof_tctx_t *p;
+ void *v;
+ } tctx;
+
+ tdata->dumping = true;
+ memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
+ for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
+ &tctx.v);) {
+ prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
+ }
- /* Add to cnt_all. */
- cnt_all->curobjs += ctx->cnt_summed.curobjs;
- cnt_all->curbytes += ctx->cnt_summed.curbytes;
- if (opt_prof_accum) {
- cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
- cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
+ arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
+ arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
+ if (opt_prof_accum) {
+ arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
+ arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
+ }
+ } else {
+ tdata->dumping = false;
}
+ malloc_mutex_unlock(arg->tsdn, tdata->lock);
- malloc_mutex_unlock(ctx->lock);
+ return NULL;
}
-static bool
-prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
-{
-
- if (opt_lg_prof_sample == 0) {
- if (prof_dump_printf(propagate_err,
- "heap profile: %"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
- cnt_all->curobjs, cnt_all->curbytes,
- cnt_all->accumobjs, cnt_all->accumbytes))
- return (true);
- } else {
- if (prof_dump_printf(propagate_err,
- "heap profile: %"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
- cnt_all->curobjs, cnt_all->curbytes,
- cnt_all->accumobjs, cnt_all->accumbytes,
- ((uint64_t)1U << opt_lg_prof_sample)))
- return (true);
+static prof_tdata_t *
+prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+ void *arg) {
+ bool propagate_err = *(bool *)arg;
+
+ if (!tdata->dumping) {
+ return NULL;
}
- return (false);
+ if (prof_dump_printf(propagate_err,
+ " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
+ tdata->thr_uid, tdata->cnt_summed.curobjs,
+ tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
+ tdata->cnt_summed.accumbytes,
+ (tdata->thread_name != NULL) ? " " : "",
+ (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
+ return tdata;
+ }
+ return NULL;
}
-static void
-prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
-{
-
- ctx->nlimbo--;
- ql_remove(ctx_ql, ctx, dump_link);
-}
+static bool
+prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
+ const prof_cnt_t *cnt_all) {
+ bool ret;
-static void
-prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
-{
+ if (prof_dump_printf(propagate_err,
+ "heap_v2/%"FMTu64"\n"
+ " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
+ ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
+ cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
+ return true;
+ }
- malloc_mutex_lock(ctx->lock);
- prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
- malloc_mutex_unlock(ctx->lock);
+ malloc_mutex_lock(tsdn, &tdatas_mtx);
+ ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
+ (void *)&propagate_err) != NULL);
+ malloc_mutex_unlock(tsdn, &tdatas_mtx);
+ return ret;
}
+prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
static bool
-prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
- prof_ctx_list_t *ctx_ql)
-{
+prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
+ const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
bool ret;
unsigned i;
+ struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
cassert(config_prof);
-
- /*
- * Current statistics can sum to 0 as a result of unmerged per thread
- * statistics. Additionally, interval- and growth-triggered dumps can
- * occur between the time a ctx is created and when its statistics are
- * filled in. Avoid dumping any ctx that is an artifact of either
- * implementation detail.
- */
- malloc_mutex_lock(ctx->lock);
- if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
- (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
- assert(ctx->cnt_summed.curobjs == 0);
- assert(ctx->cnt_summed.curbytes == 0);
- assert(ctx->cnt_summed.accumobjs == 0);
- assert(ctx->cnt_summed.accumbytes == 0);
+ malloc_mutex_assert_owner(tsdn, gctx->lock);
+
+	/* Avoid dumping any gctx that has no useful data. */
+ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
+ (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
+ assert(gctx->cnt_summed.curobjs == 0);
+ assert(gctx->cnt_summed.curbytes == 0);
+ assert(gctx->cnt_summed.accumobjs == 0);
+ assert(gctx->cnt_summed.accumbytes == 0);
ret = false;
goto label_return;
}
- if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
- " [%"PRIu64": %"PRIu64"] @",
- ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
- ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
+ if (prof_dump_printf(propagate_err, "@")) {
ret = true;
goto label_return;
}
-
for (i = 0; i < bt->len; i++) {
- if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
+ if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
(uintptr_t)bt->vec[i])) {
ret = true;
goto label_return;
}
}
- if (prof_dump_write(propagate_err, "\n")) {
+ if (prof_dump_printf(propagate_err,
+ "\n"
+ " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
+ gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
+ gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
+ ret = true;
+ goto label_return;
+ }
+
+ prof_tctx_dump_iter_arg.tsdn = tsdn;
+ prof_tctx_dump_iter_arg.propagate_err = propagate_err;
+ if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
+ (void *)&prof_tctx_dump_iter_arg) != NULL) {
ret = true;
goto label_return;
}
ret = false;
label_return:
- prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
- malloc_mutex_unlock(ctx->lock);
- return (ret);
+ return ret;
+}
+
+#ifndef _WIN32
+JEMALLOC_FORMAT_PRINTF(1, 2)
+static int
+prof_open_maps(const char *format, ...) {
+ int mfd;
+ va_list ap;
+ char filename[PATH_MAX + 1];
+
+ va_start(ap, format);
+ malloc_vsnprintf(filename, sizeof(filename), format, ap);
+ va_end(ap);
+
+#if defined(O_CLOEXEC)
+ mfd = open(filename, O_RDONLY | O_CLOEXEC);
+#else
+ mfd = open(filename, O_RDONLY);
+ if (mfd != -1) {
+ fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
+ }
+#endif
+
+ return mfd;
+}
+#endif
+
+static int
+prof_getpid(void) {
+#ifdef _WIN32
+ return GetCurrentProcessId();
+#else
+ return getpid();
+#endif
}
static bool
-prof_dump_maps(bool propagate_err)
-{
+prof_dump_maps(bool propagate_err) {
bool ret;
int mfd;
- char filename[PATH_MAX + 1];
cassert(config_prof);
#ifdef __FreeBSD__
- malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
+ mfd = prof_open_maps("/proc/curproc/map");
+#elif defined(_WIN32)
+	mfd = -1; /* Not implemented. */
#else
- malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
- (int)getpid());
+ {
+ int pid = prof_getpid();
+
+ mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
+ if (mfd == -1) {
+ mfd = prof_open_maps("/proc/%d/maps", pid);
+ }
+ }
#endif
- mfd = open(filename, O_RDONLY);
if (mfd != -1) {
ssize_t nread;
@@ -961,8 +1471,9 @@ prof_dump_maps(bool propagate_err)
goto label_return;
}
}
- nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
- PROF_DUMP_BUFSIZE - prof_dump_buf_end);
+ nread = malloc_read_fd(mfd,
+ &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
+ - prof_dump_buf_end);
} while (nread > 0);
} else {
ret = true;
@@ -971,214 +1482,391 @@ prof_dump_maps(bool propagate_err)
ret = false;
label_return:
- if (mfd != -1)
+ if (mfd != -1) {
close(mfd);
- return (ret);
+ }
+ return ret;
}
+/*
+ * See prof_sample_threshold_update() comment for why the body of this function
+ * is conditionally compiled.
+ */
static void
-prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
- const char *filename)
-{
-
+prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
+ const char *filename) {
+#ifdef JEMALLOC_PROF
+ /*
+	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
+ * differ slightly from what jeprof reports, because here we scale the
+ * summary values, whereas jeprof scales each context individually and
+ * reports the sums of the scaled values.
+ */
if (cnt_all->curbytes != 0) {
- malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
- PRId64" object%s, %zu context%s\n",
- cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
- cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
- leak_nctx, (leak_nctx != 1) ? "s" : "");
+ double sample_period = (double)((uint64_t)1 << lg_prof_sample);
+ double ratio = (((double)cnt_all->curbytes) /
+ (double)cnt_all->curobjs) / sample_period;
+ double scale_factor = 1.0 / (1.0 - exp(-ratio));
+ uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
+ * scale_factor);
+ uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
+ scale_factor);
+
+ malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
+ " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
+ curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
+ 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
malloc_printf(
- "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
+ "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
filename);
}
+#endif
}
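
The scaling step above can be read in isolation: with sample period 2^lg_prof_sample, an allocation of size s survives sampling with probability 1 - exp(-s/period), so the sampled summary is divided by that probability evaluated at the mean sampled object size. A standalone sketch under those assumptions (unsample() is an illustrative name; it assumes sampled_objs != 0):

    #include <math.h>
    #include <stdint.h>

    static void
    unsample(uint64_t sampled_bytes, uint64_t sampled_objs, unsigned lg_sample,
        uint64_t *est_bytes, uint64_t *est_objs) {
        double period = (double)((uint64_t)1 << lg_sample);
        /* Mean sampled object size, relative to the sample period. */
        double ratio = ((double)sampled_bytes / (double)sampled_objs) / period;
        /* Dividing by the sampling probability undoes the sampling bias. */
        double scale = 1.0 / (1.0 - exp(-ratio));

        *est_bytes = (uint64_t)round((double)sampled_bytes * scale);
        *est_objs = (uint64_t)round((double)sampled_objs * scale);
    }
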
-static bool
-prof_dump(bool propagate_err, const char *filename, bool leakcheck)
-{
- prof_tdata_t *prof_tdata;
- prof_cnt_t cnt_all;
+struct prof_gctx_dump_iter_arg_s {
+ tsdn_t *tsdn;
+ bool propagate_err;
+};
+
+static prof_gctx_t *
+prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
+ prof_gctx_t *ret;
+ struct prof_gctx_dump_iter_arg_s *arg =
+ (struct prof_gctx_dump_iter_arg_s *)opaque;
+
+ malloc_mutex_lock(arg->tsdn, gctx->lock);
+
+ if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
+ gctxs)) {
+ ret = gctx;
+ goto label_return;
+ }
+
+ ret = NULL;
+label_return:
+ malloc_mutex_unlock(arg->tsdn, gctx->lock);
+ return ret;
+}
+
+static void
+prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
+ struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
+ struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
+ prof_gctx_tree_t *gctxs) {
size_t tabind;
union {
- prof_ctx_t *p;
+ prof_gctx_t *p;
void *v;
- } ctx;
- size_t leak_nctx;
- prof_ctx_list_t ctx_ql;
-
- cassert(config_prof);
+ } gctx;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- return (true);
+ prof_enter(tsd, tdata);
- malloc_mutex_lock(&prof_dump_mtx);
+ /*
+ * Put gctx's in limbo and clear their counters in preparation for
+ * summing.
+ */
+ gctx_tree_new(gctxs);
+ for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
+ prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
+ }
- /* Merge per thread profile stats, and sum them in cnt_all. */
- memset(&cnt_all, 0, sizeof(prof_cnt_t));
- leak_nctx = 0;
- ql_new(&ctx_ql);
- prof_enter(prof_tdata);
- for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
- prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
- prof_leave(prof_tdata);
+ /*
+ * Iterate over tdatas, and for the non-expired ones snapshot their tctx
+ * stats and merge them into the associated gctx's.
+ */
+ prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
+ memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
+ malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+ tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
+ (void *)prof_tdata_merge_iter_arg);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
+
+ /* Merge tctx stats into gctx's. */
+ prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
+ prof_gctx_merge_iter_arg->leak_ngctx = 0;
+ gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
+ (void *)prof_gctx_merge_iter_arg);
+
+ prof_leave(tsd, tdata);
+}
+static bool
+prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
+ bool leakcheck, prof_tdata_t *tdata,
+ struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
+ struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
+ struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
+ prof_gctx_tree_t *gctxs) {
/* Create dump file. */
- if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
- goto label_open_close_error;
+ if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
+ return true;
+ }
/* Dump profile header. */
- if (prof_dump_header(propagate_err, &cnt_all))
+ if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
+ &prof_tdata_merge_iter_arg->cnt_all)) {
goto label_write_error;
+ }
- /* Dump per ctx profile stats. */
- while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
- if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
- goto label_write_error;
+ /* Dump per gctx profile stats. */
+ prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
+ prof_gctx_dump_iter_arg->propagate_err = propagate_err;
+ if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
+ (void *)prof_gctx_dump_iter_arg) != NULL) {
+ goto label_write_error;
}
/* Dump /proc/<pid>/maps if possible. */
- if (prof_dump_maps(propagate_err))
+ if (prof_dump_maps(propagate_err)) {
goto label_write_error;
+ }
- if (prof_dump_close(propagate_err))
- goto label_open_close_error;
-
- malloc_mutex_unlock(&prof_dump_mtx);
-
- if (leakcheck)
- prof_leakcheck(&cnt_all, leak_nctx, filename);
+ if (prof_dump_close(propagate_err)) {
+ return true;
+ }
- return (false);
+ return false;
label_write_error:
prof_dump_close(propagate_err);
-label_open_close_error:
- while ((ctx.p = ql_first(&ctx_ql)) != NULL)
- prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
- malloc_mutex_unlock(&prof_dump_mtx);
- return (true);
+ return true;
}
-#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
-#define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
-static void
-prof_dump_filename(char *filename, char v, int64_t vseq)
-{
+static bool
+prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
+ bool leakcheck) {
+ cassert(config_prof);
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+
+	prof_tdata_t *tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL) {
+ return true;
+ }
+
+ pre_reentrancy(tsd, NULL);
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
+
+ prof_gctx_tree_t gctxs;
+ struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
+ struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
+ struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
+ prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
+ &prof_gctx_merge_iter_arg, &gctxs);
+ bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
+ &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
+ &prof_gctx_dump_iter_arg, &gctxs);
+ prof_gctx_finish(tsd, &gctxs);
+
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
+ post_reentrancy(tsd);
+
+ if (err) {
+ return true;
+ }
+
+ if (leakcheck) {
+ prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
+ prof_gctx_merge_iter_arg.leak_ngctx, filename);
+ }
+ return false;
+}
+
+#ifdef JEMALLOC_JET
+void
+prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
+ uint64_t *accumbytes) {
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
+ struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
+ struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
+ prof_gctx_tree_t gctxs;
+
+ tsd = tsd_fetch();
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL) {
+ if (curobjs != NULL) {
+ *curobjs = 0;
+ }
+ if (curbytes != NULL) {
+ *curbytes = 0;
+ }
+ if (accumobjs != NULL) {
+ *accumobjs = 0;
+ }
+ if (accumbytes != NULL) {
+ *accumbytes = 0;
+ }
+ return;
+ }
+
+ prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
+ &prof_gctx_merge_iter_arg, &gctxs);
+ prof_gctx_finish(tsd, &gctxs);
+
+ if (curobjs != NULL) {
+ *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
+ }
+ if (curbytes != NULL) {
+ *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
+ }
+ if (accumobjs != NULL) {
+ *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
+ }
+ if (accumbytes != NULL) {
+ *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
+ }
+}
+#endif
+#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
+#define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
+static void
+prof_dump_filename(char *filename, char v, uint64_t vseq) {
cassert(config_prof);
if (vseq != VSEQ_INVALID) {
/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
- "%s.%d.%"PRIu64".%c%"PRId64".heap",
- opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
+ "%s.%d.%"FMTu64".%c%"FMTu64".heap",
+ opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
} else {
/* "<prefix>.<pid>.<seq>.<v>.heap" */
malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
- "%s.%d.%"PRIu64".%c.heap",
- opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
+ "%s.%d.%"FMTu64".%c.heap",
+ opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
}
prof_dump_seq++;
}
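
For concreteness, assuming opt_prof_prefix "jeprof" and pid 1234, the four dump triggers in this file produce names like:

    jeprof.1234.0.f.heap    final dump at exit ('f', vseq == VSEQ_INVALID)
    jeprof.1234.1.i0.heap   interval-triggered dump ('i')
    jeprof.1234.2.m0.heap   manually triggered dump ('m')
    jeprof.1234.3.u0.heap   growth-triggered gdump ('u')

prof_dump_seq is shared across all four triggers, so the middle sequence number increases monotonically regardless of which kind of dump fired.
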
static void
-prof_fdump(void)
-{
+prof_fdump(void) {
+ tsd_t *tsd;
char filename[DUMP_FILENAME_BUFSIZE];
cassert(config_prof);
+ assert(opt_prof_final);
+ assert(opt_prof_prefix[0] != '\0');
- if (prof_booted == false)
+ if (!prof_booted) {
return;
+ }
+ tsd = tsd_fetch();
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
+ prof_dump_filename(filename, 'f', VSEQ_INVALID);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
+ prof_dump(tsd, false, filename, opt_prof_leak);
+}
- if (opt_prof_final && opt_prof_prefix[0] != '\0') {
- malloc_mutex_lock(&prof_dump_seq_mtx);
- prof_dump_filename(filename, 'f', VSEQ_INVALID);
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, opt_prof_leak);
+bool
+prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
+ cassert(config_prof);
+
+#ifndef JEMALLOC_ATOMIC_U64
+ if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
+ WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
+ return true;
}
+ prof_accum->accumbytes = 0;
+#else
+ atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
+#endif
+ return false;
}
void
-prof_idump(void)
-{
- prof_tdata_t *prof_tdata;
- char filename[PATH_MAX + 1];
+prof_idump(tsdn_t *tsdn) {
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
cassert(config_prof);
- if (prof_booted == false)
+ if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
return;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ }
+ tsd = tsdn_tsd(tsdn);
+ if (tsd_reentrancy_level_get(tsd) > 0) {
return;
- if (prof_tdata->enq) {
- prof_tdata->enq_idump = true;
+ }
+
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL) {
+ return;
+ }
+ if (tdata->enq) {
+ tdata->enq_idump = true;
return;
}
if (opt_prof_prefix[0] != '\0') {
- malloc_mutex_lock(&prof_dump_seq_mtx);
+ char filename[PATH_MAX + 1];
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
prof_dump_filename(filename, 'i', prof_dump_iseq);
prof_dump_iseq++;
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, false);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
+ prof_dump(tsd, false, filename, false);
}
}
bool
-prof_mdump(const char *filename)
-{
- char filename_buf[DUMP_FILENAME_BUFSIZE];
-
+prof_mdump(tsd_t *tsd, const char *filename) {
cassert(config_prof);
+ assert(tsd_reentrancy_level_get(tsd) == 0);
- if (opt_prof == false || prof_booted == false)
- return (true);
-
+ if (!opt_prof || !prof_booted) {
+ return true;
+ }
+ char filename_buf[DUMP_FILENAME_BUFSIZE];
if (filename == NULL) {
/* No filename specified, so automatically generate one. */
- if (opt_prof_prefix[0] == '\0')
- return (true);
- malloc_mutex_lock(&prof_dump_seq_mtx);
+ if (opt_prof_prefix[0] == '\0') {
+ return true;
+ }
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
prof_dump_mseq++;
- malloc_mutex_unlock(&prof_dump_seq_mtx);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
filename = filename_buf;
}
- return (prof_dump(true, filename, false));
+ return prof_dump(tsd, true, filename, false);
}
void
-prof_gdump(void)
-{
- prof_tdata_t *prof_tdata;
- char filename[DUMP_FILENAME_BUFSIZE];
+prof_gdump(tsdn_t *tsdn) {
+ tsd_t *tsd;
+ prof_tdata_t *tdata;
cassert(config_prof);
- if (prof_booted == false)
+ if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
+ return;
+ }
+ tsd = tsdn_tsd(tsdn);
+ if (tsd_reentrancy_level_get(tsd) > 0) {
return;
- prof_tdata = prof_tdata_get(false);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ }
+
+ tdata = prof_tdata_get(tsd, false);
+ if (tdata == NULL) {
return;
- if (prof_tdata->enq) {
- prof_tdata->enq_gdump = true;
+ }
+ if (tdata->enq) {
+ tdata->enq_gdump = true;
return;
}
if (opt_prof_prefix[0] != '\0') {
- malloc_mutex_lock(&prof_dump_seq_mtx);
+ char filename[DUMP_FILENAME_BUFSIZE];
+ malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
prof_dump_filename(filename, 'u', prof_dump_useq);
prof_dump_useq++;
- malloc_mutex_unlock(&prof_dump_seq_mtx);
- prof_dump(false, filename, false);
+ malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
+ prof_dump(tsd, false, filename, false);
}
}
static void
-prof_bt_hash(const void *key, size_t r_hash[2])
-{
+prof_bt_hash(const void *key, size_t r_hash[2]) {
prof_bt_t *bt = (prof_bt_t *)key;
cassert(config_prof);
@@ -1187,106 +1875,389 @@ prof_bt_hash(const void *key, size_t r_hash[2])
}
static bool
-prof_bt_keycomp(const void *k1, const void *k2)
-{
+prof_bt_keycomp(const void *k1, const void *k2) {
const prof_bt_t *bt1 = (prof_bt_t *)k1;
const prof_bt_t *bt2 = (prof_bt_t *)k2;
cassert(config_prof);
- if (bt1->len != bt2->len)
- return (false);
+ if (bt1->len != bt2->len) {
+ return false;
+ }
return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
-prof_tdata_t *
-prof_tdata_init(void)
-{
- prof_tdata_t *prof_tdata;
+static uint64_t
+prof_thr_uid_alloc(tsdn_t *tsdn) {
+ uint64_t thr_uid;
+
+ malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
+ thr_uid = next_thr_uid;
+ next_thr_uid++;
+ malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
+
+ return thr_uid;
+}
+
+static prof_tdata_t *
+prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
+ char *thread_name, bool active) {
+ prof_tdata_t *tdata;
cassert(config_prof);
/* Initialize an empty cache for this thread. */
- prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
- if (prof_tdata == NULL)
- return (NULL);
+ tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
+ sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
+ arena_get(TSDN_NULL, 0, true), true);
+ if (tdata == NULL) {
+ return NULL;
+ }
- if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
- prof_bt_hash, prof_bt_keycomp)) {
- idalloc(prof_tdata);
- return (NULL);
+ tdata->lock = prof_tdata_mutex_choose(thr_uid);
+ tdata->thr_uid = thr_uid;
+ tdata->thr_discrim = thr_discrim;
+ tdata->thread_name = thread_name;
+ tdata->attached = true;
+ tdata->expired = false;
+ tdata->tctx_uid_next = 0;
+
+ if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
+ prof_bt_keycomp)) {
+ idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
+ return NULL;
}
- ql_new(&prof_tdata->lru_ql);
- prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
- if (prof_tdata->vec == NULL) {
- ckh_delete(&prof_tdata->bt2cnt);
- idalloc(prof_tdata);
- return (NULL);
+ tdata->prng_state = (uint64_t)(uintptr_t)tdata;
+ prof_sample_threshold_update(tdata);
+
+ tdata->enq = false;
+ tdata->enq_idump = false;
+ tdata->enq_gdump = false;
+
+ tdata->dumping = false;
+ tdata->active = active;
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+ tdata_tree_insert(&tdatas, tdata);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
+
+ return tdata;
+}
+
+prof_tdata_t *
+prof_tdata_init(tsd_t *tsd) {
+ return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
+ NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
+}
+
+static bool
+prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
+ if (tdata->attached && !even_if_attached) {
+ return false;
}
+ if (ckh_count(&tdata->bt2tctx) != 0) {
+ return false;
+ }
+ return true;
+}
- prof_tdata->prng_state = 0;
- prof_tdata->threshold = 0;
- prof_tdata->accum = 0;
+static bool
+prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
+ bool even_if_attached) {
+ malloc_mutex_assert_owner(tsdn, tdata->lock);
+
+ return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
+}
+
+static void
+prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
+ bool even_if_attached) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
- prof_tdata->enq = false;
- prof_tdata->enq_idump = false;
- prof_tdata->enq_gdump = false;
+ tdata_tree_remove(&tdatas, tdata);
- prof_tdata_tsd_set(&prof_tdata);
+ assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
- return (prof_tdata);
+ if (tdata->thread_name != NULL) {
+ idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
+ true);
+ }
+ ckh_delete(tsd, &tdata->bt2tctx);
+ idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
}
-void
-prof_tdata_cleanup(void *arg)
-{
- prof_thr_cnt_t *cnt;
- prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
+static void
+prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
+ malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+ prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
+}
- cassert(config_prof);
+static void
+prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
+ bool destroy_tdata;
- if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
- /*
- * Another destructor deallocated memory after this destructor
- * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
- * in order to receive another callback.
- */
- prof_tdata = PROF_TDATA_STATE_PURGATORY;
- prof_tdata_tsd_set(&prof_tdata);
- } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to PROF_TDATA_STATE_PURGATORY so that other destructors
- * wouldn't cause re-creation of the prof_tdata. This time, do
- * nothing, so that the destructor will not be called again.
- */
- } else if (prof_tdata != NULL) {
+ malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
+ if (tdata->attached) {
+ destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
+ true);
/*
- * Delete the hash table. All of its contents can still be
- * iterated over via the LRU.
+ * Only detach if !destroy_tdata, because detaching would allow
+ * another thread to win the race to destroy tdata.
*/
- ckh_delete(&prof_tdata->bt2cnt);
- /*
- * Iteratively merge cnt's into the global stats and delete
- * them.
- */
- while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
- ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
- prof_ctx_merge(cnt->ctx, cnt);
- idalloc(cnt);
+ if (!destroy_tdata) {
+ tdata->attached = false;
}
- idalloc(prof_tdata->vec);
- idalloc(prof_tdata);
- prof_tdata = PROF_TDATA_STATE_PURGATORY;
- prof_tdata_tsd_set(&prof_tdata);
+ tsd_prof_tdata_set(tsd, NULL);
+ } else {
+ destroy_tdata = false;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
+ if (destroy_tdata) {
+ prof_tdata_destroy(tsd, tdata, true);
}
}
+prof_tdata_t *
+prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
+ uint64_t thr_uid = tdata->thr_uid;
+ uint64_t thr_discrim = tdata->thr_discrim + 1;
+ char *thread_name = (tdata->thread_name != NULL) ?
+ prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
+ bool active = tdata->active;
+
+ prof_tdata_detach(tsd, tdata);
+ return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
+ active);
+}
+
+static bool
+prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
+ bool destroy_tdata;
+
+ malloc_mutex_lock(tsdn, tdata->lock);
+ if (!tdata->expired) {
+ tdata->expired = true;
+ destroy_tdata = tdata->attached ? false :
+ prof_tdata_should_destroy(tsdn, tdata, false);
+ } else {
+ destroy_tdata = false;
+ }
+ malloc_mutex_unlock(tsdn, tdata->lock);
+
+ return destroy_tdata;
+}
+
+static prof_tdata_t *
+prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
+ void *arg) {
+ tsdn_t *tsdn = (tsdn_t *)arg;
+
+ return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
+}
+
void
-prof_boot0(void)
-{
+prof_reset(tsd_t *tsd, size_t lg_sample) {
+ prof_tdata_t *next;
+
+ assert(lg_sample < (sizeof(uint64_t) << 3));
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
+ malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
+
+ lg_prof_sample = lg_sample;
+
+ next = NULL;
+ do {
+ prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
+ prof_tdata_reset_iter, (void *)tsd);
+ if (to_destroy != NULL) {
+ next = tdata_tree_next(&tdatas, to_destroy);
+ prof_tdata_destroy_locked(tsd, to_destroy, false);
+ } else {
+ next = NULL;
+ }
+ } while (next != NULL);
+
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
+}
+
+void
+prof_tdata_cleanup(tsd_t *tsd) {
+ prof_tdata_t *tdata;
+
+ if (!config_prof) {
+ return;
+ }
+
+ tdata = tsd_prof_tdata_get(tsd);
+ if (tdata != NULL) {
+ prof_tdata_detach(tsd, tdata);
+ }
+}
+
+bool
+prof_active_get(tsdn_t *tsdn) {
+ bool prof_active_current;
+
+ malloc_mutex_lock(tsdn, &prof_active_mtx);
+ prof_active_current = prof_active;
+ malloc_mutex_unlock(tsdn, &prof_active_mtx);
+ return prof_active_current;
+}
+
+bool
+prof_active_set(tsdn_t *tsdn, bool active) {
+ bool prof_active_old;
+
+ malloc_mutex_lock(tsdn, &prof_active_mtx);
+ prof_active_old = prof_active;
+ prof_active = active;
+ malloc_mutex_unlock(tsdn, &prof_active_mtx);
+ return prof_active_old;
+}
+
+const char *
+prof_thread_name_get(tsd_t *tsd) {
+ prof_tdata_t *tdata;
+
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL) {
+ return "";
+ }
+ return (tdata->thread_name != NULL ? tdata->thread_name : "");
+}
+
+static char *
+prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
+ char *ret;
+ size_t size;
+
+ if (thread_name == NULL) {
+ return NULL;
+ }
+
+ size = strlen(thread_name) + 1;
+ if (size == 1) {
+ return "";
+ }
+
+ ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
+ arena_get(TSDN_NULL, 0, true), true);
+ if (ret == NULL) {
+ return NULL;
+ }
+ memcpy(ret, thread_name, size);
+ return ret;
+}
+
+int
+prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
+ prof_tdata_t *tdata;
+ unsigned i;
+ char *s;
+
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL) {
+ return EAGAIN;
+ }
+
+ /* Validate input. */
+ if (thread_name == NULL) {
+ return EFAULT;
+ }
+ for (i = 0; thread_name[i] != '\0'; i++) {
+ char c = thread_name[i];
+ if (!isgraph(c) && !isblank(c)) {
+ return EFAULT;
+ }
+ }
+
+ s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
+ if (s == NULL) {
+ return EAGAIN;
+ }
+
+ if (tdata->thread_name != NULL) {
+ idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
+ true);
+ tdata->thread_name = NULL;
+ }
+ if (strlen(s) > 0) {
+ tdata->thread_name = s;
+ }
+ return 0;
+}
+
+bool
+prof_thread_active_get(tsd_t *tsd) {
+ prof_tdata_t *tdata;
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL) {
+ return false;
+ }
+ return tdata->active;
+}
+
+bool
+prof_thread_active_set(tsd_t *tsd, bool active) {
+ prof_tdata_t *tdata;
+
+ tdata = prof_tdata_get(tsd, true);
+ if (tdata == NULL) {
+ return true;
+ }
+ tdata->active = active;
+ return false;
+}
+
+bool
+prof_thread_active_init_get(tsdn_t *tsdn) {
+ bool active_init;
+
+ malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
+ active_init = prof_thread_active_init;
+ malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
+ return active_init;
+}
+
+bool
+prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
+ bool active_init_old;
+
+ malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
+ active_init_old = prof_thread_active_init;
+ prof_thread_active_init = active_init;
+ malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
+ return active_init_old;
+}
+
+bool
+prof_gdump_get(tsdn_t *tsdn) {
+ bool prof_gdump_current;
+
+ malloc_mutex_lock(tsdn, &prof_gdump_mtx);
+ prof_gdump_current = prof_gdump_val;
+ malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
+ return prof_gdump_current;
+}
+
+bool
+prof_gdump_set(tsdn_t *tsdn, bool gdump) {
+ bool prof_gdump_old;
+
+ malloc_mutex_lock(tsdn, &prof_gdump_mtx);
+ prof_gdump_old = prof_gdump_val;
+ prof_gdump_val = gdump;
+ malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
+ return prof_gdump_old;
+}
+
+void
+prof_boot0(void) {
cassert(config_prof);
memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
@@ -1294,17 +2265,15 @@ prof_boot0(void)
}
void
-prof_boot1(void)
-{
-
+prof_boot1(void) {
cassert(config_prof);
/*
- * opt_prof and prof_promote must be in their final state before any
- * arenas are initialized, so this function must be executed early.
+ * opt_prof must be in its final state before any arenas are
+ * initialized, so this function must be executed early.
*/
- if (opt_prof_leak && opt_prof == false) {
+ if (opt_prof_leak && !opt_prof) {
/*
* Enable opt_prof, but in such a way that profiles are never
* automatically dumped.
@@ -1317,48 +2286,101 @@ prof_boot1(void)
opt_lg_prof_interval);
}
}
-
- prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}
bool
-prof_boot2(void)
-{
-
+prof_boot2(tsd_t *tsd) {
cassert(config_prof);
if (opt_prof) {
unsigned i;
- if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
- prof_bt_keycomp))
- return (true);
- if (malloc_mutex_init(&bt2ctx_mtx))
- return (true);
- if (prof_tdata_tsd_boot()) {
- malloc_write(
- "<jemalloc>: Error in pthread_key_create()\n");
- abort();
+ lg_prof_sample = opt_lg_prof_sample;
+
+ prof_active = opt_prof_active;
+ if (malloc_mutex_init(&prof_active_mtx, "prof_active",
+ WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ prof_gdump_val = opt_prof_gdump;
+ if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
+ WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
+ return true;
}
- if (malloc_mutex_init(&prof_dump_seq_mtx))
- return (true);
- if (malloc_mutex_init(&prof_dump_mtx))
- return (true);
+ prof_thread_active_init = opt_prof_thread_active_init;
+ if (malloc_mutex_init(&prof_thread_active_init_mtx,
+ "prof_thread_active_init",
+ WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
- if (atexit(prof_fdump) != 0) {
+ if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
+ prof_bt_keycomp)) {
+ return true;
+ }
+ if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
+ WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ tdata_tree_new(&tdatas);
+ if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
+ WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ next_thr_uid = 0;
+ if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
+ WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
+ WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+ if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
+ WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
+ atexit(prof_fdump) != 0) {
malloc_write("<jemalloc>: Error in atexit()\n");
- if (opt_abort)
+ if (opt_abort) {
abort();
+ }
}
- ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
- sizeof(malloc_mutex_t));
- if (ctx_locks == NULL)
- return (true);
+ gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
+ b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
+ CACHELINE);
+ if (gctx_locks == NULL) {
+ return true;
+ }
for (i = 0; i < PROF_NCTX_LOCKS; i++) {
- if (malloc_mutex_init(&ctx_locks[i]))
- return (true);
+ if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
+ WITNESS_RANK_PROF_GCTX,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+ }
+
+ tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
+ b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
+ CACHELINE);
+ if (tdata_locks == NULL) {
+ return true;
+ }
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
+ if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
+ WITNESS_RANK_PROF_TDATA,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
}
}
@@ -1372,48 +2394,79 @@ prof_boot2(void)
prof_booted = true;
- return (false);
+ return false;
}
void
-prof_prefork(void)
-{
-
- if (opt_prof) {
+prof_prefork0(tsdn_t *tsdn) {
+ if (config_prof && opt_prof) {
unsigned i;
- malloc_mutex_prefork(&bt2ctx_mtx);
- malloc_mutex_prefork(&prof_dump_seq_mtx);
- for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_prefork(&ctx_locks[i]);
+ malloc_mutex_prefork(tsdn, &prof_dump_mtx);
+ malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
+ malloc_mutex_prefork(tsdn, &tdatas_mtx);
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
+ malloc_mutex_prefork(tsdn, &tdata_locks[i]);
+ }
+ for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+ malloc_mutex_prefork(tsdn, &gctx_locks[i]);
+ }
}
}
void
-prof_postfork_parent(void)
-{
+prof_prefork1(tsdn_t *tsdn) {
+ if (config_prof && opt_prof) {
+ malloc_mutex_prefork(tsdn, &prof_active_mtx);
+ malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
+ malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
+ malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
+ malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
+ }
+}
- if (opt_prof) {
+void
+prof_postfork_parent(tsdn_t *tsdn) {
+ if (config_prof && opt_prof) {
unsigned i;
- for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_postfork_parent(&ctx_locks[i]);
- malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
- malloc_mutex_postfork_parent(&bt2ctx_mtx);
+ malloc_mutex_postfork_parent(tsdn,
+ &prof_thread_active_init_mtx);
+ malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
+ malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
+ malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
+ malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
+ for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+ malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
+ }
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
+ malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
+ }
+ malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
+ malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
+ malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
}
}
void
-prof_postfork_child(void)
-{
-
- if (opt_prof) {
+prof_postfork_child(tsdn_t *tsdn) {
+ if (config_prof && opt_prof) {
unsigned i;
- for (i = 0; i < PROF_NCTX_LOCKS; i++)
- malloc_mutex_postfork_child(&ctx_locks[i]);
- malloc_mutex_postfork_child(&prof_dump_seq_mtx);
- malloc_mutex_postfork_child(&bt2ctx_mtx);
+ malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
+ malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
+ malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
+ malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
+ malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
+ for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+ malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
+ }
+ for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
+ malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
+ }
+ malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
+ malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
+ malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
}
}
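
The prefork/postfork reshuffle above is the standard pthread_atfork() discipline: prof_prefork0() and prof_prefork1() acquire every profiling mutex before fork() so the child cannot inherit a lock left frozen by some other thread, and both postfork hooks release them in exactly the reverse acquisition order. A minimal sketch of the pattern, with two hypothetical locks standing in for the mutex list:

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t mu_lo = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mu_hi = PTHREAD_MUTEX_INITIALIZER;

/* Take every lock so fork() snapshots them all held by this thread. */
static void prefork(void) {
    pthread_mutex_lock(&mu_lo);
    pthread_mutex_lock(&mu_hi);
}

/* Parent and child both release in reverse acquisition order. */
static void postfork(void) {
    pthread_mutex_unlock(&mu_hi);
    pthread_mutex_unlock(&mu_lo);
}

int main(void) {
    pthread_atfork(prefork, postfork, postfork);
    if (fork() == 0) {
        _exit(0);    /* child starts with usable, unlocked mutexes */
    }
    return 0;
}
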
diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c
deleted file mode 100644
index 543151164..000000000
--- a/deps/jemalloc/src/quarantine.c
+++ /dev/null
@@ -1,199 +0,0 @@
-#define JEMALLOC_QUARANTINE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-/*
- * quarantine pointers close to NULL are used to encode state information that
- * is used for cleaning up during thread shutdown.
- */
-#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1)
-#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2)
-#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY
-
-/******************************************************************************/
-/* Data. */
-
-malloc_tsd_data(, quarantine, quarantine_t *, NULL)
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static quarantine_t *quarantine_grow(quarantine_t *quarantine);
-static void quarantine_drain_one(quarantine_t *quarantine);
-static void quarantine_drain(quarantine_t *quarantine, size_t upper_bound);
-
-/******************************************************************************/
-
-quarantine_t *
-quarantine_init(size_t lg_maxobjs)
-{
- quarantine_t *quarantine;
-
- quarantine = (quarantine_t *)imalloc(offsetof(quarantine_t, objs) +
- ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)));
- if (quarantine == NULL)
- return (NULL);
- quarantine->curbytes = 0;
- quarantine->curobjs = 0;
- quarantine->first = 0;
- quarantine->lg_maxobjs = lg_maxobjs;
-
- quarantine_tsd_set(&quarantine);
-
- return (quarantine);
-}
-
-static quarantine_t *
-quarantine_grow(quarantine_t *quarantine)
-{
- quarantine_t *ret;
-
- ret = quarantine_init(quarantine->lg_maxobjs + 1);
- if (ret == NULL) {
- quarantine_drain_one(quarantine);
- return (quarantine);
- }
-
- ret->curbytes = quarantine->curbytes;
- ret->curobjs = quarantine->curobjs;
- if (quarantine->first + quarantine->curobjs <= (ZU(1) <<
- quarantine->lg_maxobjs)) {
- /* objs ring buffer data are contiguous. */
- memcpy(ret->objs, &quarantine->objs[quarantine->first],
- quarantine->curobjs * sizeof(quarantine_obj_t));
- } else {
- /* objs ring buffer data wrap around. */
- size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) -
- quarantine->first;
- size_t ncopy_b = quarantine->curobjs - ncopy_a;
-
- memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a
- * sizeof(quarantine_obj_t));
- memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
- sizeof(quarantine_obj_t));
- }
- idalloc(quarantine);
-
- return (ret);
-}
-
-static void
-quarantine_drain_one(quarantine_t *quarantine)
-{
- quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
- assert(obj->usize == isalloc(obj->ptr, config_prof));
- idalloc(obj->ptr);
- quarantine->curbytes -= obj->usize;
- quarantine->curobjs--;
- quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
- quarantine->lg_maxobjs) - 1);
-}
-
-static void
-quarantine_drain(quarantine_t *quarantine, size_t upper_bound)
-{
-
- while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0)
- quarantine_drain_one(quarantine);
-}
-
-void
-quarantine(void *ptr)
-{
- quarantine_t *quarantine;
- size_t usize = isalloc(ptr, config_prof);
-
- cassert(config_fill);
- assert(opt_quarantine);
-
- quarantine = *quarantine_tsd_get();
- if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) {
- if (quarantine == QUARANTINE_STATE_PURGATORY) {
- /*
- * Make a note that quarantine() was called after
- * quarantine_cleanup() was called.
- */
- quarantine = QUARANTINE_STATE_REINCARNATED;
- quarantine_tsd_set(&quarantine);
- }
- idalloc(ptr);
- return;
- }
- /*
- * Drain one or more objects if the quarantine size limit would be
- * exceeded by appending ptr.
- */
- if (quarantine->curbytes + usize > opt_quarantine) {
- size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
- - usize : 0;
- quarantine_drain(quarantine, upper_bound);
- }
- /* Grow the quarantine ring buffer if it's full. */
- if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
- quarantine = quarantine_grow(quarantine);
- /* quarantine_grow() must free a slot if it fails to grow. */
- assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs));
- /* Append ptr if its size doesn't exceed the quarantine size. */
- if (quarantine->curbytes + usize <= opt_quarantine) {
- size_t offset = (quarantine->first + quarantine->curobjs) &
- ((ZU(1) << quarantine->lg_maxobjs) - 1);
- quarantine_obj_t *obj = &quarantine->objs[offset];
- obj->ptr = ptr;
- obj->usize = usize;
- quarantine->curbytes += usize;
- quarantine->curobjs++;
- if (config_fill && opt_junk) {
- /*
- * Only do redzone validation if Valgrind isn't in
- * operation.
- */
- if ((config_valgrind == false || opt_valgrind == false)
- && usize <= SMALL_MAXCLASS)
- arena_quarantine_junk_small(ptr, usize);
- else
- memset(ptr, 0x5a, usize);
- }
- } else {
- assert(quarantine->curbytes == 0);
- idalloc(ptr);
- }
-}
-
-void
-quarantine_cleanup(void *arg)
-{
- quarantine_t *quarantine = *(quarantine_t **)arg;
-
- if (quarantine == QUARANTINE_STATE_REINCARNATED) {
- /*
- * Another destructor deallocated memory after this destructor
- * was called. Reset quarantine to QUARANTINE_STATE_PURGATORY
- * in order to receive another callback.
- */
- quarantine = QUARANTINE_STATE_PURGATORY;
- quarantine_tsd_set(&quarantine);
- } else if (quarantine == QUARANTINE_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to QUARANTINE_STATE_PURGATORY so that other destructors
- * wouldn't cause re-creation of the quarantine. This time, do
- * nothing, so that the destructor will not be called again.
- */
- } else if (quarantine != NULL) {
- quarantine_drain(quarantine, 0);
- idalloc(quarantine);
- quarantine = QUARANTINE_STATE_PURGATORY;
- quarantine_tsd_set(&quarantine);
- }
-}
-
-bool
-quarantine_boot(void)
-{
-
- cassert(config_fill);
-
- if (quarantine_tsd_boot())
- return (true);
-
- return (false);
-}
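
The deleted quarantine kept freed objects in a ring buffer whose capacity is always a power of two (1 << lg_maxobjs), so wrapping an index costs one bit mask instead of a modulo, and quarantine_grow() could relocate the live span with at most two memcpy() calls depending on whether it straddled the end of the array. A hypothetical miniature of that indexing scheme:

#include <stddef.h>

/* Capacity is 1 << lg_max, so "mod capacity" reduces to a bit mask. */
typedef struct {
    size_t first;    /* index of the oldest element */
    size_t count;
    unsigned lg_max;
    void *objs[];
} ring_t;

static size_t
ring_mask(const ring_t *r) {
    return ((size_t)1 << r->lg_max) - 1;
}

static void
ring_push(ring_t *r, void *p) {
    r->objs[(r->first + r->count) & ring_mask(r)] = p;
    r->count++;
}

static void *
ring_pop(ring_t *r) {
    void *p = r->objs[r->first];
    r->first = (r->first + 1) & ring_mask(r);
    r->count--;
    return p;
}
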
diff --git a/deps/jemalloc/src/rtree.c b/deps/jemalloc/src/rtree.c
index 205957ac4..53702cf72 100644
--- a/deps/jemalloc/src/rtree.c
+++ b/deps/jemalloc/src/rtree.c
@@ -1,105 +1,320 @@
-#define JEMALLOC_RTREE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-
-rtree_t *
-rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
-{
- rtree_t *ret;
- unsigned bits_per_level, bits_in_leaf, height, i;
-
- assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
-
- bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
- bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1;
- if (bits > bits_in_leaf) {
- height = 1 + (bits - bits_in_leaf) / bits_per_level;
- if ((height-1) * bits_per_level + bits_in_leaf != bits)
- height++;
- } else {
- height = 1;
- }
- assert((height-1) * bits_per_level + bits_in_leaf >= bits);
-
- ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
- (sizeof(unsigned) * height));
- if (ret == NULL)
- return (NULL);
- memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
- height));
-
- ret->alloc = alloc;
- ret->dalloc = dalloc;
- if (malloc_mutex_init(&ret->mutex)) {
- if (dalloc != NULL)
- dalloc(ret);
- return (NULL);
- }
- ret->height = height;
- if (height > 1) {
- if ((height-1) * bits_per_level + bits_in_leaf > bits) {
- ret->level2bits[0] = (bits - bits_in_leaf) %
- bits_per_level;
- } else
- ret->level2bits[0] = bits_per_level;
- for (i = 1; i < height-1; i++)
- ret->level2bits[i] = bits_per_level;
- ret->level2bits[height-1] = bits_in_leaf;
- } else
- ret->level2bits[0] = bits;
-
- ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
- if (ret->root == NULL) {
- if (dalloc != NULL)
- dalloc(ret);
- return (NULL);
- }
- memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
-
- return (ret);
+#define JEMALLOC_RTREE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"
+
+/*
+ * Only the most significant bits of keys passed to rtree_{read,write}() are
+ * used.
+ */
+bool
+rtree_new(rtree_t *rtree, bool zeroed) {
+#ifdef JEMALLOC_JET
+ if (!zeroed) {
+ memset(rtree, 0, sizeof(rtree_t)); /* Clear root. */
+ }
+#else
+ assert(zeroed);
+#endif
+
+ if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ return false;
+}
+
+static rtree_node_elm_t *
+rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+ return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms *
+ sizeof(rtree_node_elm_t), CACHELINE);
}
+rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl;
static void
-rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
-{
+rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) {
+ /* Nodes are never deleted during normal operation. */
+ not_reached();
+}
+UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc =
+ rtree_node_dalloc_impl;
+
+static rtree_leaf_elm_t *
+rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+ return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms *
+ sizeof(rtree_leaf_elm_t), CACHELINE);
+}
+rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl;
- if (level < rtree->height - 1) {
- size_t nchildren, i;
+static void
+rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) {
+ /* Leaves are never deleted during normal operation. */
+ not_reached();
+}
+UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc =
+ rtree_leaf_dalloc_impl;
- nchildren = ZU(1) << rtree->level2bits[level];
- for (i = 0; i < nchildren; i++) {
- void **child = (void **)node[i];
- if (child != NULL)
- rtree_delete_subtree(rtree, child, level + 1);
+#ifdef JEMALLOC_JET
+# if RTREE_HEIGHT > 1
+static void
+rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree,
+ unsigned level) {
+ size_t nchildren = ZU(1) << rtree_levels[level].bits;
+ if (level + 2 < RTREE_HEIGHT) {
+ for (size_t i = 0; i < nchildren; i++) {
+ rtree_node_elm_t *node =
+ (rtree_node_elm_t *)atomic_load_p(&subtree[i].child,
+ ATOMIC_RELAXED);
+ if (node != NULL) {
+ rtree_delete_subtree(tsdn, rtree, node, level +
+ 1);
+ }
+ }
+ } else {
+ for (size_t i = 0; i < nchildren; i++) {
+ rtree_leaf_elm_t *leaf =
+ (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child,
+ ATOMIC_RELAXED);
+ if (leaf != NULL) {
+ rtree_leaf_dalloc(tsdn, rtree, leaf);
+ }
}
}
- rtree->dalloc(node);
+
+ if (subtree != rtree->root) {
+ rtree_node_dalloc(tsdn, rtree, subtree);
+ }
}
+# endif
void
-rtree_delete(rtree_t *rtree)
-{
+rtree_delete(tsdn_t *tsdn, rtree_t *rtree) {
+# if RTREE_HEIGHT > 1
+ rtree_delete_subtree(tsdn, rtree, rtree->root, 0);
+# endif
+}
+#endif
- rtree_delete_subtree(rtree, rtree->root, 0);
- rtree->dalloc(rtree);
+static rtree_node_elm_t *
+rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
+ atomic_p_t *elmp) {
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
+ /*
+ * If *elmp is non-null, then it was initialized with the init lock
+ * held, so we can get by with 'relaxed' here.
+ */
+ rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED);
+ if (node == NULL) {
+ node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
+ rtree_levels[level].bits);
+ if (node == NULL) {
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
+ return NULL;
+ }
+ /*
+ * Even though we hold the lock, a later reader might not; we
+ * need release semantics.
+ */
+ atomic_store_p(elmp, node, ATOMIC_RELEASE);
+ }
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
+
+ return node;
}
-void
-rtree_prefork(rtree_t *rtree)
-{
+static rtree_leaf_elm_t *
+rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) {
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
+ /*
+ * If *elmp is non-null, then it was initialized with the init lock
+ * held, so we can get by with 'relaxed' here.
+ */
+ rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED);
+ if (leaf == NULL) {
+ leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) <<
+ rtree_levels[RTREE_HEIGHT-1].bits);
+ if (leaf == NULL) {
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
+ return NULL;
+ }
+ /*
+ * Even though we hold the lock, a later reader might not; we
+ * need release semantics.
+ */
+ atomic_store_p(elmp, leaf, ATOMIC_RELEASE);
+ }
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
- malloc_mutex_prefork(&rtree->mutex);
+ return leaf;
}
-void
-rtree_postfork_parent(rtree_t *rtree)
-{
+static bool
+rtree_node_valid(rtree_node_elm_t *node) {
+ return ((uintptr_t)node != (uintptr_t)0);
+}
- malloc_mutex_postfork_parent(&rtree->mutex);
+static bool
+rtree_leaf_valid(rtree_leaf_elm_t *leaf) {
+ return ((uintptr_t)leaf != (uintptr_t)0);
}
-void
-rtree_postfork_child(rtree_t *rtree)
-{
+static rtree_node_elm_t *
+rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) {
+ rtree_node_elm_t *node;
+
+ if (dependent) {
+ node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
+ ATOMIC_RELAXED);
+ } else {
+ node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
+ ATOMIC_ACQUIRE);
+ }
- malloc_mutex_postfork_child(&rtree->mutex);
+ assert(!dependent || node != NULL);
+ return node;
+}
+
+static rtree_node_elm_t *
+rtree_child_node_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
+ unsigned level, bool dependent) {
+ rtree_node_elm_t *node;
+
+ node = rtree_child_node_tryread(elm, dependent);
+ if (!dependent && unlikely(!rtree_node_valid(node))) {
+ node = rtree_node_init(tsdn, rtree, level + 1, &elm->child);
+ }
+ assert(!dependent || node != NULL);
+ return node;
+}
+
+static rtree_leaf_elm_t *
+rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) {
+ rtree_leaf_elm_t *leaf;
+
+ if (dependent) {
+ leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
+ ATOMIC_RELAXED);
+ } else {
+ leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
+ ATOMIC_ACQUIRE);
+ }
+
+ assert(!dependent || leaf != NULL);
+ return leaf;
+}
+
+static rtree_leaf_elm_t *
+rtree_child_leaf_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
+ unsigned level, bool dependent) {
+ rtree_leaf_elm_t *leaf;
+
+ leaf = rtree_child_leaf_tryread(elm, dependent);
+ if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {
+ leaf = rtree_leaf_init(tsdn, rtree, &elm->child);
+ }
+ assert(!dependent || leaf != NULL);
+ return leaf;
+}
+
+rtree_leaf_elm_t *
+rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+ uintptr_t key, bool dependent, bool init_missing) {
+ rtree_node_elm_t *node;
+ rtree_leaf_elm_t *leaf;
+#if RTREE_HEIGHT > 1
+ node = rtree->root;
+#else
+ leaf = rtree->root;
+#endif
+
+ if (config_debug) {
+ uintptr_t leafkey = rtree_leafkey(key);
+ for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
+ assert(rtree_ctx->cache[i].leafkey != leafkey);
+ }
+ for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
+ assert(rtree_ctx->l2_cache[i].leafkey != leafkey);
+ }
+ }
+
+#define RTREE_GET_CHILD(level) { \
+ assert(level < RTREE_HEIGHT-1); \
+ if (level != 0 && !dependent && \
+ unlikely(!rtree_node_valid(node))) { \
+ return NULL; \
+ } \
+ uintptr_t subkey = rtree_subkey(key, level); \
+ if (level + 2 < RTREE_HEIGHT) { \
+ node = init_missing ? \
+ rtree_child_node_read(tsdn, rtree, \
+ &node[subkey], level, dependent) : \
+ rtree_child_node_tryread(&node[subkey], \
+ dependent); \
+ } else { \
+ leaf = init_missing ? \
+ rtree_child_leaf_read(tsdn, rtree, \
+ &node[subkey], level, dependent) : \
+ rtree_child_leaf_tryread(&node[subkey], \
+ dependent); \
+ } \
+ }
+ /*
+ * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss):
+ * (1) evict last entry in L2 cache; (2) move the collision slot from L1
+	 * cache down to L2; and (3) fill L1.
+ */
+#define RTREE_GET_LEAF(level) { \
+ assert(level == RTREE_HEIGHT-1); \
+ if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \
+ return NULL; \
+ } \
+ if (RTREE_CTX_NCACHE_L2 > 1) { \
+ memmove(&rtree_ctx->l2_cache[1], \
+ &rtree_ctx->l2_cache[0], \
+ sizeof(rtree_ctx_cache_elm_t) * \
+ (RTREE_CTX_NCACHE_L2 - 1)); \
+ } \
+ size_t slot = rtree_cache_direct_map(key); \
+ rtree_ctx->l2_cache[0].leafkey = \
+ rtree_ctx->cache[slot].leafkey; \
+ rtree_ctx->l2_cache[0].leaf = \
+ rtree_ctx->cache[slot].leaf; \
+ uintptr_t leafkey = rtree_leafkey(key); \
+ rtree_ctx->cache[slot].leafkey = leafkey; \
+ rtree_ctx->cache[slot].leaf = leaf; \
+ uintptr_t subkey = rtree_subkey(key, level); \
+ return &leaf[subkey]; \
+ }
+ if (RTREE_HEIGHT > 1) {
+ RTREE_GET_CHILD(0)
+ }
+ if (RTREE_HEIGHT > 2) {
+ RTREE_GET_CHILD(1)
+ }
+ if (RTREE_HEIGHT > 3) {
+ for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) {
+ RTREE_GET_CHILD(i)
+ }
+ }
+ RTREE_GET_LEAF(RTREE_HEIGHT-1)
+#undef RTREE_GET_CHILD
+#undef RTREE_GET_LEAF
+ not_reached();
+}
+
+void
+rtree_ctx_data_init(rtree_ctx_t *ctx) {
+ for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
+ rtree_ctx_cache_elm_t *cache = &ctx->cache[i];
+ cache->leafkey = RTREE_LEAFKEY_INVALID;
+ cache->leaf = NULL;
+ }
+ for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
+ rtree_ctx_cache_elm_t *cache = &ctx->l2_cache[i];
+ cache->leafkey = RTREE_LEAFKEY_INVALID;
+ cache->leaf = NULL;
+ }
}
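
The rewritten rtree never locks on the read path: rtree_node_init() and rtree_leaf_init() serialize only initializers on init_lock, re-check the slot with a relaxed load (any non-NULL value was published under that same lock), and publish with a release store so that lock-free readers doing acquire loads also observe the initialized memory behind the pointer; "dependent" lookups can even use relaxed loads because their address was derived from a prior read. The L1 lookup cache, meanwhile, is direct-mapped by key, with the L2 cache absorbing evicted slots most-recently-first, per the replacement comment above. A self-contained C11 sketch of the double-checked publication pattern (slot_get_or_init() is a made-up name):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic(void *) slot;

static void *
slot_get_or_init(size_t sz) {
    /* Fast path: lock-free acquire load, as in rtree_child_*_tryread(). */
    void *p = atomic_load_explicit(&slot, memory_order_acquire);
    if (p != NULL) {
        return p;
    }
    pthread_mutex_lock(&init_lock);
    /* Relaxed is enough: any non-NULL value was stored under init_lock. */
    p = atomic_load_explicit(&slot, memory_order_relaxed);
    if (p == NULL) {
        p = calloc(1, sz);
        if (p != NULL) {
            /* Release, so readers also see the zeroed contents. */
            atomic_store_explicit(&slot, p, memory_order_release);
        }
    }
    pthread_mutex_unlock(&init_lock);
    return p;
}
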
diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c
index bef2ab33c..08b9507cf 100644
--- a/deps/jemalloc/src/stats.c
+++ b/deps/jemalloc/src/stats.c
@@ -1,282 +1,1235 @@
-#define JEMALLOC_STATS_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_STATS_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
-#define CTL_GET(n, v, t) do { \
- size_t sz = sizeof(t); \
- xmallctl(n, v, &sz, NULL, 0); \
-} while (0)
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/emitter.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_prof.h"
-#define CTL_I_GET(n, v, t) do { \
- size_t mib[6]; \
- size_t miblen = sizeof(mib) / sizeof(size_t); \
+const char *global_mutex_names[mutex_prof_num_global_mutexes] = {
+#define OP(mtx) #mtx,
+ MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+};
+
+const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = {
+#define OP(mtx) #mtx,
+ MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+};
+
+#define CTL_GET(n, v, t) do { \
size_t sz = sizeof(t); \
- xmallctlnametomib(n, mib, &miblen); \
- mib[2] = i; \
- xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
+ xmallctl(n, (void *)v, &sz, NULL, 0); \
} while (0)
-#define CTL_J_GET(n, v, t) do { \
- size_t mib[6]; \
+#define CTL_M2_GET(n, i, v, t) do { \
+ size_t mib[CTL_MAX_DEPTH]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
- mib[2] = j; \
- xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
+ mib[2] = (i); \
+ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \
} while (0)
-#define CTL_IJ_GET(n, v, t) do { \
- size_t mib[6]; \
+#define CTL_M2_M4_GET(n, i, j, v, t) do { \
+ size_t mib[CTL_MAX_DEPTH]; \
size_t miblen = sizeof(mib) / sizeof(size_t); \
size_t sz = sizeof(t); \
xmallctlnametomib(n, mib, &miblen); \
- mib[2] = i; \
- mib[4] = j; \
- xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \
+ mib[2] = (i); \
+ mib[4] = (j); \
+ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \
} while (0)
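
The reworked CTL_M2_GET/CTL_M2_M4_GET macros wrap the usual mallctl MIB idiom: translate the dotted name to a MIB once with xmallctlnametomib(), patch the index components in place (mib[2] for the arena, mib[4] for the bin or extent class), then fetch with xmallctlbymib(), avoiding a string lookup per statistic. The same idiom through jemalloc's public API might look like the sketch below (minimal error handling; a real stats reader would typically write to "epoch" first so the counters are refreshed):

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int main(void) {
    size_t mib[7];
    size_t miblen = sizeof(mib) / sizeof(size_t);
    size_t pactive, sz = sizeof(pactive);

    if (mallctlnametomib("stats.arenas.0.pactive", mib, &miblen) != 0) {
        return 1;
    }
    mib[2] = 0;    /* the arena index lives in MIB component 2 */
    if (mallctlbymib(mib, miblen, &pactive, &sz, NULL, 0) != 0) {
        return 1;
    }
    printf("arena 0 active pages: %zu\n", pactive);
    return 0;
}
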
/******************************************************************************/
/* Data. */
-bool opt_stats_print = false;
-
-size_t stats_cactive = 0;
+bool opt_stats_print = false;
+char opt_stats_print_opts[stats_print_tot_num_options+1] = "";
/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-static void stats_arena_bins_print(void (*write_cb)(void *, const char *),
- void *cbopaque, unsigned i);
-static void stats_arena_lruns_print(void (*write_cb)(void *, const char *),
- void *cbopaque, unsigned i);
-static void stats_arena_print(void (*write_cb)(void *, const char *),
- void *cbopaque, unsigned i, bool bins, bool large);
+/* Calculate x.yyy and output a string (takes a fixed-size char array). */
+static bool
+get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) {
+ if (divisor == 0 || dividend > divisor) {
+ /* The rate is not supposed to be greater than 1. */
+ return true;
+ }
+ if (dividend > 0) {
+ assert(UINT64_MAX / dividend >= 1000);
+ }
-/******************************************************************************/
+ unsigned n = (unsigned)((dividend * 1000) / divisor);
+ if (n < 10) {
+ malloc_snprintf(str, 6, "0.00%u", n);
+ } else if (n < 100) {
+ malloc_snprintf(str, 6, "0.0%u", n);
+ } else if (n < 1000) {
+ malloc_snprintf(str, 6, "0.%u", n);
+ } else {
+ malloc_snprintf(str, 6, "1");
+ }
+
+ return false;
+}
+
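
get_rate_str() renders a utilization ratio as fixed-point text without touching floating point: it scales the dividend by 1000 (the assert guards that multiplication against uint64_t overflow) and chooses a format by magnitude so small quotients keep their leading zeros. A worked example of the arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* 7 of 9 regions in use. */
    uint64_t dividend = 7, divisor = 9;
    unsigned n = (unsigned)((dividend * 1000) / divisor);
    /* n == 777 lands in the n < 1000 branch, so the util column reads
     * 0.777; n == 7 would print as 0.007 via the "0.00%u" branch. */
    printf("0.%u\n", n);
    return 0;
}
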
+#define MUTEX_CTL_STR_MAX_LENGTH 128
+static void
+gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix,
+ const char *mutex, const char *counter) {
+ malloc_snprintf(str, buf_len, "stats.%s.%s.%s", prefix, mutex, counter);
+}
+
+static void
+mutex_stats_init_cols(emitter_row_t *row, const char *table_name,
+ emitter_col_t *name,
+ emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
+ emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+ mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0;
+ mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0;
+
+ emitter_col_t *col;
+
+ if (name != NULL) {
+ emitter_col_init(name, row);
+ name->justify = emitter_justify_left;
+ name->width = 21;
+ name->type = emitter_type_title;
+ name->str_val = table_name;
+ }
+
+#define WIDTH_uint32_t 12
+#define WIDTH_uint64_t 16
+#define OP(counter, counter_type, human) \
+ col = &col_##counter_type[k_##counter_type]; \
+ ++k_##counter_type; \
+ emitter_col_init(col, row); \
+ col->justify = emitter_justify_right; \
+ col->width = WIDTH_##counter_type; \
+ col->type = emitter_type_title; \
+ col->str_val = human;
+ MUTEX_PROF_COUNTERS
+#undef OP
+#undef WIDTH_uint32_t
+#undef WIDTH_uint64_t
+}
+
+static void
+mutex_stats_read_global(const char *name, emitter_col_t *col_name,
+ emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
+ emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+ char cmd[MUTEX_CTL_STR_MAX_LENGTH];
+
+ col_name->str_val = name;
+
+ emitter_col_t *dst;
+#define EMITTER_TYPE_uint32_t emitter_type_uint32
+#define EMITTER_TYPE_uint64_t emitter_type_uint64
+#define OP(counter, counter_type, human) \
+ dst = &col_##counter_type[mutex_counter_##counter]; \
+ dst->type = EMITTER_TYPE_##counter_type; \
+ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \
+ "mutexes", name, #counter); \
+ CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type);
+ MUTEX_PROF_COUNTERS
+#undef OP
+#undef EMITTER_TYPE_uint32_t
+#undef EMITTER_TYPE_uint64_t
+}
+
+static void
+mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind,
+ const char *name, emitter_col_t *col_name,
+ emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
+ emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+ char cmd[MUTEX_CTL_STR_MAX_LENGTH];
+
+ col_name->str_val = name;
+
+ emitter_col_t *dst;
+#define EMITTER_TYPE_uint32_t emitter_type_uint32
+#define EMITTER_TYPE_uint64_t emitter_type_uint64
+#define OP(counter, counter_type, human) \
+ dst = &col_##counter_type[mutex_counter_##counter]; \
+ dst->type = EMITTER_TYPE_##counter_type; \
+ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \
+ "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\
+ CTL_M2_GET(cmd, arena_ind, \
+ (counter_type *)&dst->bool_val, counter_type);
+ MUTEX_PROF_COUNTERS
+#undef OP
+#undef EMITTER_TYPE_uint32_t
+#undef EMITTER_TYPE_uint64_t
+}
+
+static void
+mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind,
+ emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
+ emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+ char cmd[MUTEX_CTL_STR_MAX_LENGTH];
+ emitter_col_t *dst;
+
+#define EMITTER_TYPE_uint32_t emitter_type_uint32
+#define EMITTER_TYPE_uint64_t emitter_type_uint64
+#define OP(counter, counter_type, human) \
+ dst = &col_##counter_type[mutex_counter_##counter]; \
+ dst->type = EMITTER_TYPE_##counter_type; \
+ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \
+ "arenas.0.bins.0","mutex", #counter); \
+ CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \
+ (counter_type *)&dst->bool_val, counter_type);
+ MUTEX_PROF_COUNTERS
+#undef OP
+#undef EMITTER_TYPE_uint32_t
+#undef EMITTER_TYPE_uint64_t
+}
+
+/* "row" can be NULL to avoid emitting in table mode. */
+static void
+mutex_stats_emit(emitter_t *emitter, emitter_row_t *row,
+ emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters],
+ emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) {
+ if (row != NULL) {
+ emitter_table_row(emitter, row);
+ }
+
+ mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0;
+ mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0;
+
+ emitter_col_t *col;
+
+#define EMITTER_TYPE_uint32_t emitter_type_uint32
+#define EMITTER_TYPE_uint64_t emitter_type_uint64
+#define OP(counter, type, human) \
+ col = &col_##type[k_##type]; \
+ ++k_##type; \
+ emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \
+ (const void *)&col->bool_val);
+ MUTEX_PROF_COUNTERS;
+#undef OP
+#undef EMITTER_TYPE_uint32_t
+#undef EMITTER_TYPE_uint64_t
+}
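
mutex_stats_init_cols(), the mutex_stats_read_*() helpers, and mutex_stats_emit() all walk the same counter list by re-expanding MUTEX_PROF_COUNTERS with a different OP() each time, keeping the column layout, the mallctl reads, and the JSON emission in lockstep without hand-maintained parallel arrays. A tiny standalone illustration of that X-macro technique (COUNTERS and the ctr_* names here are hypothetical):

#include <stdio.h>

#define COUNTERS \
    OP(num_ops)  \
    OP(num_wait) \
    OP(max_wait)

/* Expansion 1: an enum of indices. */
enum {
#define OP(name) ctr_##name,
    COUNTERS
#undef OP
    ctr_count
};

/* Expansion 2: a parallel table of names. */
static const char *ctr_names[ctr_count] = {
#define OP(name) #name,
    COUNTERS
#undef OP
};

int main(void) {
    unsigned long vals[ctr_count] = {0};
    /* Expansion 3: a statement generated per counter. */
#define OP(name) vals[ctr_##name]++;
    COUNTERS
#undef OP
    for (int i = 0; i < ctr_count; i++) {
        printf("%s = %lu\n", ctr_names[i], vals[i]);
    }
    return 0;
}
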
static void
-stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
- unsigned i)
-{
+stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) {
size_t page;
- bool config_tcache;
- unsigned nbins, j, gap_start;
+ bool in_gap, in_gap_prev;
+ unsigned nbins, j;
CTL_GET("arenas.page", &page, size_t);
- CTL_GET("config.tcache", &config_tcache, bool);
- if (config_tcache) {
- malloc_cprintf(write_cb, cbopaque,
- "bins: bin size regs pgs allocated nmalloc"
- " ndalloc nrequests nfills nflushes"
- " newruns reruns curruns\n");
- } else {
- malloc_cprintf(write_cb, cbopaque,
- "bins: bin size regs pgs allocated nmalloc"
- " ndalloc newruns reruns curruns\n");
- }
CTL_GET("arenas.nbins", &nbins, unsigned);
- for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
- uint64_t nruns;
-
- CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
- if (nruns == 0) {
- if (gap_start == UINT_MAX)
- gap_start = j;
- } else {
- size_t reg_size, run_size, allocated;
- uint32_t nregs;
- uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
- uint64_t reruns;
- size_t curruns;
-
- if (gap_start != UINT_MAX) {
- if (j > gap_start + 1) {
- /* Gap of more than one size class. */
- malloc_cprintf(write_cb, cbopaque,
- "[%u..%u]\n", gap_start,
- j - 1);
- } else {
- /* Gap of one size class. */
- malloc_cprintf(write_cb, cbopaque,
- "[%u]\n", gap_start);
- }
- gap_start = UINT_MAX;
- }
- CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
- CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
- CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
- &allocated, size_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
- &nmalloc, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
- &ndalloc, uint64_t);
- if (config_tcache) {
- CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
- &nrequests, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
- &nfills, uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
- &nflushes, uint64_t);
- }
- CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
- uint64_t);
- CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
- size_t);
- if (config_tcache) {
- malloc_cprintf(write_cb, cbopaque,
- "%13u %5zu %4u %3zu %12zu %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- j, reg_size, nregs, run_size / page,
- allocated, nmalloc, ndalloc, nrequests,
- nfills, nflushes, nruns, reruns, curruns);
+
+ emitter_row_t header_row;
+ emitter_row_init(&header_row);
+
+ emitter_row_t row;
+ emitter_row_init(&row);
+#define COL(name, left_or_right, col_width, etype) \
+ emitter_col_t col_##name; \
+ emitter_col_init(&col_##name, &row); \
+ col_##name.justify = emitter_justify_##left_or_right; \
+ col_##name.width = col_width; \
+ col_##name.type = emitter_type_##etype; \
+ emitter_col_t header_col_##name; \
+ emitter_col_init(&header_col_##name, &header_row); \
+ header_col_##name.justify = emitter_justify_##left_or_right; \
+ header_col_##name.width = col_width; \
+ header_col_##name.type = emitter_type_title; \
+ header_col_##name.str_val = #name;
+
+ COL(size, right, 20, size)
+ COL(ind, right, 4, unsigned)
+ COL(allocated, right, 13, uint64)
+ COL(nmalloc, right, 13, uint64)
+ COL(ndalloc, right, 13, uint64)
+ COL(nrequests, right, 13, uint64)
+ COL(curregs, right, 13, size)
+ COL(curslabs, right, 13, size)
+ COL(regs, right, 5, unsigned)
+ COL(pgs, right, 4, size)
+ /* To buffer a right- and left-justified column. */
+ COL(justify_spacer, right, 1, title)
+ COL(util, right, 6, title)
+ COL(nfills, right, 13, uint64)
+ COL(nflushes, right, 13, uint64)
+ COL(nslabs, right, 13, uint64)
+ COL(nreslabs, right, 13, uint64)
+#undef COL
+
+ /* Don't want to actually print the name. */
+ header_col_justify_spacer.str_val = " ";
+ col_justify_spacer.str_val = " ";
+
+
+ emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters];
+ emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters];
+
+ emitter_col_t header_mutex64[mutex_prof_num_uint64_t_counters];
+ emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters];
+
+ if (mutex) {
+ mutex_stats_init_cols(&row, NULL, NULL, col_mutex64,
+ col_mutex32);
+ mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64,
+ header_mutex32);
+ }
+
+ /*
+ * We print a "bins:" header as part of the table row; we need to adjust
+ * the header size column to compensate.
+ */
+	header_col_size.width -= 5;
+ emitter_table_printf(emitter, "bins:");
+ emitter_table_row(emitter, &header_row);
+ emitter_json_arr_begin(emitter, "bins");
+
+ for (j = 0, in_gap = false; j < nbins; j++) {
+ uint64_t nslabs;
+ size_t reg_size, slab_size, curregs;
+ size_t curslabs;
+ uint32_t nregs;
+ uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
+ uint64_t nreslabs;
+
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs,
+ uint64_t);
+ in_gap_prev = in_gap;
+ in_gap = (nslabs == 0);
+
+ if (in_gap_prev && !in_gap) {
+ emitter_table_printf(emitter,
+ " ---\n");
+ }
+
+ CTL_M2_GET("arenas.bin.0.size", j, &reg_size, size_t);
+ CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t);
+ CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t);
+
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc,
+ uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc,
+ uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs,
+ size_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j,
+ &nrequests, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, &nfills,
+ uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, &nflushes,
+ uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, &nreslabs,
+ uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs,
+ size_t);
+
+ if (mutex) {
+ mutex_stats_read_arena_bin(i, j, col_mutex64,
+ col_mutex32);
+ }
+
+ emitter_json_arr_obj_begin(emitter);
+ emitter_json_kv(emitter, "nmalloc", emitter_type_uint64,
+ &nmalloc);
+ emitter_json_kv(emitter, "ndalloc", emitter_type_uint64,
+ &ndalloc);
+ emitter_json_kv(emitter, "curregs", emitter_type_size,
+ &curregs);
+ emitter_json_kv(emitter, "nrequests", emitter_type_uint64,
+ &nrequests);
+ emitter_json_kv(emitter, "nfills", emitter_type_uint64,
+ &nfills);
+ emitter_json_kv(emitter, "nflushes", emitter_type_uint64,
+ &nflushes);
+ emitter_json_kv(emitter, "nreslabs", emitter_type_uint64,
+ &nreslabs);
+ emitter_json_kv(emitter, "curslabs", emitter_type_size,
+ &curslabs);
+ if (mutex) {
+ emitter_json_dict_begin(emitter, "mutex");
+ mutex_stats_emit(emitter, NULL, col_mutex64,
+ col_mutex32);
+ emitter_json_dict_end(emitter);
+ }
+ emitter_json_arr_obj_end(emitter);
+
+ size_t availregs = nregs * curslabs;
+ char util[6];
+ if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util))
+ {
+ if (availregs == 0) {
+ malloc_snprintf(util, sizeof(util), "1");
+ } else if (curregs > availregs) {
+ /*
+ * Race detected: the counters were read in
+ * separate mallctl calls and concurrent
+ * operations happened in between. In this case
+ * no meaningful utilization can be computed.
+ */
+ malloc_snprintf(util, sizeof(util), " race");
} else {
- malloc_cprintf(write_cb, cbopaque,
- "%13u %5zu %4u %3zu %12zu %12"PRIu64
- " %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- j, reg_size, nregs, run_size / page,
- allocated, nmalloc, ndalloc, nruns, reruns,
- curruns);
+ not_reached();
}
}
+
+ col_size.size_val = reg_size;
+ col_ind.unsigned_val = j;
+ col_allocated.size_val = curregs * reg_size;
+ col_nmalloc.uint64_val = nmalloc;
+ col_ndalloc.uint64_val = ndalloc;
+ col_nrequests.uint64_val = nrequests;
+ col_curregs.size_val = curregs;
+ col_curslabs.size_val = curslabs;
+ col_regs.unsigned_val = nregs;
+ col_pgs.size_val = slab_size / page;
+ col_util.str_val = util;
+ col_nfills.uint64_val = nfills;
+ col_nflushes.uint64_val = nflushes;
+ col_nslabs.uint64_val = nslabs;
+ col_nreslabs.uint64_val = nreslabs;
+
+ /*
+ * Note that mutex columns were initialized above, if mutex ==
+ * true.
+ */
+
+ emitter_table_row(emitter, &row);
}
- if (gap_start != UINT_MAX) {
- if (j > gap_start + 1) {
- /* Gap of more than one size class. */
- malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
- gap_start, j - 1);
- } else {
- /* Gap of one size class. */
- malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
- }
+ emitter_json_arr_end(emitter); /* Close "bins". */
+
+ if (in_gap) {
+ emitter_table_printf(emitter, " ---\n");
}
}
static void
-stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
- unsigned i)
-{
- size_t page, nlruns, j;
- ssize_t gap_start;
+stats_arena_lextents_print(emitter_t *emitter, unsigned i) {
+ unsigned nbins, nlextents, j;
+ bool in_gap, in_gap_prev;
- CTL_GET("arenas.page", &page, size_t);
+ CTL_GET("arenas.nbins", &nbins, unsigned);
+ CTL_GET("arenas.nlextents", &nlextents, unsigned);
+
+ emitter_row_t header_row;
+ emitter_row_init(&header_row);
+ emitter_row_t row;
+ emitter_row_init(&row);
+
+#define COL(name, left_or_right, col_width, etype) \
+ emitter_col_t header_##name; \
+ emitter_col_init(&header_##name, &header_row); \
+ header_##name.justify = emitter_justify_##left_or_right; \
+ header_##name.width = col_width; \
+ header_##name.type = emitter_type_title; \
+ header_##name.str_val = #name; \
+ \
+ emitter_col_t col_##name; \
+ emitter_col_init(&col_##name, &row); \
+ col_##name.justify = emitter_justify_##left_or_right; \
+ col_##name.width = col_width; \
+ col_##name.type = emitter_type_##etype;
+
+ COL(size, right, 20, size)
+ COL(ind, right, 4, unsigned)
+ COL(allocated, right, 13, size)
+ COL(nmalloc, right, 13, uint64)
+ COL(ndalloc, right, 13, uint64)
+ COL(nrequests, right, 13, uint64)
+ COL(curlextents, right, 13, size)
+#undef COL
+
+ /* As with bins, we label the large extents table. */
+ header_size.width -= 6;
+ emitter_table_printf(emitter, "large:");
+ emitter_table_row(emitter, &header_row);
+ emitter_json_arr_begin(emitter, "lextents");
- malloc_cprintf(write_cb, cbopaque,
- "large: size pages nmalloc ndalloc nrequests"
- " curruns\n");
- CTL_GET("arenas.nlruns", &nlruns, size_t);
- for (j = 0, gap_start = -1; j < nlruns; j++) {
+ for (j = 0, in_gap = false; j < nlextents; j++) {
uint64_t nmalloc, ndalloc, nrequests;
- size_t run_size, curruns;
+ size_t lextent_size, curlextents;
- CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
- uint64_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
- uint64_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
- uint64_t);
- if (nrequests == 0) {
- if (gap_start == -1)
- gap_start = j;
- } else {
- CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
- CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
- size_t);
- if (gap_start != -1) {
- malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
- j - gap_start);
- gap_start = -1;
- }
- malloc_cprintf(write_cb, cbopaque,
- "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
- " %12zu\n",
- run_size, run_size / page, nmalloc, ndalloc,
- nrequests, curruns);
+ CTL_M2_M4_GET("stats.arenas.0.lextents.0.nmalloc", i, j,
+ &nmalloc, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.lextents.0.ndalloc", i, j,
+ &ndalloc, uint64_t);
+ CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j,
+ &nrequests, uint64_t);
+ in_gap_prev = in_gap;
+ in_gap = (nrequests == 0);
+
+ if (in_gap_prev && !in_gap) {
+ emitter_table_printf(emitter,
+ " ---\n");
}
+
+ CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t);
+ CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j,
+ &curlextents, size_t);
+
+ emitter_json_arr_obj_begin(emitter);
+ emitter_json_kv(emitter, "curlextents", emitter_type_size,
+ &curlextents);
+ emitter_json_arr_obj_end(emitter);
+
+ col_size.size_val = lextent_size;
+ col_ind.unsigned_val = nbins + j;
+ col_allocated.size_val = curlextents * lextent_size;
+ col_nmalloc.uint64_val = nmalloc;
+ col_ndalloc.uint64_val = ndalloc;
+ col_nrequests.uint64_val = nrequests;
+ col_curlextents.size_val = curlextents;
+
+ if (!in_gap) {
+ emitter_table_row(emitter, &row);
+ }
+ }
+ emitter_json_arr_end(emitter); /* Close "lextents". */
+ if (in_gap) {
+ emitter_table_printf(emitter, " ---\n");
+ }
+}
+
+static void
+stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) {
+ emitter_row_t row;
+ emitter_col_t col_name;
+ emitter_col_t col64[mutex_prof_num_uint64_t_counters];
+ emitter_col_t col32[mutex_prof_num_uint32_t_counters];
+
+ emitter_row_init(&row);
+ mutex_stats_init_cols(&row, "", &col_name, col64, col32);
+
+ emitter_json_dict_begin(emitter, "mutexes");
+ emitter_table_row(emitter, &row);
+
+ for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes;
+ i++) {
+ const char *name = arena_mutex_names[i];
+ emitter_json_dict_begin(emitter, name);
+ mutex_stats_read_arena(arena_ind, i, name, &col_name, col64,
+ col32);
+ mutex_stats_emit(emitter, &row, col64, col32);
+ emitter_json_dict_end(emitter); /* Close the mutex dict. */
}
- if (gap_start != -1)
- malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
+ emitter_json_dict_end(emitter); /* End "mutexes". */
}
static void
-stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
- unsigned i, bool bins, bool large)
-{
+stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large,
+ bool mutex) {
unsigned nthreads;
const char *dss;
- size_t page, pactive, pdirty, mapped;
- uint64_t npurge, nmadvise, purged;
+ ssize_t dirty_decay_ms, muzzy_decay_ms;
+ size_t page, pactive, pdirty, pmuzzy, mapped, retained;
+ size_t base, internal, resident, metadata_thp;
+ uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
+ uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
size_t small_allocated;
uint64_t small_nmalloc, small_ndalloc, small_nrequests;
size_t large_allocated;
uint64_t large_nmalloc, large_ndalloc, large_nrequests;
+ size_t tcache_bytes;
+ uint64_t uptime;
CTL_GET("arenas.page", &page, size_t);
- CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
- malloc_cprintf(write_cb, cbopaque,
- "assigned threads: %u\n", nthreads);
- CTL_I_GET("stats.arenas.0.dss", &dss, const char *);
- malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n",
- dss);
- CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
- CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
- CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
- CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
- CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
- malloc_cprintf(write_cb, cbopaque,
- "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
- " %"PRIu64" madvise%s, %"PRIu64" purged\n",
- pactive, pdirty, npurge, npurge == 1 ? "" : "s",
- nmadvise, nmadvise == 1 ? "" : "s", purged);
-
- malloc_cprintf(write_cb, cbopaque,
- " allocated nmalloc ndalloc nrequests\n");
- CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
- CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
- malloc_cprintf(write_cb, cbopaque,
- "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
- CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
- CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
- malloc_cprintf(write_cb, cbopaque,
- "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
- malloc_cprintf(write_cb, cbopaque,
- "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- small_allocated + large_allocated,
- small_nmalloc + large_nmalloc,
- small_ndalloc + large_ndalloc,
- small_nrequests + large_nrequests);
- malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page);
- CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
- malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped);
-
- if (bins)
- stats_arena_bins_print(write_cb, cbopaque, i);
- if (large)
- stats_arena_lruns_print(write_cb, cbopaque, i);
+ CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned);
+ emitter_kv(emitter, "nthreads", "assigned threads",
+ emitter_type_unsigned, &nthreads);
+
+ CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t);
+ emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64,
+ &uptime);
+
+ CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *);
+ emitter_kv(emitter, "dss", "dss allocation precedence",
+ emitter_type_string, &dss);
+
+ CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms,
+ ssize_t);
+ CTL_M2_GET("stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms,
+ ssize_t);
+ CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t);
+ CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t);
+ CTL_M2_GET("stats.arenas.0.pmuzzy", i, &pmuzzy, size_t);
+ CTL_M2_GET("stats.arenas.0.dirty_npurge", i, &dirty_npurge, uint64_t);
+ CTL_M2_GET("stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise,
+ uint64_t);
+ CTL_M2_GET("stats.arenas.0.dirty_purged", i, &dirty_purged, uint64_t);
+ CTL_M2_GET("stats.arenas.0.muzzy_npurge", i, &muzzy_npurge, uint64_t);
+ CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise,
+ uint64_t);
+ CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t);
+
+ emitter_row_t decay_row;
+ emitter_row_init(&decay_row);
+
+ /* JSON-style emission. */
+ emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize,
+ &dirty_decay_ms);
+ emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize,
+ &muzzy_decay_ms);
+
+ emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive);
+ emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty);
+ emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy);
+
+ emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64,
+ &dirty_npurge);
+ emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64,
+ &dirty_nmadvise);
+ emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64,
+ &dirty_purged);
+
+ emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64,
+ &muzzy_npurge);
+ emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64,
+ &muzzy_nmadvise);
+ emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64,
+ &muzzy_purged);
+
+ /* Table-style emission. */
+ emitter_col_t decay_type;
+ emitter_col_init(&decay_type, &decay_row);
+ decay_type.justify = emitter_justify_right;
+ decay_type.width = 9;
+ decay_type.type = emitter_type_title;
+ decay_type.str_val = "decaying:";
+
+ emitter_col_t decay_time;
+ emitter_col_init(&decay_time, &decay_row);
+ decay_time.justify = emitter_justify_right;
+ decay_time.width = 6;
+ decay_time.type = emitter_type_title;
+ decay_time.str_val = "time";
+
+ emitter_col_t decay_npages;
+ emitter_col_init(&decay_npages, &decay_row);
+ decay_npages.justify = emitter_justify_right;
+ decay_npages.width = 13;
+ decay_npages.type = emitter_type_title;
+ decay_npages.str_val = "npages";
+
+ emitter_col_t decay_sweeps;
+ emitter_col_init(&decay_sweeps, &decay_row);
+ decay_sweeps.justify = emitter_justify_right;
+ decay_sweeps.width = 13;
+ decay_sweeps.type = emitter_type_title;
+ decay_sweeps.str_val = "sweeps";
+
+ emitter_col_t decay_madvises;
+ emitter_col_init(&decay_madvises, &decay_row);
+ decay_madvises.justify = emitter_justify_right;
+ decay_madvises.width = 13;
+ decay_madvises.type = emitter_type_title;
+ decay_madvises.str_val = "madvises";
+
+ emitter_col_t decay_purged;
+ emitter_col_init(&decay_purged, &decay_row);
+ decay_purged.justify = emitter_justify_right;
+ decay_purged.width = 13;
+ decay_purged.type = emitter_type_title;
+ decay_purged.str_val = "purged";
+
+ /* Title row. */
+ emitter_table_row(emitter, &decay_row);
+
+ /* Dirty row. */
+ decay_type.str_val = "dirty:";
+
+ if (dirty_decay_ms >= 0) {
+ decay_time.type = emitter_type_ssize;
+ decay_time.ssize_val = dirty_decay_ms;
+ } else {
+ decay_time.type = emitter_type_title;
+ decay_time.str_val = "N/A";
+ }
+
+ decay_npages.type = emitter_type_size;
+ decay_npages.size_val = pdirty;
+
+ decay_sweeps.type = emitter_type_uint64;
+ decay_sweeps.uint64_val = dirty_npurge;
+
+ decay_madvises.type = emitter_type_uint64;
+ decay_madvises.uint64_val = dirty_nmadvise;
+
+ decay_purged.type = emitter_type_uint64;
+ decay_purged.uint64_val = dirty_purged;
+
+ emitter_table_row(emitter, &decay_row);
+
+ /* Muzzy row. */
+ decay_type.str_val = "muzzy:";
+
+ if (muzzy_decay_ms >= 0) {
+ decay_time.type = emitter_type_ssize;
+ decay_time.ssize_val = muzzy_decay_ms;
+ } else {
+ decay_time.type = emitter_type_title;
+ decay_time.str_val = "N/A";
+ }
+
+ decay_npages.type = emitter_type_size;
+ decay_npages.size_val = pmuzzy;
+
+ decay_sweeps.type = emitter_type_uint64;
+ decay_sweeps.uint64_val = muzzy_npurge;
+
+ decay_madvises.type = emitter_type_uint64;
+ decay_madvises.uint64_val = muzzy_nmadvise;
+
+ decay_purged.type = emitter_type_uint64;
+ decay_purged.uint64_val = muzzy_purged;
+
+ emitter_table_row(emitter, &decay_row);
+
+ /* Small / large / total allocation counts. */
+ emitter_row_t alloc_count_row;
+ emitter_row_init(&alloc_count_row);
+
+ emitter_col_t alloc_count_title;
+ emitter_col_init(&alloc_count_title, &alloc_count_row);
+ alloc_count_title.justify = emitter_justify_left;
+ alloc_count_title.width = 25;
+ alloc_count_title.type = emitter_type_title;
+ alloc_count_title.str_val = "";
+
+ emitter_col_t alloc_count_allocated;
+ emitter_col_init(&alloc_count_allocated, &alloc_count_row);
+ alloc_count_allocated.justify = emitter_justify_right;
+ alloc_count_allocated.width = 12;
+ alloc_count_allocated.type = emitter_type_title;
+ alloc_count_allocated.str_val = "allocated";
+
+ emitter_col_t alloc_count_nmalloc;
+ emitter_col_init(&alloc_count_nmalloc, &alloc_count_row);
+ alloc_count_nmalloc.justify = emitter_justify_right;
+ alloc_count_nmalloc.width = 12;
+ alloc_count_nmalloc.type = emitter_type_title;
+ alloc_count_nmalloc.str_val = "nmalloc";
+
+ emitter_col_t alloc_count_ndalloc;
+ emitter_col_init(&alloc_count_ndalloc, &alloc_count_row);
+ alloc_count_ndalloc.justify = emitter_justify_right;
+ alloc_count_ndalloc.width = 12;
+ alloc_count_ndalloc.type = emitter_type_title;
+ alloc_count_ndalloc.str_val = "ndalloc";
+
+ emitter_col_t alloc_count_nrequests;
+ emitter_col_init(&alloc_count_nrequests, &alloc_count_row);
+ alloc_count_nrequests.justify = emitter_justify_right;
+ alloc_count_nrequests.width = 12;
+ alloc_count_nrequests.type = emitter_type_title;
+ alloc_count_nrequests.str_val = "nrequests";
+
+ emitter_table_row(emitter, &alloc_count_row);
+
+#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \
+ CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \
+ &small_or_large##_##name, valtype##_t); \
+ emitter_json_kv(emitter, #name, emitter_type_##valtype, \
+ &small_or_large##_##name); \
+ alloc_count_##name.type = emitter_type_##valtype; \
+ alloc_count_##name.valtype##_val = small_or_large##_##name;
+
+ emitter_json_dict_begin(emitter, "small");
+ alloc_count_title.str_val = "small:";
+
+ GET_AND_EMIT_ALLOC_STAT(small, allocated, size)
+ GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64)
+ GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64)
+ GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64)
+
+ emitter_table_row(emitter, &alloc_count_row);
+ emitter_json_dict_end(emitter); /* Close "small". */
+
+ emitter_json_dict_begin(emitter, "large");
+ alloc_count_title.str_val = "large:";
+
+ GET_AND_EMIT_ALLOC_STAT(large, allocated, size)
+ GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64)
+ GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64)
+ GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64)
+
+ emitter_table_row(emitter, &alloc_count_row);
+ emitter_json_dict_end(emitter); /* Close "large". */
+
+#undef GET_AND_EMIT_ALLOC_STAT
+
+	/* Aggregated small + large stats are emitted only in table mode. */
+ alloc_count_title.str_val = "total:";
+ alloc_count_allocated.size_val = small_allocated + large_allocated;
+ alloc_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc;
+ alloc_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc;
+ alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests;
+ emitter_table_row(emitter, &alloc_count_row);
+
+ emitter_row_t mem_count_row;
+ emitter_row_init(&mem_count_row);
+
+ emitter_col_t mem_count_title;
+ emitter_col_init(&mem_count_title, &mem_count_row);
+ mem_count_title.justify = emitter_justify_left;
+ mem_count_title.width = 25;
+ mem_count_title.type = emitter_type_title;
+ mem_count_title.str_val = "";
+
+ emitter_col_t mem_count_val;
+ emitter_col_init(&mem_count_val, &mem_count_row);
+ mem_count_val.justify = emitter_justify_right;
+ mem_count_val.width = 12;
+ mem_count_val.type = emitter_type_title;
+ mem_count_val.str_val = "";
+
+ emitter_table_row(emitter, &mem_count_row);
+ mem_count_val.type = emitter_type_size;
+
+ /* Active count in bytes is emitted only in table mode. */
+ mem_count_title.str_val = "active:";
+ mem_count_val.size_val = pactive * page;
+ emitter_table_row(emitter, &mem_count_row);
+
+#define GET_AND_EMIT_MEM_STAT(stat) \
+ CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \
+ emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \
+ mem_count_title.str_val = #stat":"; \
+ mem_count_val.size_val = stat; \
+ emitter_table_row(emitter, &mem_count_row);
+
+ GET_AND_EMIT_MEM_STAT(mapped)
+ GET_AND_EMIT_MEM_STAT(retained)
+ GET_AND_EMIT_MEM_STAT(base)
+ GET_AND_EMIT_MEM_STAT(internal)
+ GET_AND_EMIT_MEM_STAT(metadata_thp)
+ GET_AND_EMIT_MEM_STAT(tcache_bytes)
+ GET_AND_EMIT_MEM_STAT(resident)
+#undef GET_AND_EMIT_MEM_STAT
+
+ if (mutex) {
+ stats_arena_mutexes_print(emitter, i);
+ }
+ if (bins) {
+ stats_arena_bins_print(emitter, mutex, i);
+ }
+ if (large) {
+ stats_arena_lextents_print(emitter, i);
+ }
+}
+
+static void
+stats_general_print(emitter_t *emitter) {
+ const char *cpv;
+ bool bv, bv2;
+ unsigned uv;
+ uint32_t u32v;
+ uint64_t u64v;
+ ssize_t ssv, ssv2;
+ size_t sv, bsz, usz, ssz, sssz, cpsz;
+
+ bsz = sizeof(bool);
+ usz = sizeof(unsigned);
+ ssz = sizeof(size_t);
+ sssz = sizeof(ssize_t);
+ cpsz = sizeof(const char *);
+
+ CTL_GET("version", &cpv, const char *);
+ emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv);
+
+ /* config. */
+ emitter_dict_begin(emitter, "config", "Build-time option settings");
+#define CONFIG_WRITE_BOOL(name) \
+ do { \
+ CTL_GET("config."#name, &bv, bool); \
+ emitter_kv(emitter, #name, "config."#name, \
+ emitter_type_bool, &bv); \
+ } while (0)
+
+ CONFIG_WRITE_BOOL(cache_oblivious);
+ CONFIG_WRITE_BOOL(debug);
+ CONFIG_WRITE_BOOL(fill);
+ CONFIG_WRITE_BOOL(lazy_lock);
+ emitter_kv(emitter, "malloc_conf", "config.malloc_conf",
+ emitter_type_string, &config_malloc_conf);
+
+ CONFIG_WRITE_BOOL(prof);
+ CONFIG_WRITE_BOOL(prof_libgcc);
+ CONFIG_WRITE_BOOL(prof_libunwind);
+ CONFIG_WRITE_BOOL(stats);
+ CONFIG_WRITE_BOOL(utrace);
+ CONFIG_WRITE_BOOL(xmalloc);
+#undef CONFIG_WRITE_BOOL
+ emitter_dict_end(emitter); /* Close "config" dict. */
+
+ /* opt. */
+#define OPT_WRITE(name, var, size, emitter_type) \
+ if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \
+ 0) { \
+ emitter_kv(emitter, name, "opt."name, emitter_type, \
+ &var); \
+ }
+
+#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \
+ altname) \
+ if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \
+ 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \
+ == 0) { \
+ emitter_kv_note(emitter, name, "opt."name, \
+ emitter_type, &var1, altname, emitter_type, \
+ &var2); \
+ }
+
+#define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool)
+#define OPT_WRITE_BOOL_MUTABLE(name, altname) \
+ OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname)
+
+#define OPT_WRITE_UNSIGNED(name) \
+ OPT_WRITE(name, uv, usz, emitter_type_unsigned)
+
+#define OPT_WRITE_SSIZE_T(name) \
+ OPT_WRITE(name, ssv, sssz, emitter_type_ssize)
+#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \
+ OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \
+ altname)
+
+#define OPT_WRITE_CHAR_P(name) \
+ OPT_WRITE(name, cpv, cpsz, emitter_type_string)
+
+ emitter_dict_begin(emitter, "opt", "Run-time option settings");
+
+ OPT_WRITE_BOOL("abort")
+ OPT_WRITE_BOOL("abort_conf")
+ OPT_WRITE_BOOL("retain")
+ OPT_WRITE_CHAR_P("dss")
+ OPT_WRITE_UNSIGNED("narenas")
+ OPT_WRITE_CHAR_P("percpu_arena")
+ OPT_WRITE_CHAR_P("metadata_thp")
+ OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread")
+ OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms")
+ OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms")
+ OPT_WRITE_UNSIGNED("lg_extent_max_active_fit")
+ OPT_WRITE_CHAR_P("junk")
+ OPT_WRITE_BOOL("zero")
+ OPT_WRITE_BOOL("utrace")
+ OPT_WRITE_BOOL("xmalloc")
+ OPT_WRITE_BOOL("tcache")
+ OPT_WRITE_SSIZE_T("lg_tcache_max")
+ OPT_WRITE_CHAR_P("thp")
+ OPT_WRITE_BOOL("prof")
+ OPT_WRITE_CHAR_P("prof_prefix")
+ OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active")
+ OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init",
+ "prof.thread_active_init")
+ OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample")
+ OPT_WRITE_BOOL("prof_accum")
+ OPT_WRITE_SSIZE_T("lg_prof_interval")
+ OPT_WRITE_BOOL("prof_gdump")
+ OPT_WRITE_BOOL("prof_final")
+ OPT_WRITE_BOOL("prof_leak")
+ OPT_WRITE_BOOL("stats_print")
+ OPT_WRITE_CHAR_P("stats_print_opts")
+
+ emitter_dict_end(emitter);
+
+#undef OPT_WRITE
+#undef OPT_WRITE_MUTABLE
+#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_BOOL_MUTABLE
+#undef OPT_WRITE_UNSIGNED
+#undef OPT_WRITE_SSIZE_T
+#undef OPT_WRITE_SSIZE_T_MUTABLE
+#undef OPT_WRITE_CHAR_P
+
+ /* prof. */
+ if (config_prof) {
+ emitter_dict_begin(emitter, "prof", "Profiling settings");
+
+ CTL_GET("prof.thread_active_init", &bv, bool);
+ emitter_kv(emitter, "thread_active_init",
+ "prof.thread_active_init", emitter_type_bool, &bv);
+
+ CTL_GET("prof.active", &bv, bool);
+ emitter_kv(emitter, "active", "prof.active", emitter_type_bool,
+ &bv);
+
+ CTL_GET("prof.gdump", &bv, bool);
+ emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool,
+ &bv);
+
+ CTL_GET("prof.interval", &u64v, uint64_t);
+ emitter_kv(emitter, "interval", "prof.interval",
+ emitter_type_uint64, &u64v);
+
+ CTL_GET("prof.lg_sample", &ssv, ssize_t);
+ emitter_kv(emitter, "lg_sample", "prof.lg_sample",
+ emitter_type_ssize, &ssv);
+
+ emitter_dict_end(emitter); /* Close "prof". */
+ }
+
+ /* arenas. */
+ /*
+	 * The json output sticks arena info into an "arenas" dict; the table
+	 * output puts it at the top level.
+ */
+ emitter_json_dict_begin(emitter, "arenas");
+
+ CTL_GET("arenas.narenas", &uv, unsigned);
+ emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv);
+
+ /*
+ * Decay settings are emitted only in json mode; in table mode, they're
+ * emitted as notes with the opt output, above.
+ */
+ CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t);
+ emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, &ssv);
+
+ CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t);
+ emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, &ssv);
+
+ CTL_GET("arenas.quantum", &sv, size_t);
+ emitter_kv(emitter, "quantum", "Quantum size", emitter_type_size, &sv);
+
+ CTL_GET("arenas.page", &sv, size_t);
+ emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv);
+
+ if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) {
+ emitter_kv(emitter, "tcache_max",
+ "Maximum thread-cached size class", emitter_type_size, &sv);
+ }
+
+ unsigned nbins;
+ CTL_GET("arenas.nbins", &nbins, unsigned);
+ emitter_kv(emitter, "nbins", "Number of bin size classes",
+ emitter_type_unsigned, &nbins);
+
+ unsigned nhbins;
+ CTL_GET("arenas.nhbins", &nhbins, unsigned);
+ emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes",
+ emitter_type_unsigned, &nhbins);
+
+ /*
+	 * In table mode we want to skip this loop entirely, not merely its
+	 * printing: it performs multiple mallctls per size class, which is
+	 * worth omitting when the output would be discarded anyway.
+ */
+ if (emitter->output == emitter_output_json) {
+ emitter_json_arr_begin(emitter, "bin");
+ for (unsigned i = 0; i < nbins; i++) {
+ emitter_json_arr_obj_begin(emitter);
+
+ CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t);
+ emitter_json_kv(emitter, "size", emitter_type_size,
+ &sv);
+
+ CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t);
+ emitter_json_kv(emitter, "nregs", emitter_type_uint32,
+ &u32v);
+
+ CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t);
+ emitter_json_kv(emitter, "slab_size", emitter_type_size,
+ &sv);
+
+ emitter_json_arr_obj_end(emitter);
+ }
+ emitter_json_arr_end(emitter); /* Close "bin". */
+ }
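+
+	/*
+	 * Editor's sketch, not part of the patch: with illustrative values,
+	 * the loop above emits a JSON fragment shaped like
+	 *
+	 *	"bin": [
+	 *		{"size": 8, "nregs": 512, "slab_size": 4096},
+	 *		{"size": 16, "nregs": 256, "slab_size": 4096},
+	 *		...
+	 *	],
+	 */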
+
+ unsigned nlextents;
+ CTL_GET("arenas.nlextents", &nlextents, unsigned);
+ emitter_kv(emitter, "nlextents", "Number of large size classes",
+ emitter_type_unsigned, &nlextents);
+
+ if (emitter->output == emitter_output_json) {
+ emitter_json_arr_begin(emitter, "lextent");
+ for (unsigned i = 0; i < nlextents; i++) {
+ emitter_json_arr_obj_begin(emitter);
+
+ CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t);
+ emitter_json_kv(emitter, "size", emitter_type_size,
+ &sv);
+
+ emitter_json_arr_obj_end(emitter);
+ }
+ emitter_json_arr_end(emitter); /* Close "lextent". */
+ }
+
+ emitter_json_dict_end(emitter); /* Close "arenas" */
+}
+
+static void
+stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
+ bool unmerged, bool bins, bool large, bool mutex) {
+ /*
+	 * These should eventually be deleted; we keep them around for now to
+	 * aid in the transition to the emitter code.
+ */
+ size_t allocated, active, metadata, metadata_thp, resident, mapped,
+ retained;
+ size_t num_background_threads;
+ uint64_t background_thread_num_runs, background_thread_run_interval;
+
+ CTL_GET("stats.allocated", &allocated, size_t);
+ CTL_GET("stats.active", &active, size_t);
+ CTL_GET("stats.metadata", &metadata, size_t);
+ CTL_GET("stats.metadata_thp", &metadata_thp, size_t);
+ CTL_GET("stats.resident", &resident, size_t);
+ CTL_GET("stats.mapped", &mapped, size_t);
+ CTL_GET("stats.retained", &retained, size_t);
+
+ if (have_background_thread) {
+ CTL_GET("stats.background_thread.num_threads",
+ &num_background_threads, size_t);
+ CTL_GET("stats.background_thread.num_runs",
+ &background_thread_num_runs, uint64_t);
+ CTL_GET("stats.background_thread.run_interval",
+ &background_thread_run_interval, uint64_t);
+ } else {
+ num_background_threads = 0;
+ background_thread_num_runs = 0;
+ background_thread_run_interval = 0;
+ }
+
+ /* Generic global stats. */
+ emitter_json_dict_begin(emitter, "stats");
+ emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated);
+ emitter_json_kv(emitter, "active", emitter_type_size, &active);
+ emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata);
+ emitter_json_kv(emitter, "metadata_thp", emitter_type_size,
+ &metadata_thp);
+ emitter_json_kv(emitter, "resident", emitter_type_size, &resident);
+ emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped);
+ emitter_json_kv(emitter, "retained", emitter_type_size, &retained);
+
+ emitter_table_printf(emitter, "Allocated: %zu, active: %zu, "
+ "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, "
+ "retained: %zu\n", allocated, active, metadata, metadata_thp,
+ resident, mapped, retained);
+
+ /* Background thread stats. */
+ emitter_json_dict_begin(emitter, "background_thread");
+ emitter_json_kv(emitter, "num_threads", emitter_type_size,
+ &num_background_threads);
+ emitter_json_kv(emitter, "num_runs", emitter_type_uint64,
+ &background_thread_num_runs);
+ emitter_json_kv(emitter, "run_interval", emitter_type_uint64,
+ &background_thread_run_interval);
+ emitter_json_dict_end(emitter); /* Close "background_thread". */
+
+ emitter_table_printf(emitter, "Background threads: %zu, "
+ "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n",
+ num_background_threads, background_thread_num_runs,
+ background_thread_run_interval);
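+
+	/*
+	 * Editor's sketch, not part of the patch: in table mode the two
+	 * emitter_table_printf() calls above produce lines such as (values
+	 * illustrative):
+	 *
+	 *	Allocated: 1048576, active: 1179648, metadata: 262144
+	 *	    (n_thp 0), resident: 1441792, mapped: 2097152, retained: 0
+	 *	Background threads: 2, num_runs: 17, run_interval: 10000000 ns
+	 */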
+
+ if (mutex) {
+ emitter_row_t row;
+ emitter_col_t name;
+ emitter_col_t col64[mutex_prof_num_uint64_t_counters];
+ emitter_col_t col32[mutex_prof_num_uint32_t_counters];
+
+ emitter_row_init(&row);
+ mutex_stats_init_cols(&row, "", &name, col64, col32);
+
+ emitter_table_row(emitter, &row);
+ emitter_json_dict_begin(emitter, "mutexes");
+
+ for (int i = 0; i < mutex_prof_num_global_mutexes; i++) {
+ mutex_stats_read_global(global_mutex_names[i], &name,
+ col64, col32);
+ emitter_json_dict_begin(emitter, global_mutex_names[i]);
+ mutex_stats_emit(emitter, &row, col64, col32);
+ emitter_json_dict_end(emitter);
+ }
+
+ emitter_json_dict_end(emitter); /* Close "mutexes". */
+ }
+
+ emitter_json_dict_end(emitter); /* Close "stats". */
+
+ if (merged || destroyed || unmerged) {
+ unsigned narenas;
+
+ emitter_json_dict_begin(emitter, "stats.arenas");
+
+ CTL_GET("arenas.narenas", &narenas, unsigned);
+ size_t mib[3];
+ size_t miblen = sizeof(mib) / sizeof(size_t);
+ size_t sz;
+ VARIABLE_ARRAY(bool, initialized, narenas);
+ bool destroyed_initialized;
+ unsigned i, j, ninitialized;
+
+ xmallctlnametomib("arena.0.initialized", mib, &miblen);
+ for (i = ninitialized = 0; i < narenas; i++) {
+ mib[1] = i;
+ sz = sizeof(bool);
+ xmallctlbymib(mib, miblen, &initialized[i], &sz,
+ NULL, 0);
+ if (initialized[i]) {
+ ninitialized++;
+ }
+ }
+ mib[1] = MALLCTL_ARENAS_DESTROYED;
+ sz = sizeof(bool);
+ xmallctlbymib(mib, miblen, &destroyed_initialized, &sz,
+ NULL, 0);
+
+ /* Merged stats. */
+ if (merged && (ninitialized > 1 || !unmerged)) {
+ /* Print merged arena stats. */
+ emitter_table_printf(emitter, "Merged arenas stats:\n");
+ emitter_json_dict_begin(emitter, "merged");
+ stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins,
+ large, mutex);
+ emitter_json_dict_end(emitter); /* Close "merged". */
+ }
+
+ /* Destroyed stats. */
+ if (destroyed_initialized && destroyed) {
+ /* Print destroyed arena stats. */
+ emitter_table_printf(emitter,
+ "Destroyed arenas stats:\n");
+ emitter_json_dict_begin(emitter, "destroyed");
+ stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED,
+ bins, large, mutex);
+ emitter_json_dict_end(emitter); /* Close "destroyed". */
+ }
+
+ /* Unmerged stats. */
+ if (unmerged) {
+ for (i = j = 0; i < narenas; i++) {
+ if (initialized[i]) {
+ char arena_ind_str[20];
+ malloc_snprintf(arena_ind_str,
+ sizeof(arena_ind_str), "%u", i);
+ emitter_json_dict_begin(emitter,
+ arena_ind_str);
+ emitter_table_printf(emitter,
+ "arenas[%s]:\n", arena_ind_str);
+ stats_arena_print(emitter, i, bins,
+ large, mutex);
+ /* Close "<arena-ind>". */
+ emitter_json_dict_end(emitter);
+ }
+ }
+ }
+ emitter_json_dict_end(emitter); /* Close "stats.arenas". */
+ }
}
void
stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *opts)
-{
+ const char *opts) {
int err;
uint64_t epoch;
size_t u64sz;
- bool general = true;
- bool merged = true;
- bool unmerged = true;
- bool bins = true;
- bool large = true;
+#define OPTION(o, v, d, s) bool v = d;
+ STATS_PRINT_OPTIONS
+#undef OPTION
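+
+	/*
+	 * Editor's note, not part of the patch: STATS_PRINT_OPTIONS is an
+	 * X-macro list defined in the stats header; assuming entries of the
+	 * form OPTION('g', general, true, false), the expansion above declares
+	 * one flag per output option, e.g.
+	 *
+	 *	bool general = true;
+	 *	bool bins = true;
+	 *	...
+	 *
+	 * The opts-parsing switch further below re-expands the same list, so
+	 * each recognized character assigns its flag, e.g.
+	 * case 'g': general = false; break;
+	 */
+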
/*
* Refresh stats, in case mallctl() was called by the application.
@@ -287,7 +1240,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
	 */
epoch = 1;
u64sz = sizeof(uint64_t);
- err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
+ err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch,
+ sizeof(uint64_t));
if (err != 0) {
if (err == EAGAIN) {
malloc_write("<jemalloc>: Memory allocation failure in "
@@ -300,250 +1254,33 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
if (opts != NULL) {
- unsigned i;
-
- for (i = 0; opts[i] != '\0'; i++) {
+ for (unsigned i = 0; opts[i] != '\0'; i++) {
switch (opts[i]) {
- case 'g':
- general = false;
- break;
- case 'm':
- merged = false;
- break;
- case 'a':
- unmerged = false;
- break;
- case 'b':
- bins = false;
- break;
- case 'l':
- large = false;
- break;
+#define OPTION(o, v, d, s) case o: v = s; break;
+ STATS_PRINT_OPTIONS
+#undef OPTION
default:;
}
}
}
- malloc_cprintf(write_cb, cbopaque,
- "___ Begin jemalloc statistics ___\n");
- if (general) {
- int err;
- const char *cpv;
- bool bv;
- unsigned uv;
- ssize_t ssv;
- size_t sv, bsz, ssz, sssz, cpsz;
-
- bsz = sizeof(bool);
- ssz = sizeof(size_t);
- sssz = sizeof(ssize_t);
- cpsz = sizeof(const char *);
-
- CTL_GET("version", &cpv, const char *);
- malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
- CTL_GET("config.debug", &bv, bool);
- malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
- bv ? "enabled" : "disabled");
-
-#define OPT_WRITE_BOOL(n) \
- if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0)) \
- == 0) { \
- malloc_cprintf(write_cb, cbopaque, \
- " opt."#n": %s\n", bv ? "true" : "false"); \
- }
-#define OPT_WRITE_SIZE_T(n) \
- if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0)) \
- == 0) { \
- malloc_cprintf(write_cb, cbopaque, \
- " opt."#n": %zu\n", sv); \
- }
-#define OPT_WRITE_SSIZE_T(n) \
- if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0)) \
- == 0) { \
- malloc_cprintf(write_cb, cbopaque, \
- " opt."#n": %zd\n", ssv); \
- }
-#define OPT_WRITE_CHAR_P(n) \
- if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0)) \
- == 0) { \
- malloc_cprintf(write_cb, cbopaque, \
- " opt."#n": \"%s\"\n", cpv); \
- }
-
- malloc_cprintf(write_cb, cbopaque,
- "Run-time option settings:\n");
- OPT_WRITE_BOOL(abort)
- OPT_WRITE_SIZE_T(lg_chunk)
- OPT_WRITE_CHAR_P(dss)
- OPT_WRITE_SIZE_T(narenas)
- OPT_WRITE_SSIZE_T(lg_dirty_mult)
- OPT_WRITE_BOOL(stats_print)
- OPT_WRITE_BOOL(junk)
- OPT_WRITE_SIZE_T(quarantine)
- OPT_WRITE_BOOL(redzone)
- OPT_WRITE_BOOL(zero)
- OPT_WRITE_BOOL(utrace)
- OPT_WRITE_BOOL(valgrind)
- OPT_WRITE_BOOL(xmalloc)
- OPT_WRITE_BOOL(tcache)
- OPT_WRITE_SSIZE_T(lg_tcache_max)
- OPT_WRITE_BOOL(prof)
- OPT_WRITE_CHAR_P(prof_prefix)
- OPT_WRITE_BOOL(prof_active)
- OPT_WRITE_SSIZE_T(lg_prof_sample)
- OPT_WRITE_BOOL(prof_accum)
- OPT_WRITE_SSIZE_T(lg_prof_interval)
- OPT_WRITE_BOOL(prof_gdump)
- OPT_WRITE_BOOL(prof_final)
- OPT_WRITE_BOOL(prof_leak)
-
-#undef OPT_WRITE_BOOL
-#undef OPT_WRITE_SIZE_T
-#undef OPT_WRITE_SSIZE_T
-#undef OPT_WRITE_CHAR_P
-
- malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus);
-
- CTL_GET("arenas.narenas", &uv, unsigned);
- malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv);
-
- malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n",
- sizeof(void *));
-
- CTL_GET("arenas.quantum", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
-
- CTL_GET("arenas.page", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
+ emitter_t emitter;
+ emitter_init(&emitter,
+ json ? emitter_output_json : emitter_output_table, write_cb,
+ cbopaque);
+ emitter_begin(&emitter);
+ emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n");
+ emitter_json_dict_begin(&emitter, "jemalloc");
- CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
- if (ssv >= 0) {
- malloc_cprintf(write_cb, cbopaque,
- "Min active:dirty page ratio per arena: %u:1\n",
- (1U << ssv));
- } else {
- malloc_cprintf(write_cb, cbopaque,
- "Min active:dirty page ratio per arena: N/A\n");
- }
- if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0))
- == 0) {
- malloc_cprintf(write_cb, cbopaque,
- "Maximum thread-cached size class: %zu\n", sv);
- }
- if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
- bv) {
- CTL_GET("opt.lg_prof_sample", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque,
- "Average profile sample interval: %"PRIu64
- " (2^%zu)\n", (((uint64_t)1U) << sv), sv);
-
- CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
- if (ssv >= 0) {
- malloc_cprintf(write_cb, cbopaque,
- "Average profile dump interval: %"PRIu64
- " (2^%zd)\n",
- (((uint64_t)1U) << ssv), ssv);
- } else {
- malloc_cprintf(write_cb, cbopaque,
- "Average profile dump interval: N/A\n");
- }
- }
- CTL_GET("opt.lg_chunk", &sv, size_t);
- malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n",
- (ZU(1) << sv), sv);
+ if (general) {
+ stats_general_print(&emitter);
}
-
if (config_stats) {
- size_t *cactive;
- size_t allocated, active, mapped;
- size_t chunks_current, chunks_high;
- uint64_t chunks_total;
- size_t huge_allocated;
- uint64_t huge_nmalloc, huge_ndalloc;
-
- CTL_GET("stats.cactive", &cactive, size_t *);
- CTL_GET("stats.allocated", &allocated, size_t);
- CTL_GET("stats.active", &active, size_t);
- CTL_GET("stats.mapped", &mapped, size_t);
- malloc_cprintf(write_cb, cbopaque,
- "Allocated: %zu, active: %zu, mapped: %zu\n",
- allocated, active, mapped);
- malloc_cprintf(write_cb, cbopaque,
- "Current active ceiling: %zu\n", atomic_read_z(cactive));
-
- /* Print chunk stats. */
- CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
- CTL_GET("stats.chunks.high", &chunks_high, size_t);
- CTL_GET("stats.chunks.current", &chunks_current, size_t);
- malloc_cprintf(write_cb, cbopaque, "chunks: nchunks "
- "highchunks curchunks\n");
- malloc_cprintf(write_cb, cbopaque,
- " %13"PRIu64" %12zu %12zu\n",
- chunks_total, chunks_high, chunks_current);
-
- /* Print huge stats. */
- CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
- CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
- CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
- malloc_cprintf(write_cb, cbopaque,
- "huge: nmalloc ndalloc allocated\n");
- malloc_cprintf(write_cb, cbopaque,
- " %12"PRIu64" %12"PRIu64" %12zu\n",
- huge_nmalloc, huge_ndalloc, huge_allocated);
-
- if (merged) {
- unsigned narenas;
-
- CTL_GET("arenas.narenas", &narenas, unsigned);
- {
- VARIABLE_ARRAY(bool, initialized, narenas);
- size_t isz;
- unsigned i, ninitialized;
-
- isz = sizeof(bool) * narenas;
- xmallctl("arenas.initialized", initialized,
- &isz, NULL, 0);
- for (i = ninitialized = 0; i < narenas; i++) {
- if (initialized[i])
- ninitialized++;
- }
-
- if (ninitialized > 1 || unmerged == false) {
- /* Print merged arena stats. */
- malloc_cprintf(write_cb, cbopaque,
- "\nMerged arenas stats:\n");
- stats_arena_print(write_cb, cbopaque,
- narenas, bins, large);
- }
- }
- }
-
- if (unmerged) {
- unsigned narenas;
-
- /* Print stats for each arena. */
-
- CTL_GET("arenas.narenas", &narenas, unsigned);
- {
- VARIABLE_ARRAY(bool, initialized, narenas);
- size_t isz;
- unsigned i;
-
- isz = sizeof(bool) * narenas;
- xmallctl("arenas.initialized", initialized,
- &isz, NULL, 0);
-
- for (i = 0; i < narenas; i++) {
- if (initialized[i]) {
- malloc_cprintf(write_cb,
- cbopaque,
- "\narenas[%u]:\n", i);
- stats_arena_print(write_cb,
- cbopaque, i, bins, large);
- }
- }
- }
- }
+ stats_print_helper(&emitter, merged, destroyed, unmerged,
+ bins, large, mutex);
}
- malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n");
+
+ emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */
+ emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n");
+ emitter_end(&emitter);
}
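
[Editor's note] The rewritten stats.c above funnels every statistic through a
single "emitter" object, so one code path renders both the human-readable
table and the JSON document. Below is a minimal standalone sketch of that
idea; out_t and emit_size_kv are invented for this note and are not
jemalloc's actual emitter API:

	#include <stdio.h>

	typedef enum { OUT_TABLE, OUT_JSON } out_t;

	/* One call site feeds both output formats. */
	static void
	emit_size_kv(out_t out, const char *json_key, const char *table_label,
	    size_t val) {
		if (out == OUT_JSON) {
			printf("\t\"%s\": %zu,\n", json_key, val);
		} else {
			printf("%s: %zu\n", table_label, val);
		}
	}

	int
	main(void) {
		emit_size_kv(OUT_TABLE, "allocated", "Allocated", 1048576);
		emit_size_kv(OUT_JSON, "allocated", "Allocated", 1048576);
		return 0;
	}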
diff --git a/deps/jemalloc/src/sz.c b/deps/jemalloc/src/sz.c
new file mode 100644
index 000000000..9de77e45f
--- /dev/null
+++ b/deps/jemalloc/src/sz.c
@@ -0,0 +1,107 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/sz.h"
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t sz_pind2sz_tab[NPSIZES+1] = {
+#define PSZ_yes(lg_grp, ndelta, lg_delta) \
+ (((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
+#define PSZ_no(lg_grp, ndelta, lg_delta)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+ PSZ_##psz(lg_grp, ndelta, lg_delta)
+ SIZE_CLASSES
+#undef PSZ_yes
+#undef PSZ_no
+#undef SC
+ (LARGE_MAXCLASS + PAGE)
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t sz_index2size_tab[NSIZES] = {
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+ ((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
+ SIZE_CLASSES
+#undef SC
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const uint8_t sz_size2index_tab[] = {
+#if LG_TINY_MIN == 0
+/* The div module doesn't support division by 1. */
+#error "Unsupported LG_TINY_MIN"
+#define S2B_0(i) i,
+#elif LG_TINY_MIN == 1
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_1(i) i,
+#elif LG_TINY_MIN == 2
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_2(i) i,
+#elif LG_TINY_MIN == 3
+#define S2B_3(i) i,
+#elif LG_TINY_MIN == 4
+#define S2B_4(i) i,
+#elif LG_TINY_MIN == 5
+#define S2B_5(i) i,
+#elif LG_TINY_MIN == 6
+#define S2B_6(i) i,
+#elif LG_TINY_MIN == 7
+#define S2B_7(i) i,
+#elif LG_TINY_MIN == 8
+#define S2B_8(i) i,
+#elif LG_TINY_MIN == 9
+#define S2B_9(i) i,
+#elif LG_TINY_MIN == 10
+#define S2B_10(i) i,
+#elif LG_TINY_MIN == 11
+#define S2B_11(i) i,
+#else
+#error "Unsupported LG_TINY_MIN"
+#endif
+#if LG_TINY_MIN < 1
+#define S2B_1(i) S2B_0(i) S2B_0(i)
+#endif
+#if LG_TINY_MIN < 2
+#define S2B_2(i) S2B_1(i) S2B_1(i)
+#endif
+#if LG_TINY_MIN < 3
+#define S2B_3(i) S2B_2(i) S2B_2(i)
+#endif
+#if LG_TINY_MIN < 4
+#define S2B_4(i) S2B_3(i) S2B_3(i)
+#endif
+#if LG_TINY_MIN < 5
+#define S2B_5(i) S2B_4(i) S2B_4(i)
+#endif
+#if LG_TINY_MIN < 6
+#define S2B_6(i) S2B_5(i) S2B_5(i)
+#endif
+#if LG_TINY_MIN < 7
+#define S2B_7(i) S2B_6(i) S2B_6(i)
+#endif
+#if LG_TINY_MIN < 8
+#define S2B_8(i) S2B_7(i) S2B_7(i)
+#endif
+#if LG_TINY_MIN < 9
+#define S2B_9(i) S2B_8(i) S2B_8(i)
+#endif
+#if LG_TINY_MIN < 10
+#define S2B_10(i) S2B_9(i) S2B_9(i)
+#endif
+#if LG_TINY_MIN < 11
+#define S2B_11(i) S2B_10(i) S2B_10(i)
+#endif
+#define S2B_no(i)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+ S2B_##lg_delta_lookup(index)
+ SIZE_CLASSES
+#undef S2B_3
+#undef S2B_4
+#undef S2B_5
+#undef S2B_6
+#undef S2B_7
+#undef S2B_8
+#undef S2B_9
+#undef S2B_10
+#undef S2B_11
+#undef S2B_no
+#undef SC
+};
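
[Editor's note] sz_size2index_tab above is built with a doubling trick: each
S2B_n(i) emits 2^(n - LG_TINY_MIN) copies of index i, so the finished table is
indexed by (size - 1) >> LG_TINY_MIN. A compilable toy version with classes
{8, 16, 32, 48, 64} and LG_TINY_MIN == 3 (names invented for this note):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	#define LG_TINY_MIN 3
	#define S2B_3(i) i,                 /* one 8-byte slot  */
	#define S2B_4(i) S2B_3(i) S2B_3(i)  /* two 8-byte slots */

	static const uint8_t toy_size2index_tab[] = {
		S2B_3(0)  /* sizes  1..8  -> class 0 ( 8 bytes) */
		S2B_3(1)  /* sizes  9..16 -> class 1 (16 bytes) */
		S2B_4(2)  /* sizes 17..32 -> class 2 (32 bytes) */
		S2B_4(3)  /* sizes 33..48 -> class 3 (48 bytes) */
		S2B_4(4)  /* sizes 49..64 -> class 4 (64 bytes) */
	};

	static unsigned
	toy_size2index(size_t size) {
		return toy_size2index_tab[(size - 1) >> LG_TINY_MIN];
	}

	int
	main(void) {
		assert(toy_size2index(1) == 0);
		assert(toy_size2index(9) == 1);
		assert(toy_size2index(33) == 3);
		assert(toy_size2index(64) == 4);
		return 0;
	}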
diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c
index 6de92960b..a769a6b17 100644
--- a/deps/jemalloc/src/tcache.c
+++ b/deps/jemalloc/src/tcache.c
@@ -1,131 +1,153 @@
-#define JEMALLOC_TCACHE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_TCACHE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/size_classes.h"
/******************************************************************************/
/* Data. */
-malloc_tsd_data(, tcache, tcache_t *, NULL)
-malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)
-
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
-tcache_bin_info_t *tcache_bin_info;
+cache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
-size_t nhbins;
+unsigned nhbins;
size_t tcache_maxclass;
-/******************************************************************************/
+tcaches_t *tcaches;
+
+/* Index of first element within tcaches that has never been used. */
+static unsigned tcaches_past;
+
+/* Head of singly linked list tracking available tcaches elements. */
+static tcaches_t *tcaches_avail;
-size_t
-tcache_salloc(const void *ptr)
-{
+/* Protects tcaches{,_past,_avail}. */
+static malloc_mutex_t tcaches_mtx;
- return (arena_salloc(ptr, false));
+/******************************************************************************/
+
+size_t
+tcache_salloc(tsdn_t *tsdn, const void *ptr) {
+ return arena_salloc(tsdn, ptr);
}
void
-tcache_event_hard(tcache_t *tcache)
-{
- size_t binind = tcache->next_gc_bin;
- tcache_bin_t *tbin = &tcache->tbins[binind];
- tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
-
+tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
+ szind_t binind = tcache->next_gc_bin;
+
+ cache_bin_t *tbin;
+ if (binind < NBINS) {
+ tbin = tcache_small_bin_get(tcache, binind);
+ } else {
+ tbin = tcache_large_bin_get(tcache, binind);
+ }
if (tbin->low_water > 0) {
/*
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (binind < NBINS) {
- tcache_bin_flush_small(tbin, binind, tbin->ncached -
- tbin->low_water + (tbin->low_water >> 2), tcache);
+ tcache_bin_flush_small(tsd, tcache, tbin, binind,
+ tbin->ncached - tbin->low_water + (tbin->low_water
+ >> 2));
+ /*
+ * Reduce fill count by 2X. Limit lg_fill_div such that
+ * the fill count is always at least 1.
+ */
+ cache_bin_info_t *tbin_info = &tcache_bin_info[binind];
+ if ((tbin_info->ncached_max >>
+ (tcache->lg_fill_div[binind] + 1)) >= 1) {
+ tcache->lg_fill_div[binind]++;
+ }
} else {
- tcache_bin_flush_large(tbin, binind, tbin->ncached -
- tbin->low_water + (tbin->low_water >> 2), tcache);
+ tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
+ - tbin->low_water + (tbin->low_water >> 2), tcache);
}
- /*
- * Reduce fill count by 2X. Limit lg_fill_div such that the
- * fill count is always at least 1.
- */
- if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
- tbin->lg_fill_div++;
} else if (tbin->low_water < 0) {
/*
- * Increase fill count by 2X. Make sure lg_fill_div stays
- * greater than 0.
+ * Increase fill count by 2X for small bins. Make sure
+ * lg_fill_div stays greater than 0.
*/
- if (tbin->lg_fill_div > 1)
- tbin->lg_fill_div--;
+ if (binind < NBINS && tcache->lg_fill_div[binind] > 1) {
+ tcache->lg_fill_div[binind]--;
+ }
}
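+	/*
+	 * Editor's worked example, not part of the patch: with ncached == 20
+	 * and low_water == 8, the small-bin flush above keeps
+	 * 20 - 8 + (8 >> 2) == 14 objects, i.e. it flushes 6 == ceil(3/4 * 8)
+	 * of the objects that sat unused below the low-water mark.
+	 */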
tbin->low_water = tbin->ncached;
tcache->next_gc_bin++;
- if (tcache->next_gc_bin == nhbins)
+ if (tcache->next_gc_bin == nhbins) {
tcache->next_gc_bin = 0;
- tcache->ev_cnt = 0;
+ }
}
void *
-tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
-{
+tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+ cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
void *ret;
- arena_tcache_fill_small(tcache->arena, tbin, binind,
+ assert(tcache->arena != NULL);
+ arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
config_prof ? tcache->prof_accumbytes : 0);
- if (config_prof)
+ if (config_prof) {
tcache->prof_accumbytes = 0;
- ret = tcache_alloc_easy(tbin);
+ }
+ ret = cache_bin_alloc_easy(tbin, tcache_success);
- return (ret);
+ return ret;
}
void
-tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache)
-{
- void *ptr;
- unsigned i, nflush, ndeferred;
+tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+ szind_t binind, unsigned rem) {
bool merged_stats = false;
assert(binind < NBINS);
- assert(rem <= tbin->ncached);
+ assert((cache_bin_sz_t)rem <= tbin->ncached);
+
+ arena_t *arena = tcache->arena;
+ assert(arena != NULL);
+ unsigned nflush = tbin->ncached - rem;
+ VARIABLE_ARRAY(extent_t *, item_extent, nflush);
+ /* Look up extent once per item. */
+	for (unsigned i = 0; i < nflush; i++) {
+ item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+ }
- for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+ while (nflush > 0) {
/* Lock the arena bin associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
- arena_t *arena = chunk->arena;
- arena_bin_t *bin = &arena->bins[binind];
-
- if (config_prof && arena == tcache->arena) {
- if (arena_prof_accum(arena, tcache->prof_accumbytes))
- prof_idump();
+ extent_t *extent = item_extent[0];
+ arena_t *bin_arena = extent_arena_get(extent);
+ bin_t *bin = &bin_arena->bins[binind];
+
+ if (config_prof && bin_arena == arena) {
+ if (arena_prof_accum(tsd_tsdn(tsd), arena,
+ tcache->prof_accumbytes)) {
+ prof_idump(tsd_tsdn(tsd));
+ }
tcache->prof_accumbytes = 0;
}
- malloc_mutex_lock(&bin->lock);
- if (config_stats && arena == tcache->arena) {
- assert(merged_stats == false);
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+ if (config_stats && bin_arena == arena) {
+ assert(!merged_stats);
merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
}
- ndeferred = 0;
- for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
- assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk->arena == arena) {
- size_t pageind = ((uintptr_t)ptr -
- (uintptr_t)chunk) >> LG_PAGE;
- arena_chunk_map_t *mapelm =
- arena_mapp_get(chunk, pageind);
- if (config_fill && opt_junk) {
- arena_alloc_junk_small(ptr,
- &arena_bin_info[binind], true);
- }
- arena_dalloc_bin_locked(arena, chunk, ptr,
- mapelm);
+ unsigned ndeferred = 0;
+ for (unsigned i = 0; i < nflush; i++) {
+ void *ptr = *(tbin->avail - 1 - i);
+ extent = item_extent[i];
+ assert(ptr != NULL && extent != NULL);
+
+ if (extent_arena_get(extent) == bin_arena) {
+ arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
+ bin_arena, extent, ptr);
} else {
/*
* This object was allocated via a different
@@ -133,327 +155,532 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(tbin->avail - 1 - ndeferred) = ptr;
+ item_extent[ndeferred] = extent;
ndeferred++;
}
}
- malloc_mutex_unlock(&bin->lock);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+ arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
+ nflush = ndeferred;
}
- if (config_stats && merged_stats == false) {
+ if (config_stats && !merged_stats) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
- arena_bin_t *bin = &tcache->arena->bins[binind];
- malloc_mutex_lock(&bin->lock);
+ bin_t *bin = &arena->bins[binind];
+ malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
- malloc_mutex_unlock(&bin->lock);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
}
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
+ memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+ sizeof(void *));
tbin->ncached = rem;
- if ((int)tbin->ncached < tbin->low_water)
+ if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
+ }
}
void
-tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
- tcache_t *tcache)
-{
- void *ptr;
- unsigned i, nflush, ndeferred;
+tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
+ unsigned rem, tcache_t *tcache) {
bool merged_stats = false;
assert(binind < nhbins);
- assert(rem <= tbin->ncached);
+ assert((cache_bin_sz_t)rem <= tbin->ncached);
+
+ arena_t *arena = tcache->arena;
+ assert(arena != NULL);
+ unsigned nflush = tbin->ncached - rem;
+ VARIABLE_ARRAY(extent_t *, item_extent, nflush);
+ /* Look up extent once per item. */
+	for (unsigned i = 0; i < nflush; i++) {
+ item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+ }
- for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+ while (nflush > 0) {
/* Lock the arena associated with the first object. */
- arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
- arena_t *arena = chunk->arena;
+ extent_t *extent = item_extent[0];
+ arena_t *locked_arena = extent_arena_get(extent);
UNUSED bool idump;
- if (config_prof)
+ if (config_prof) {
idump = false;
- malloc_mutex_lock(&arena->lock);
- if ((config_prof || config_stats) && arena == tcache->arena) {
+ }
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
+ for (unsigned i = 0; i < nflush; i++) {
+ void *ptr = *(tbin->avail - 1 - i);
+ assert(ptr != NULL);
+ extent = item_extent[i];
+ if (extent_arena_get(extent) == locked_arena) {
+ large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
+ extent);
+ }
+ }
+ if ((config_prof || config_stats) && locked_arena == arena) {
if (config_prof) {
- idump = arena_prof_accum_locked(arena,
+ idump = arena_prof_accum(tsd_tsdn(tsd), arena,
tcache->prof_accumbytes);
tcache->prof_accumbytes = 0;
}
if (config_stats) {
merged_stats = true;
- arena->stats.nrequests_large +=
- tbin->tstats.nrequests;
- arena->stats.lstats[binind - NBINS].nrequests +=
- tbin->tstats.nrequests;
+ arena_stats_large_nrequests_add(tsd_tsdn(tsd),
+ &arena->stats, binind,
+ tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
}
}
- ndeferred = 0;
- for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
- assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk->arena == arena)
- arena_dalloc_large_locked(arena, chunk, ptr);
- else {
+ malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);
+
+ unsigned ndeferred = 0;
+ for (unsigned i = 0; i < nflush; i++) {
+ void *ptr = *(tbin->avail - 1 - i);
+ extent = item_extent[i];
+ assert(ptr != NULL && extent != NULL);
+
+ if (extent_arena_get(extent) == locked_arena) {
+ large_dalloc_finish(tsd_tsdn(tsd), extent);
+ } else {
/*
* This object was allocated via a different
* arena than the one that is currently locked.
* Stash the object, so that it can be handled
* in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(tbin->avail - 1 - ndeferred) = ptr;
+ item_extent[ndeferred] = extent;
ndeferred++;
}
}
- malloc_mutex_unlock(&arena->lock);
- if (config_prof && idump)
- prof_idump();
+ if (config_prof && idump) {
+ prof_idump(tsd_tsdn(tsd));
+ }
+ arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
+ ndeferred);
+ nflush = ndeferred;
}
- if (config_stats && merged_stats == false) {
+ if (config_stats && !merged_stats) {
/*
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
- arena_t *arena = tcache->arena;
- malloc_mutex_lock(&arena->lock);
- arena->stats.nrequests_large += tbin->tstats.nrequests;
- arena->stats.lstats[binind - NBINS].nrequests +=
- tbin->tstats.nrequests;
+ arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats,
+ binind, tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
- malloc_mutex_unlock(&arena->lock);
}
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
+ memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+ sizeof(void *));
tbin->ncached = rem;
- if ((int)tbin->ncached < tbin->low_water)
+ if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
+ }
}
void
-tcache_arena_associate(tcache_t *tcache, arena_t *arena)
-{
+tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
+ assert(tcache->arena == NULL);
+ tcache->arena = arena;
if (config_stats) {
/* Link into list of extant tcaches. */
- malloc_mutex_lock(&arena->lock);
+ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+
ql_elm_new(tcache, link);
ql_tail_insert(&arena->tcache_ql, tcache, link);
- malloc_mutex_unlock(&arena->lock);
+ cache_bin_array_descriptor_init(
+ &tcache->cache_bin_array_descriptor, tcache->bins_small,
+ tcache->bins_large);
+ ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
+ &tcache->cache_bin_array_descriptor, link);
+
+ malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
}
- tcache->arena = arena;
}
-void
-tcache_arena_dissociate(tcache_t *tcache)
-{
-
+static void
+tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
+ arena_t *arena = tcache->arena;
+ assert(arena != NULL);
if (config_stats) {
/* Unlink from list of extant tcaches. */
- malloc_mutex_lock(&tcache->arena->lock);
- ql_remove(&tcache->arena->tcache_ql, tcache, link);
- tcache_stats_merge(tcache, tcache->arena);
- malloc_mutex_unlock(&tcache->arena->lock);
+ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+ if (config_debug) {
+ bool in_ql = false;
+ tcache_t *iter;
+ ql_foreach(iter, &arena->tcache_ql, link) {
+ if (iter == tcache) {
+ in_ql = true;
+ break;
+ }
+ }
+ assert(in_ql);
+ }
+ ql_remove(&arena->tcache_ql, tcache, link);
+ ql_remove(&arena->cache_bin_array_descriptor_ql,
+ &tcache->cache_bin_array_descriptor, link);
+ tcache_stats_merge(tsdn, tcache, arena);
+ malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
}
+ tcache->arena = NULL;
+}
+
+void
+tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
+ tcache_arena_dissociate(tsdn, tcache);
+ tcache_arena_associate(tsdn, tcache, arena);
}
+bool
+tsd_tcache_enabled_data_init(tsd_t *tsd) {
+ /* Called upon tsd initialization. */
+ tsd_tcache_enabled_set(tsd, opt_tcache);
+ tsd_slow_update(tsd);
+
+ if (opt_tcache) {
+ /* Trigger tcache init. */
+ tsd_tcache_data_init(tsd);
+ }
+
+ return false;
+}
+
+/* Initialize auto tcache (embedded in TSD). */
+static void
+tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
+ memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
+ tcache->prof_accumbytes = 0;
+ tcache->next_gc_bin = 0;
+ tcache->arena = NULL;
+
+ ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
+
+ size_t stack_offset = 0;
+ assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
+ memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS);
+ memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS));
+ unsigned i = 0;
+ for (; i < NBINS; i++) {
+ tcache->lg_fill_div[i] = 1;
+ stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+ /*
+ * avail points past the available space. Allocations will
+ * access the slots toward higher addresses (for the benefit of
+ * prefetch).
+ */
+ tcache_small_bin_get(tcache, i)->avail =
+ (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+ }
+ for (; i < nhbins; i++) {
+ stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+ tcache_large_bin_get(tcache, i)->avail =
+ (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+ }
+ assert(stack_offset == stack_nelms * sizeof(void *));
+}
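+
+/*
+ * Editor's note, not part of the patch: tcache_init() sets each bin's avail
+ * pointer one element past that bin's slice of the shared stack, so with
+ * ncached items the cached pointers live at avail[-1] (newest) through
+ * avail[-ncached] (oldest). That is exactly how the flush loops above walk
+ * them: *(tbin->avail - 1 - i).
+ */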
+
+/* Initialize auto tcache (embedded in TSD). */
+bool
+tsd_tcache_data_init(tsd_t *tsd) {
+ tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
+ assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
+ size_t size = stack_nelms * sizeof(void *);
+ /* Avoid false cacheline sharing. */
+ size = sz_sa2u(size, CACHELINE);
+
+ void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
+ NULL, true, arena_get(TSDN_NULL, 0, true));
+ if (avail_array == NULL) {
+ return true;
+ }
+
+ tcache_init(tsd, tcache, avail_array);
+ /*
+ * Initialization is a bit tricky here. After malloc init is done, all
+ * threads can rely on arena_choose and associate tcache accordingly.
+ * However, the thread that does actual malloc bootstrapping relies on
+ * functional tsd, and it can only rely on a0. In that case, we
+ * associate its tcache to a0 temporarily, and later on
+ * arena_choose_hard() will re-associate properly.
+ */
+ tcache->arena = NULL;
+ arena_t *arena;
+ if (!malloc_initialized()) {
+ /* If in initialization, assign to a0. */
+ arena = arena_get(tsd_tsdn(tsd), 0, false);
+ tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
+ } else {
+ arena = arena_choose(tsd, NULL);
+ /* This may happen if thread.tcache.enabled is used. */
+ if (tcache->arena == NULL) {
+ tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
+ }
+ }
+ assert(arena == tcache->arena);
+
+ return false;
+}
+
+/* Create a manual tcache for the tcache.create mallctl. */
tcache_t *
-tcache_create(arena_t *arena)
-{
+tcache_create_explicit(tsd_t *tsd) {
tcache_t *tcache;
size_t size, stack_offset;
- unsigned i;
- size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
+ size = sizeof(tcache_t);
/* Naturally align the pointer stacks. */
size = PTR_CEILING(size);
stack_offset = size;
size += stack_nelms * sizeof(void *);
- /*
- * Round up to the nearest multiple of the cacheline size, in order to
- * avoid the possibility of false cacheline sharing.
- *
- * That this works relies on the same logic as in ipalloc(), but we
- * cannot directly call ipalloc() here due to tcache bootstrapping
- * issues.
- */
- size = (size + CACHELINE_MASK) & (-CACHELINE);
+ /* Avoid false cacheline sharing. */
+ size = sz_sa2u(size, CACHELINE);
- if (size <= SMALL_MAXCLASS)
- tcache = (tcache_t *)arena_malloc_small(arena, size, true);
- else if (size <= tcache_maxclass)
- tcache = (tcache_t *)arena_malloc_large(arena, size, true);
- else
- tcache = (tcache_t *)icalloct(size, false, arena);
-
- if (tcache == NULL)
- return (NULL);
-
- tcache_arena_associate(tcache, arena);
-
- assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
- for (i = 0; i < nhbins; i++) {
- tcache->tbins[i].lg_fill_div = 1;
- tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
- (uintptr_t)stack_offset);
- stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+ tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
+ arena_get(TSDN_NULL, 0, true));
+ if (tcache == NULL) {
+ return NULL;
}
- tcache_tsd_set(&tcache);
+ tcache_init(tsd, tcache,
+ (void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
+ tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
- return (tcache);
+ return tcache;
}
-void
-tcache_destroy(tcache_t *tcache)
-{
- unsigned i;
- size_t tcache_size;
+static void
+tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
+ assert(tcache->arena != NULL);
- tcache_arena_dissociate(tcache);
+ for (unsigned i = 0; i < NBINS; i++) {
+ cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
+ tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
- for (i = 0; i < NBINS; i++) {
- tcache_bin_t *tbin = &tcache->tbins[i];
- tcache_bin_flush_small(tbin, i, 0, tcache);
-
- if (config_stats && tbin->tstats.nrequests != 0) {
- arena_t *arena = tcache->arena;
- arena_bin_t *bin = &arena->bins[i];
- malloc_mutex_lock(&bin->lock);
- bin->stats.nrequests += tbin->tstats.nrequests;
- malloc_mutex_unlock(&bin->lock);
+ if (config_stats) {
+ assert(tbin->tstats.nrequests == 0);
}
}
+ for (unsigned i = NBINS; i < nhbins; i++) {
+ cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
+ tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
- for (; i < nhbins; i++) {
- tcache_bin_t *tbin = &tcache->tbins[i];
- tcache_bin_flush_large(tbin, i, 0, tcache);
-
- if (config_stats && tbin->tstats.nrequests != 0) {
- arena_t *arena = tcache->arena;
- malloc_mutex_lock(&arena->lock);
- arena->stats.nrequests_large += tbin->tstats.nrequests;
- arena->stats.lstats[i - NBINS].nrequests +=
- tbin->tstats.nrequests;
- malloc_mutex_unlock(&arena->lock);
+ if (config_stats) {
+ assert(tbin->tstats.nrequests == 0);
}
}
if (config_prof && tcache->prof_accumbytes > 0 &&
- arena_prof_accum(tcache->arena, tcache->prof_accumbytes))
- prof_idump();
-
- tcache_size = arena_salloc(tcache, false);
- if (tcache_size <= SMALL_MAXCLASS) {
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
- arena_t *arena = chunk->arena;
- size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
- LG_PAGE;
- arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
-
- arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm);
- } else if (tcache_size <= tcache_maxclass) {
- arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
- arena_t *arena = chunk->arena;
-
- arena_dalloc_large(arena, chunk, tcache);
- } else
- idalloct(tcache, false);
+ arena_prof_accum(tsd_tsdn(tsd), tcache->arena,
+ tcache->prof_accumbytes)) {
+ prof_idump(tsd_tsdn(tsd));
+ }
}
void
-tcache_thread_cleanup(void *arg)
-{
- tcache_t *tcache = *(tcache_t **)arg;
+tcache_flush(tsd_t *tsd) {
+ assert(tcache_available(tsd));
+ tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
+}
- if (tcache == TCACHE_STATE_DISABLED) {
- /* Do nothing. */
- } else if (tcache == TCACHE_STATE_REINCARNATED) {
- /*
- * Another destructor called an allocator function after this
- * destructor was called. Reset tcache to
- * TCACHE_STATE_PURGATORY in order to receive another callback.
- */
- tcache = TCACHE_STATE_PURGATORY;
- tcache_tsd_set(&tcache);
- } else if (tcache == TCACHE_STATE_PURGATORY) {
- /*
- * The previous time this destructor was called, we set the key
- * to TCACHE_STATE_PURGATORY so that other destructors wouldn't
- * cause re-creation of the tcache. This time, do nothing, so
- * that the destructor will not be called again.
- */
- } else if (tcache != NULL) {
- assert(tcache != TCACHE_STATE_PURGATORY);
- tcache_destroy(tcache);
- tcache = TCACHE_STATE_PURGATORY;
- tcache_tsd_set(&tcache);
+static void
+tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
+ tcache_flush_cache(tsd, tcache);
+ tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
+
+ if (tsd_tcache) {
+ /* Release the avail array for the TSD embedded auto tcache. */
+ void *avail_array =
+ (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
+ (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
+ idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
+ } else {
+ /* Release both the tcache struct and avail array. */
+ idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
}
}
-/* Caller must own arena->lock. */
+/* For auto tcache (embedded in TSD) only. */
void
-tcache_stats_merge(tcache_t *tcache, arena_t *arena)
-{
+tcache_cleanup(tsd_t *tsd) {
+ tcache_t *tcache = tsd_tcachep_get(tsd);
+ if (!tcache_available(tsd)) {
+ assert(tsd_tcache_enabled_get(tsd) == false);
+ if (config_debug) {
+ assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
+ }
+ return;
+ }
+ assert(tsd_tcache_enabled_get(tsd));
+ assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
+
+ tcache_destroy(tsd, tcache, true);
+ if (config_debug) {
+ tcache_small_bin_get(tcache, 0)->avail = NULL;
+ }
+}
+
+void
+tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
unsigned i;
cassert(config_stats);
/* Merge and reset tcache stats. */
for (i = 0; i < NBINS; i++) {
- arena_bin_t *bin = &arena->bins[i];
- tcache_bin_t *tbin = &tcache->tbins[i];
- malloc_mutex_lock(&bin->lock);
+ bin_t *bin = &arena->bins[i];
+ cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
+ malloc_mutex_lock(tsdn, &bin->lock);
bin->stats.nrequests += tbin->tstats.nrequests;
- malloc_mutex_unlock(&bin->lock);
+ malloc_mutex_unlock(tsdn, &bin->lock);
tbin->tstats.nrequests = 0;
}
for (; i < nhbins; i++) {
- malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
- tcache_bin_t *tbin = &tcache->tbins[i];
- arena->stats.nrequests_large += tbin->tstats.nrequests;
- lstats->nrequests += tbin->tstats.nrequests;
+ cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
+ arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
+ tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
}
}
+static bool
+tcaches_create_prep(tsd_t *tsd) {
+ bool err;
+
+ malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+
+ if (tcaches == NULL) {
+ tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *)
+ * (MALLOCX_TCACHE_MAX+1), CACHELINE);
+ if (tcaches == NULL) {
+ err = true;
+ goto label_return;
+ }
+ }
+
+ if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
+ err = true;
+ goto label_return;
+ }
+
+ err = false;
+label_return:
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+ return err;
+}
+
bool
-tcache_boot0(void)
-{
- unsigned i;
+tcaches_create(tsd_t *tsd, unsigned *r_ind) {
+ witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
- /*
- * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
- * known.
- */
- if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
+ bool err;
+
+ if (tcaches_create_prep(tsd)) {
+ err = true;
+ goto label_return;
+ }
+
+ tcache_t *tcache = tcache_create_explicit(tsd);
+ if (tcache == NULL) {
+ err = true;
+ goto label_return;
+ }
+
+ tcaches_t *elm;
+ malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+ if (tcaches_avail != NULL) {
+ elm = tcaches_avail;
+ tcaches_avail = tcaches_avail->next;
+ elm->tcache = tcache;
+ *r_ind = (unsigned)(elm - tcaches);
+ } else {
+ elm = &tcaches[tcaches_past];
+ elm->tcache = tcache;
+ *r_ind = tcaches_past;
+ tcaches_past++;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+
+ err = false;
+label_return:
+ witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
+ return err;
+}
+
+static tcache_t *
+tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
+
+ if (elm->tcache == NULL) {
+ return NULL;
+ }
+ tcache_t *tcache = elm->tcache;
+ elm->tcache = NULL;
+ return tcache;
+}
+
+void
+tcaches_flush(tsd_t *tsd, unsigned ind) {
+ malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+ tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]);
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+ if (tcache != NULL) {
+ tcache_destroy(tsd, tcache, false);
+ }
+}
+
+void
+tcaches_destroy(tsd_t *tsd, unsigned ind) {
+ malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+ tcaches_t *elm = &tcaches[ind];
+ tcache_t *tcache = tcaches_elm_remove(tsd, elm);
+ elm->next = tcaches_avail;
+ tcaches_avail = elm;
+ malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+ if (tcache != NULL) {
+ tcache_destroy(tsd, tcache, false);
+ }
+}
+
+bool
+tcache_boot(tsdn_t *tsdn) {
+ /* If necessary, clamp opt_lg_tcache_max. */
+ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
+ SMALL_MAXCLASS) {
tcache_maxclass = SMALL_MAXCLASS;
- else if ((1U << opt_lg_tcache_max) > arena_maxclass)
- tcache_maxclass = arena_maxclass;
- else
- tcache_maxclass = (1U << opt_lg_tcache_max);
+ } else {
+ tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
+ }
- nhbins = NBINS + (tcache_maxclass >> LG_PAGE);
+ if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
+ malloc_mutex_rank_exclusive)) {
+ return true;
+ }
+
+ nhbins = sz_size2index(tcache_maxclass) + 1;
/* Initialize tcache_bin_info. */
- tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
- sizeof(tcache_bin_info_t));
- if (tcache_bin_info == NULL)
- return (true);
+ tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
+ * sizeof(cache_bin_info_t), CACHELINE);
+ if (tcache_bin_info == NULL) {
+ return true;
+ }
stack_nelms = 0;
+ unsigned i;
for (i = 0; i < NBINS; i++) {
- if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+ if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
+ tcache_bin_info[i].ncached_max =
+ TCACHE_NSLOTS_SMALL_MIN;
+ } else if ((bin_infos[i].nregs << 1) <=
+ TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
- (arena_bin_info[i].nregs << 1);
+ (bin_infos[i].nregs << 1);
} else {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MAX;
@@ -465,15 +692,26 @@ tcache_boot0(void)
stack_nelms += tcache_bin_info[i].ncached_max;
}
- return (false);
+ return false;
}
-bool
-tcache_boot1(void)
-{
+void
+tcache_prefork(tsdn_t *tsdn) {
+ if (!config_prof && opt_tcache) {
+ malloc_mutex_prefork(tsdn, &tcaches_mtx);
+ }
+}
- if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
- return (true);
+void
+tcache_postfork_parent(tsdn_t *tsdn) {
+ if (!config_prof && opt_tcache) {
+ malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
+ }
+}
- return (false);
+void
+tcache_postfork_child(tsdn_t *tsdn) {
+ if (!config_prof && opt_tcache) {
+ malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
+ }
}
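
[Editor's note] tcaches_create() and tcaches_destroy() above manage explicit
tcaches with a fixed array plus an intrusive free list: slots are handed out
sequentially via tcaches_past until the array is exhausted, and destroyed
slots are pushed onto tcaches_avail for reuse. A standalone sketch of the
same allocation discipline (slot_t, NSLOTS, and friends are invented for this
note):

	#include <assert.h>
	#include <stddef.h>

	#define NSLOTS 8

	typedef struct slot_s slot_t;
	struct slot_s {
		slot_t *next;  /* Free-list link; valid only while free. */
		void *payload; /* In-use data; placeholder here. */
	};

	static slot_t slots[NSLOTS];
	static slot_t *avail = NULL; /* Head of the free list. */
	static unsigned past = 0;    /* First never-used slot. */

	static slot_t *
	slot_alloc(void) {
		if (avail != NULL) { /* Prefer reusing a freed slot. */
			slot_t *s = avail;
			avail = avail->next;
			return s;
		}
		if (past < NSLOTS) { /* Otherwise take a fresh one. */
			return &slots[past++];
		}
		return NULL; /* Exhausted. */
	}

	static void
	slot_free(slot_t *s) {
		s->next = avail; /* Push onto the free list. */
		avail = s;
	}

	int
	main(void) {
		slot_t *a = slot_alloc();
		slot_t *b = slot_alloc();
		slot_free(a);
		assert(slot_alloc() == a); /* Freed slots are reused first. */
		(void)b;
		return 0;
	}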
diff --git a/deps/jemalloc/src/ticker.c b/deps/jemalloc/src/ticker.c
new file mode 100644
index 000000000..d7b8cd26c
--- /dev/null
+++ b/deps/jemalloc/src/ticker.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_TICKER_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
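
[Editor's note] ticker.c is only a compilation stub; the ticker itself is a
header-defined countdown that tcache.c above arms with
ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR) so that GC work runs once
every N allocation events. A toy version of the idea (toy_* names invented
for this note, not jemalloc's ticker API):

	#include <assert.h>
	#include <stdint.h>

	typedef struct {
		int32_t tick;
		int32_t nticks;
	} toy_ticker_t;

	static void
	toy_ticker_init(toy_ticker_t *t, int32_t nticks) {
		t->tick = nticks;
		t->nticks = nticks;
	}

	/* Returns nonzero once every nticks calls, then rearms itself. */
	static int
	toy_ticker_tick(toy_ticker_t *t) {
		if (--t->tick == 0) {
			t->tick = t->nticks;
			return 1;
		}
		return 0;
	}

	int
	main(void) {
		toy_ticker_t t;
		toy_ticker_init(&t, 3);
		assert(!toy_ticker_tick(&t));
		assert(!toy_ticker_tick(&t));
		assert(toy_ticker_tick(&t)); /* Fires on the third tick. */
		return 0;
	}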
diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c
index 700caabfe..c1430682d 100644
--- a/deps/jemalloc/src/tsd.c
+++ b/deps/jemalloc/src/tsd.c
@@ -1,5 +1,10 @@
-#define JEMALLOC_TSD_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_TSD_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
/******************************************************************************/
/* Data. */
@@ -7,28 +12,158 @@
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
+__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false;
+bool tsd_booted = false;
+#elif (defined(JEMALLOC_TLS))
+__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
+pthread_key_t tsd_tsd;
+bool tsd_booted = false;
+#elif (defined(_WIN32))
+DWORD tsd_tsd;
+tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
+bool tsd_booted = false;
+#else
+
+/*
+ * This contains a mutex, but it's pretty convenient to allow the mutex code to
+ * have a dependency on tsd. So we define the struct here, and only refer to it
+ * by pointer in the header.
+ */
+struct tsd_init_head_s {
+ ql_head(tsd_init_block_t) blocks;
+ malloc_mutex_t lock;
+};
+
+pthread_key_t tsd_tsd;
+tsd_init_head_t tsd_init_head = {
+ ql_head_initializer(blocks),
+ MALLOC_MUTEX_INITIALIZER
+};
+tsd_wrapper_t tsd_boot_wrapper = {
+ false,
+ TSD_INITIALIZER
+};
+bool tsd_booted = false;
+#endif
+
+
/******************************************************************************/
-void *
-malloc_tsd_malloc(size_t size)
-{
+void
+tsd_slow_update(tsd_t *tsd) {
+ if (tsd_nominal(tsd)) {
+ if (malloc_slow || !tsd_tcache_enabled_get(tsd) ||
+ tsd_reentrancy_level_get(tsd) > 0) {
+ tsd->state = tsd_state_nominal_slow;
+ } else {
+ tsd->state = tsd_state_nominal;
+ }
+ }
+}
+
+static bool
+tsd_data_init(tsd_t *tsd) {
+ /*
+ * We initialize the rtree context first (before the tcache), since the
+ * tcache initialization depends on it.
+ */
+ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
+
+ /*
+ * A nondeterministic seed based on the address of tsd reduces
+ * the likelihood of lockstep non-uniform cache index
+ * utilization among identical concurrent processes, but at the
+ * cost of test repeatability. For debug builds, instead use a
+ * deterministic seed.
+ */
+ *tsd_offset_statep_get(tsd) = config_debug ? 0 :
+ (uint64_t)(uintptr_t)tsd;
- /* Avoid choose_arena() in order to dodge bootstrapping issues. */
- return (arena_malloc(arenas[0], size, false, false));
+ return tsd_tcache_enabled_data_init(tsd);
}
-void
-malloc_tsd_dalloc(void *wrapper)
-{
+static void
+assert_tsd_data_cleanup_done(tsd_t *tsd) {
+ assert(!tsd_nominal(tsd));
+ assert(*tsd_arenap_get_unsafe(tsd) == NULL);
+ assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
+ assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
+ assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
+ assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
+ assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
+}
+
+static bool
+tsd_data_init_nocleanup(tsd_t *tsd) {
+ assert(tsd->state == tsd_state_reincarnated ||
+ tsd->state == tsd_state_minimal_initialized);
+ /*
+ * During reincarnation, there is no guarantee that the cleanup function
+ * will be called (deallocation may happen after all tsd destructors).
+ * We set up tsd in a way that no cleanup is needed.
+ */
+ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
+ *tsd_arenas_tdata_bypassp_get(tsd) = true;
+ *tsd_tcache_enabledp_get_unsafe(tsd) = false;
+ *tsd_reentrancy_levelp_get(tsd) = 1;
+ assert_tsd_data_cleanup_done(tsd);
- idalloct(wrapper, false);
+ return false;
}
-void
-malloc_tsd_no_cleanup(void *arg)
-{
+tsd_t *
+tsd_fetch_slow(tsd_t *tsd, bool minimal) {
+ assert(!tsd_fast(tsd));
+
+ if (tsd->state == tsd_state_nominal_slow) {
+ /* On slow path but no work needed. */
+ assert(malloc_slow || !tsd_tcache_enabled_get(tsd) ||
+ tsd_reentrancy_level_get(tsd) > 0 ||
+ *tsd_arenas_tdata_bypassp_get(tsd));
+ } else if (tsd->state == tsd_state_uninitialized) {
+ if (!minimal) {
+ tsd->state = tsd_state_nominal;
+ tsd_slow_update(tsd);
+ /* Trigger cleanup handler registration. */
+ tsd_set(tsd);
+ tsd_data_init(tsd);
+ } else {
+ tsd->state = tsd_state_minimal_initialized;
+ tsd_set(tsd);
+ tsd_data_init_nocleanup(tsd);
+ }
+ } else if (tsd->state == tsd_state_minimal_initialized) {
+ if (!minimal) {
+ /* Switch to fully initialized. */
+ tsd->state = tsd_state_nominal;
+ assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
+ (*tsd_reentrancy_levelp_get(tsd))--;
+ tsd_slow_update(tsd);
+ tsd_data_init(tsd);
+ } else {
+ assert_tsd_data_cleanup_done(tsd);
+ }
+ } else if (tsd->state == tsd_state_purgatory) {
+ tsd->state = tsd_state_reincarnated;
+ tsd_set(tsd);
+ tsd_data_init_nocleanup(tsd);
+ } else {
+ assert(tsd->state == tsd_state_reincarnated);
+ }
+
+ return tsd;
+}
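+
+/*
+ * Editor's summary, not part of the patch: tsd_fetch_slow() implements a
+ * small state machine:
+ *
+ *	uninitialized       -> nominal (full init) or minimal_initialized
+ *	minimal_initialized -> nominal (on the first non-minimal fetch)
+ *	purgatory           -> reincarnated (allocator used after cleanup)
+ *	nominal            <-> nominal_slow (kept in sync by tsd_slow_update)
+ */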
- not_reached();
+void *
+malloc_tsd_malloc(size_t size) {
+ return a0malloc(CACHELINE_CEILING(size));
+}
+
+void
+malloc_tsd_dalloc(void *wrapper) {
+ a0dalloc(wrapper);
}
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
@@ -36,21 +171,22 @@ malloc_tsd_no_cleanup(void *arg)
JEMALLOC_EXPORT
#endif
void
-_malloc_thread_cleanup(void)
-{
+_malloc_thread_cleanup(void) {
bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
unsigned i;
- for (i = 0; i < ncleanups; i++)
+ for (i = 0; i < ncleanups; i++) {
pending[i] = true;
+ }
do {
again = false;
for (i = 0; i < ncleanups; i++) {
if (pending[i]) {
pending[i] = cleanups[i]();
- if (pending[i])
+ if (pending[i]) {
again = true;
+ }
}
}
} while (again);
@@ -58,26 +194,92 @@ _malloc_thread_cleanup(void)
#endif
void
-malloc_tsd_cleanup_register(bool (*f)(void))
-{
-
+malloc_tsd_cleanup_register(bool (*f)(void)) {
assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
cleanups[ncleanups] = f;
ncleanups++;
}
+static void
+tsd_do_data_cleanup(tsd_t *tsd) {
+ prof_tdata_cleanup(tsd);
+ iarena_cleanup(tsd);
+ arena_cleanup(tsd);
+ arenas_tdata_cleanup(tsd);
+ tcache_cleanup(tsd);
+ witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
+}
+
void
-malloc_tsd_boot(void)
-{
+tsd_cleanup(void *arg) {
+ tsd_t *tsd = (tsd_t *)arg;
+
+ switch (tsd->state) {
+ case tsd_state_uninitialized:
+ /* Do nothing. */
+ break;
+ case tsd_state_minimal_initialized:
+ /* This implies the thread only did free() in its life time. */
+ /* Fall through. */
+ case tsd_state_reincarnated:
+ /*
+		 * Reincarnated means another destructor deallocated memory
+		 * after this tsd's own destructor had already run. Cleanup
+		 * isn't required, but it is still performed for testing and
+		 * completeness.
+ */
+ assert_tsd_data_cleanup_done(tsd);
+ /* Fall through. */
+ case tsd_state_nominal:
+ case tsd_state_nominal_slow:
+ tsd_do_data_cleanup(tsd);
+ tsd->state = tsd_state_purgatory;
+ tsd_set(tsd);
+ break;
+ case tsd_state_purgatory:
+ /*
+ * The previous time this destructor was called, we set the
+ * state to tsd_state_purgatory so that other destructors
+ * wouldn't cause re-creation of the tsd. This time, do
+ * nothing, and do not request another callback.
+ */
+ break;
+ default:
+ not_reached();
+ }
+#ifdef JEMALLOC_JET
+ test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
+ int *data = tsd_test_datap_get_unsafe(tsd);
+ if (test_callback != NULL) {
+ test_callback(data);
+ }
+#endif
+}
+
+tsd_t *
+malloc_tsd_boot0(void) {
+ tsd_t *tsd;
ncleanups = 0;
+ if (tsd_boot0()) {
+ return NULL;
+ }
+ tsd = tsd_fetch();
+ *tsd_arenas_tdata_bypassp_get(tsd) = true;
+ return tsd;
+}
+
+void
+malloc_tsd_boot1(void) {
+ tsd_boot1();
+ tsd_t *tsd = tsd_fetch();
+ /* malloc_slow has been set properly. Update tsd_slow. */
+ tsd_slow_update(tsd);
+ *tsd_arenas_tdata_bypassp_get(tsd) = false;
}
#ifdef _WIN32
static BOOL WINAPI
-_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
-{
-
+_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
case DLL_THREAD_ATTACH:
@@ -90,52 +292,60 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
default:
break;
}
- return (true);
+ return true;
}
+/*
+ * We need to be able to say "read" here (in the "pragma section"), but have
+ * hooked "read". We won't read for the rest of the file, so we can get away
+ * with unhooking.
+ */
+#ifdef read
+# undef read
+#endif
+
#ifdef _MSC_VER
# ifdef _M_IX86
# pragma comment(linker, "/INCLUDE:__tls_used")
+# pragma comment(linker, "/INCLUDE:_tls_callback")
# else
# pragma comment(linker, "/INCLUDE:_tls_used")
+# pragma comment(linker, "/INCLUDE:tls_callback")
# endif
# pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
-static const BOOL (WINAPI *tls_callback)(HINSTANCE hinstDLL,
+BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif
#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
!defined(_WIN32))
void *
-tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block)
-{
+tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
pthread_t self = pthread_self();
tsd_init_block_t *iter;
/* Check whether this thread has already inserted into the list. */
- malloc_mutex_lock(&head->lock);
+ malloc_mutex_lock(TSDN_NULL, &head->lock);
ql_foreach(iter, &head->blocks, link) {
if (iter->thread == self) {
- malloc_mutex_unlock(&head->lock);
- return (iter->data);
+ malloc_mutex_unlock(TSDN_NULL, &head->lock);
+ return iter->data;
}
}
/* Insert block into list. */
ql_elm_new(block, link);
block->thread = self;
ql_tail_insert(&head->blocks, block, link);
- malloc_mutex_unlock(&head->lock);
- return (NULL);
+ malloc_mutex_unlock(TSDN_NULL, &head->lock);
+ return NULL;
}
void
-tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block)
-{
-
- malloc_mutex_lock(&head->lock);
+tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
+ malloc_mutex_lock(TSDN_NULL, &head->lock);
ql_remove(&head->blocks, block, link);
- malloc_mutex_unlock(&head->lock);
+ malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif
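The tsd rework above amounts to a five-state lifecycle per thread: uninitialized, nominal (fast path), minimal_initialized (free()-only threads), purgatory (destructor ran), and reincarnated (allocation after destruction). A minimal standalone sketch of those transitions; all names here are hypothetical, and only the states and moves mirror the patched tsd.c:

/*
 * Freestanding sketch of the tsd lifecycle; compiles with any C99 compiler.
 * Only the states and transitions correspond to the patched tsd.c above.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

typedef enum {
	STATE_UNINITIALIZED,
	STATE_NOMINAL,       /* fully initialized; fast path */
	STATE_MINIMAL,       /* thread has only ever called free() */
	STATE_PURGATORY,     /* destructor ran; do not recreate tsd */
	STATE_REINCARNATED   /* allocation happened after destruction */
} tsd_state_sketch_t;

/* Mirrors tsd_fetch_slow(): initialize or promote as needed. */
static tsd_state_sketch_t
fetch_slow(tsd_state_sketch_t s, bool minimal) {
	switch (s) {
	case STATE_UNINITIALIZED:
	case STATE_MINIMAL:
		return minimal ? STATE_MINIMAL : STATE_NOMINAL;
	case STATE_PURGATORY:
		return STATE_REINCARNATED; /* destructor already ran */
	default:
		return s;
	}
}

/* Mirrors tsd_cleanup(): the first call parks the state in purgatory. */
static tsd_state_sketch_t
cleanup(tsd_state_sketch_t s) {
	switch (s) {
	case STATE_NOMINAL:
	case STATE_MINIMAL:
	case STATE_REINCARNATED:
		return STATE_PURGATORY;
	default:
		return s; /* uninitialized or purgatory: nothing to do */
	}
}

int
main(void) {
	tsd_state_sketch_t s = STATE_UNINITIALIZED;
	s = fetch_slow(s, true);   /* free()-only thread stays minimal */
	assert(s == STATE_MINIMAL);
	s = fetch_slow(s, false);  /* first malloc() promotes it */
	assert(s == STATE_NOMINAL);
	s = cleanup(s);            /* thread exit */
	assert(s == STATE_PURGATORY);
	s = fetch_slow(s, false);  /* late allocation from another destructor */
	assert(s == STATE_REINCARNATED);
	puts("tsd lifecycle sketch OK");
	return 0;
}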
diff --git a/deps/jemalloc/src/witness.c b/deps/jemalloc/src/witness.c
new file mode 100644
index 000000000..f42b72ad1
--- /dev/null
+++ b/deps/jemalloc/src/witness.c
@@ -0,0 +1,100 @@
+#define JEMALLOC_WITNESS_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"
+
+void
+witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+ witness_comp_t *comp, void *opaque) {
+ witness->name = name;
+ witness->rank = rank;
+ witness->comp = comp;
+ witness->opaque = opaque;
+}
+
+static void
+witness_lock_error_impl(const witness_list_t *witnesses,
+ const witness_t *witness) {
+ witness_t *w;
+
+ malloc_printf("<jemalloc>: Lock rank order reversal:");
+ ql_foreach(w, witnesses, link) {
+ malloc_printf(" %s(%u)", w->name, w->rank);
+ }
+ malloc_printf(" %s(%u)\n", witness->name, witness->rank);
+ abort();
+}
+witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl;
+
+static void
+witness_owner_error_impl(const witness_t *witness) {
+ malloc_printf("<jemalloc>: Should own %s(%u)\n", witness->name,
+ witness->rank);
+ abort();
+}
+witness_owner_error_t *JET_MUTABLE witness_owner_error =
+ witness_owner_error_impl;
+
+static void
+witness_not_owner_error_impl(const witness_t *witness) {
+ malloc_printf("<jemalloc>: Should not own %s(%u)\n", witness->name,
+ witness->rank);
+ abort();
+}
+witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error =
+ witness_not_owner_error_impl;
+
+static void
+witness_depth_error_impl(const witness_list_t *witnesses,
+ witness_rank_t rank_inclusive, unsigned depth) {
+ witness_t *w;
+
+ malloc_printf("<jemalloc>: Should own %u lock%s of rank >= %u:", depth,
+ (depth != 1) ? "s" : "", rank_inclusive);
+ ql_foreach(w, witnesses, link) {
+ malloc_printf(" %s(%u)", w->name, w->rank);
+ }
+ malloc_printf("\n");
+ abort();
+}
+witness_depth_error_t *JET_MUTABLE witness_depth_error =
+ witness_depth_error_impl;
+
+void
+witnesses_cleanup(witness_tsd_t *witness_tsd) {
+ witness_assert_lockless(witness_tsd_tsdn(witness_tsd));
+
+ /* Do nothing. */
+}
+
+void
+witness_prefork(witness_tsd_t *witness_tsd) {
+ if (!config_debug) {
+ return;
+ }
+ witness_tsd->forking = true;
+}
+
+void
+witness_postfork_parent(witness_tsd_t *witness_tsd) {
+ if (!config_debug) {
+ return;
+ }
+ witness_tsd->forking = false;
+}
+
+void
+witness_postfork_child(witness_tsd_t *witness_tsd) {
+ if (!config_debug) {
+ return;
+ }
+#ifndef JEMALLOC_MUTEX_INIT_CB
+ witness_list_t *witnesses;
+
+ witnesses = &witness_tsd->witnesses;
+ ql_new(witnesses);
+#endif
+ witness_tsd->forking = false;
+}
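The new witness.c enforces a total order on lock ranks: acquiring a lock whose rank does not exceed every rank already held is reported as a reversal. A freestanding sketch of the same check, with illustrative names; the real code threads the held-lock list through tsd and reports violations via witness_lock_error():

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_HELD 8

static unsigned held_ranks[MAX_HELD];
static unsigned n_held;

/* Before taking a lock, its rank must exceed every rank already held. */
static void
witness_lock_sketch(unsigned rank) {
	unsigned i;

	for (i = 0; i < n_held; i++) {
		if (held_ranks[i] >= rank) {
			fprintf(stderr, "<sketch>: lock rank order reversal: "
			    "%u acquired while holding %u\n", rank,
			    held_ranks[i]);
			abort();
		}
	}
	assert(n_held < MAX_HELD);
	held_ranks[n_held++] = rank;
}

static void
witness_unlock_sketch(unsigned rank) {
	/* Release in LIFO order for simplicity. */
	assert(n_held > 0 && held_ranks[n_held - 1] == rank);
	n_held--;
}

int
main(void) {
	witness_lock_sketch(1);   /* e.g. an arena lock */
	witness_lock_sketch(2);   /* higher rank while holding 1: fine */
	witness_unlock_sketch(2);
	witness_unlock_sketch(1);
	/* witness_lock_sketch(2); witness_lock_sketch(1); would abort. */
	puts("witness sketch OK");
	return 0;
}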
diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c
index e0302ef4e..23dfdd04a 100644
--- a/deps/jemalloc/src/zone.c
+++ b/deps/jemalloc/src/zone.c
@@ -1,10 +1,83 @@
-#include "jemalloc/internal/jemalloc_internal.h"
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+
#ifndef JEMALLOC_ZONE
# error "This source file is for zones on Darwin (OS X)."
#endif
+/*
+ * Definitions of the following structs in malloc/malloc.h might be too old
+ * for the built binary to run on newer versions of OSX. So use the newest
+ * possible version of those structs.
+ */
+typedef struct _malloc_zone_t {
+ void *reserved1;
+ void *reserved2;
+ size_t (*size)(struct _malloc_zone_t *, const void *);
+ void *(*malloc)(struct _malloc_zone_t *, size_t);
+ void *(*calloc)(struct _malloc_zone_t *, size_t, size_t);
+ void *(*valloc)(struct _malloc_zone_t *, size_t);
+ void (*free)(struct _malloc_zone_t *, void *);
+ void *(*realloc)(struct _malloc_zone_t *, void *, size_t);
+ void (*destroy)(struct _malloc_zone_t *);
+ const char *zone_name;
+ unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned);
+ void (*batch_free)(struct _malloc_zone_t *, void **, unsigned);
+ struct malloc_introspection_t *introspect;
+ unsigned version;
+ void *(*memalign)(struct _malloc_zone_t *, size_t, size_t);
+ void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t);
+ size_t (*pressure_relief)(struct _malloc_zone_t *, size_t);
+} malloc_zone_t;
+
+typedef struct {
+ vm_address_t address;
+ vm_size_t size;
+} vm_range_t;
+
+typedef struct malloc_statistics_t {
+ unsigned blocks_in_use;
+ size_t size_in_use;
+ size_t max_size_in_use;
+ size_t size_allocated;
+} malloc_statistics_t;
+
+typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **);
+
+typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned);
+
+typedef struct malloc_introspection_t {
+ kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t);
+ size_t (*good_size)(malloc_zone_t *, size_t);
+ boolean_t (*check)(malloc_zone_t *);
+ void (*print)(malloc_zone_t *, boolean_t);
+ void (*log)(malloc_zone_t *, void *);
+ void (*force_lock)(malloc_zone_t *);
+ void (*force_unlock)(malloc_zone_t *);
+ void (*statistics)(malloc_zone_t *, malloc_statistics_t *);
+ boolean_t (*zone_locked)(malloc_zone_t *);
+ boolean_t (*enable_discharge_checking)(malloc_zone_t *);
+ boolean_t (*disable_discharge_checking)(malloc_zone_t *);
+ void (*discharge)(malloc_zone_t *, void *);
+#ifdef __BLOCKS__
+ void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *));
+#else
+ void *enumerate_unavailable_without_blocks;
+#endif
+ void (*reinit_lock)(malloc_zone_t *);
+} malloc_introspection_t;
+
+extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *);
+
+extern malloc_zone_t *malloc_default_zone(void);
+
+extern void malloc_zone_register(malloc_zone_t *zone);
+
+extern void malloc_zone_unregister(malloc_zone_t *zone);
+
/*
- * The malloc_default_purgeable_zone function is only available on >= 10.6.
+ * The malloc_default_purgeable_zone() function is only available on >= 10.6.
* We need to check whether it is present at runtime, thus the weak_import.
*/
extern malloc_zone_t *malloc_default_purgeable_zone(void)
@@ -13,30 +86,43 @@ JEMALLOC_ATTR(weak_import);
/******************************************************************************/
/* Data. */
-static malloc_zone_t zone;
-static struct malloc_introspection_t zone_introspect;
+static malloc_zone_t *default_zone, *purgeable_zone;
+static malloc_zone_t jemalloc_zone;
+static struct malloc_introspection_t jemalloc_zone_introspect;
+static pid_t zone_force_lock_pid = -1;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
-static size_t zone_size(malloc_zone_t *zone, void *ptr);
+static size_t zone_size(malloc_zone_t *zone, const void *ptr);
static void *zone_malloc(malloc_zone_t *zone, size_t size);
static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
static void *zone_valloc(malloc_zone_t *zone, size_t size);
static void zone_free(malloc_zone_t *zone, void *ptr);
static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
-#if (JEMALLOC_ZONE_VERSION >= 5)
static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 6)
size_t size);
static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
size_t size);
-#endif
-static void *zone_destroy(malloc_zone_t *zone);
+static void zone_destroy(malloc_zone_t *zone);
+static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size,
+ void **results, unsigned num_requested);
+static void zone_batch_free(struct _malloc_zone_t *zone,
+ void **to_be_freed, unsigned num_to_be_freed);
+static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal);
static size_t zone_good_size(malloc_zone_t *zone, size_t size);
+static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask,
+ vm_address_t zone_address, memory_reader_t reader,
+ vm_range_recorder_t recorder);
+static boolean_t zone_check(malloc_zone_t *zone);
+static void zone_print(malloc_zone_t *zone, boolean_t verbose);
+static void zone_log(malloc_zone_t *zone, void *address);
static void zone_force_lock(malloc_zone_t *zone);
static void zone_force_unlock(malloc_zone_t *zone);
+static void zone_statistics(malloc_zone_t *zone,
+ malloc_statistics_t *stats);
+static boolean_t zone_locked(malloc_zone_t *zone);
+static void zone_reinit_lock(malloc_zone_t *zone);
/******************************************************************************/
/*
@@ -44,9 +130,7 @@ static void zone_force_unlock(malloc_zone_t *zone);
*/
static size_t
-zone_size(malloc_zone_t *zone, void *ptr)
-{
-
+zone_size(malloc_zone_t *zone, const void *ptr) {
/*
* There appear to be places within Darwin (such as setenv(3)) that
* cause calls to this function with pointers that *no* zone owns. If
@@ -54,40 +138,33 @@ zone_size(malloc_zone_t *zone, void *ptr)
* our zone into two parts, and use one as the default allocator and
* the other as the default deallocator/reallocator. Since that will
* not work in practice, we must check all pointers to assure that they
- * reside within a mapped chunk before determining size.
+ * reside within a mapped extent before determining size.
*/
- return (ivsalloc(ptr, config_prof));
+ return ivsalloc(tsdn_fetch(), ptr);
}
static void *
-zone_malloc(malloc_zone_t *zone, size_t size)
-{
-
- return (je_malloc(size));
+zone_malloc(malloc_zone_t *zone, size_t size) {
+ return je_malloc(size);
}
static void *
-zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
-{
-
- return (je_calloc(num, size));
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size) {
+ return je_calloc(num, size);
}
static void *
-zone_valloc(malloc_zone_t *zone, size_t size)
-{
+zone_valloc(malloc_zone_t *zone, size_t size) {
void *ret = NULL; /* Assignment avoids useless compiler warning. */
je_posix_memalign(&ret, PAGE, size);
- return (ret);
+ return ret;
}
static void
-zone_free(malloc_zone_t *zone, void *ptr)
-{
-
- if (ivsalloc(ptr, config_prof) != 0) {
+zone_free(malloc_zone_t *zone, void *ptr) {
+ if (ivsalloc(tsdn_fetch(), ptr) != 0) {
je_free(ptr);
return;
}
@@ -96,163 +173,297 @@ zone_free(malloc_zone_t *zone, void *ptr)
}
static void *
-zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
-{
-
- if (ivsalloc(ptr, config_prof) != 0)
- return (je_realloc(ptr, size));
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) {
+ if (ivsalloc(tsdn_fetch(), ptr) != 0) {
+ return je_realloc(ptr, size);
+ }
- return (realloc(ptr, size));
+ return realloc(ptr, size);
}
-#if (JEMALLOC_ZONE_VERSION >= 5)
static void *
-zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
-{
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) {
void *ret = NULL; /* Assignment avoids useless compiler warning. */
je_posix_memalign(&ret, alignment, size);
- return (ret);
+ return ret;
}
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 6)
static void
-zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
-{
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) {
+ size_t alloc_size;
- if (ivsalloc(ptr, config_prof) != 0) {
- assert(ivsalloc(ptr, config_prof) == size);
+ alloc_size = ivsalloc(tsdn_fetch(), ptr);
+ if (alloc_size != 0) {
+ assert(alloc_size == size);
je_free(ptr);
return;
}
free(ptr);
}
-#endif
-
-static void *
-zone_destroy(malloc_zone_t *zone)
-{
+static void
+zone_destroy(malloc_zone_t *zone) {
/* This function should never be called. */
not_reached();
- return (NULL);
+}
+
+static unsigned
+zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results,
+ unsigned num_requested) {
+ unsigned i;
+
+ for (i = 0; i < num_requested; i++) {
+ results[i] = je_malloc(size);
+ if (!results[i]) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+static void
+zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed,
+ unsigned num_to_be_freed) {
+ unsigned i;
+
+ for (i = 0; i < num_to_be_freed; i++) {
+ zone_free(zone, to_be_freed[i]);
+ to_be_freed[i] = NULL;
+ }
}
static size_t
-zone_good_size(malloc_zone_t *zone, size_t size)
-{
+zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) {
+ return 0;
+}
- if (size == 0)
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size) {
+ if (size == 0) {
size = 1;
- return (s2u(size));
+ }
+ return sz_s2u(size);
+}
+
+static kern_return_t
+zone_enumerator(task_t task, void *data, unsigned type_mask,
+ vm_address_t zone_address, memory_reader_t reader,
+ vm_range_recorder_t recorder) {
+ return KERN_SUCCESS;
+}
+
+static boolean_t
+zone_check(malloc_zone_t *zone) {
+ return true;
+}
+
+static void
+zone_print(malloc_zone_t *zone, boolean_t verbose) {
}
static void
-zone_force_lock(malloc_zone_t *zone)
-{
+zone_log(malloc_zone_t *zone, void *address) {
+}
- if (isthreaded)
+static void
+zone_force_lock(malloc_zone_t *zone) {
+ if (isthreaded) {
+ /*
+ * See the note in zone_force_unlock, below, to see why we need
+ * this.
+ */
+ assert(zone_force_lock_pid == -1);
+ zone_force_lock_pid = getpid();
jemalloc_prefork();
+ }
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone) {
+ /*
+ * zone_force_lock and zone_force_unlock are the entry points to the
+ * forking machinery on OS X. The tricky thing is, the child is not
+ * allowed to unlock mutexes locked in the parent, even if owned by the
+ * forking thread (and the mutex type we use in OS X will fail an assert
+ * if we try). In the child, we can get away with reinitializing all
+ * the mutexes, which has the effect of unlocking them. In the parent,
+ * doing this would mean we wouldn't wake any waiters blocked on the
+ * mutexes we unlock. So, we record the pid of the current thread in
+ * zone_force_lock, and use that to detect if we're in the parent or
+ * child here, to decide which unlock logic we need.
+ */
+ if (isthreaded) {
+ assert(zone_force_lock_pid != -1);
+ if (getpid() == zone_force_lock_pid) {
+ jemalloc_postfork_parent();
+ } else {
+ jemalloc_postfork_child();
+ }
+ zone_force_lock_pid = -1;
+ }
+}
+
+static void
+zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) {
+ /* We make no effort to actually fill the values. */
+ stats->blocks_in_use = 0;
+ stats->size_in_use = 0;
+ stats->max_size_in_use = 0;
+ stats->size_allocated = 0;
+}
+
+static boolean_t
+zone_locked(malloc_zone_t *zone) {
+ /* Pretend no lock is being held. */
+ return false;
+}
+
+static void
+zone_reinit_lock(malloc_zone_t *zone) {
+ /*
+ * As of OSX 10.12, this function is only used when force_unlock would
+ * be used if the zone version were < 9. So just use force_unlock.
+ */
+ zone_force_unlock(zone);
+}
+
+static void
+zone_init(void) {
+ jemalloc_zone.size = zone_size;
+ jemalloc_zone.malloc = zone_malloc;
+ jemalloc_zone.calloc = zone_calloc;
+ jemalloc_zone.valloc = zone_valloc;
+ jemalloc_zone.free = zone_free;
+ jemalloc_zone.realloc = zone_realloc;
+ jemalloc_zone.destroy = zone_destroy;
+ jemalloc_zone.zone_name = "jemalloc_zone";
+ jemalloc_zone.batch_malloc = zone_batch_malloc;
+ jemalloc_zone.batch_free = zone_batch_free;
+ jemalloc_zone.introspect = &jemalloc_zone_introspect;
+ jemalloc_zone.version = 9;
+ jemalloc_zone.memalign = zone_memalign;
+ jemalloc_zone.free_definite_size = zone_free_definite_size;
+ jemalloc_zone.pressure_relief = zone_pressure_relief;
+
+ jemalloc_zone_introspect.enumerator = zone_enumerator;
+ jemalloc_zone_introspect.good_size = zone_good_size;
+ jemalloc_zone_introspect.check = zone_check;
+ jemalloc_zone_introspect.print = zone_print;
+ jemalloc_zone_introspect.log = zone_log;
+ jemalloc_zone_introspect.force_lock = zone_force_lock;
+ jemalloc_zone_introspect.force_unlock = zone_force_unlock;
+ jemalloc_zone_introspect.statistics = zone_statistics;
+ jemalloc_zone_introspect.zone_locked = zone_locked;
+ jemalloc_zone_introspect.enable_discharge_checking = NULL;
+ jemalloc_zone_introspect.disable_discharge_checking = NULL;
+ jemalloc_zone_introspect.discharge = NULL;
+#ifdef __BLOCKS__
+ jemalloc_zone_introspect.enumerate_discharged_pointers = NULL;
+#else
+ jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL;
+#endif
+ jemalloc_zone_introspect.reinit_lock = zone_reinit_lock;
+}
+
+static malloc_zone_t *
+zone_default_get(void) {
+ malloc_zone_t **zones = NULL;
+ unsigned int num_zones = 0;
+
+ /*
+ * On OSX 10.12, malloc_default_zone returns a special zone that is not
+ * present in the list of registered zones. That zone uses a "lite zone"
+ * if one is present (apparently enabled when malloc stack logging is
+ * enabled), or the first registered zone otherwise. In practice this
+ * means unless malloc stack logging is enabled, the first registered
+ * zone is the default. So get the list of zones to get the first one,
+ * instead of relying on malloc_default_zone.
+ */
+ if (KERN_SUCCESS != malloc_get_all_zones(0, NULL,
+ (vm_address_t**)&zones, &num_zones)) {
+ /*
+ * Reset the value in case the failure happened after it was
+ * set.
+ */
+ num_zones = 0;
+ }
+
+ if (num_zones) {
+ return zones[0];
+ }
+
+ return malloc_default_zone();
}
+/* As written, this function can only promote jemalloc_zone. */
static void
-zone_force_unlock(malloc_zone_t *zone)
-{
+zone_promote(void) {
+ malloc_zone_t *zone;
+
+ do {
+ /*
+ * Unregister and reregister the default zone. On OSX >= 10.6,
+ * unregistering takes the last registered zone and places it
+ * at the location of the specified zone. Unregistering the
+ * default zone thus makes the last registered one the default.
+ * On OSX < 10.6, unregistering shifts all registered zones.
+ * The first registered zone then becomes the default.
+ */
+ malloc_zone_unregister(default_zone);
+ malloc_zone_register(default_zone);
- if (isthreaded)
- jemalloc_postfork_parent();
+ /*
+ * On OSX 10.6, having the default purgeable zone appear before
+ * the default zone makes some things crash, because the purgeable
+ * zone thinks it owns pointers allocated by the default zone. We thus
+ * unregister/re-register it in order to ensure it's always
+ * after the default zone. On OSX < 10.6, there is no purgeable
+ * zone, so this does nothing. On OSX >= 10.6, unregistering
+ * replaces the purgeable zone with the last registered zone
+ * above, i.e. the default zone. Registering it again then puts
+ * it at the end, obviously after the default zone.
+ */
+ if (purgeable_zone != NULL) {
+ malloc_zone_unregister(purgeable_zone);
+ malloc_zone_register(purgeable_zone);
+ }
+
+ zone = zone_default_get();
+ } while (zone != &jemalloc_zone);
}
JEMALLOC_ATTR(constructor)
void
-register_zone(void)
-{
-
+zone_register(void) {
/*
* If something else replaced the system default zone allocator, don't
* register jemalloc's.
*/
- malloc_zone_t *default_zone = malloc_default_zone();
- if (!default_zone->zone_name ||
- strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
+ default_zone = zone_default_get();
+ if (!default_zone->zone_name || strcmp(default_zone->zone_name,
+ "DefaultMallocZone") != 0) {
return;
}
- zone.size = (void *)zone_size;
- zone.malloc = (void *)zone_malloc;
- zone.calloc = (void *)zone_calloc;
- zone.valloc = (void *)zone_valloc;
- zone.free = (void *)zone_free;
- zone.realloc = (void *)zone_realloc;
- zone.destroy = (void *)zone_destroy;
- zone.zone_name = "jemalloc_zone";
- zone.batch_malloc = NULL;
- zone.batch_free = NULL;
- zone.introspect = &zone_introspect;
- zone.version = JEMALLOC_ZONE_VERSION;
-#if (JEMALLOC_ZONE_VERSION >= 5)
- zone.memalign = zone_memalign;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 6)
- zone.free_definite_size = zone_free_definite_size;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 8)
- zone.pressure_relief = NULL;
-#endif
-
- zone_introspect.enumerator = NULL;
- zone_introspect.good_size = (void *)zone_good_size;
- zone_introspect.check = NULL;
- zone_introspect.print = NULL;
- zone_introspect.log = NULL;
- zone_introspect.force_lock = (void *)zone_force_lock;
- zone_introspect.force_unlock = (void *)zone_force_unlock;
- zone_introspect.statistics = NULL;
-#if (JEMALLOC_ZONE_VERSION >= 6)
- zone_introspect.zone_locked = NULL;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 7)
- zone_introspect.enable_discharge_checking = NULL;
- zone_introspect.disable_discharge_checking = NULL;
- zone_introspect.discharge = NULL;
-#ifdef __BLOCKS__
- zone_introspect.enumerate_discharged_pointers = NULL;
-#else
- zone_introspect.enumerate_unavailable_without_blocks = NULL;
-#endif
-#endif
-
/*
* The default purgeable zone is created lazily by OSX's libc. It uses
* the default zone when it is created for "small" allocations
* (< 15 KiB), but assumes the default zone is a scalable_zone. This
* obviously fails when the default zone is the jemalloc zone, so
- * malloc_default_purgeable_zone is called beforehand so that the
+ * malloc_default_purgeable_zone() is called beforehand so that the
* default purgeable zone is created when the default zone is still
* a scalable_zone. As purgeable zones only exist on >= 10.6, we need
* to check for the existence of malloc_default_purgeable_zone() at
* run time.
*/
- if (malloc_default_purgeable_zone != NULL)
- malloc_default_purgeable_zone();
+ purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL :
+ malloc_default_purgeable_zone();
/* Register the custom zone. At this point it won't be the default. */
- malloc_zone_register(&zone);
+ zone_init();
+ malloc_zone_register(&jemalloc_zone);
- /*
- * Unregister and reregister the default zone. On OSX >= 10.6,
- * unregistering takes the last registered zone and places it at the
- * location of the specified zone. Unregistering the default zone thus
- * makes the last registered one the default. On OSX < 10.6,
- * unregistering shifts all registered zones. The first registered zone
- * then becomes the default.
- */
- do {
- default_zone = malloc_default_zone();
- malloc_zone_unregister(default_zone);
- malloc_zone_register(default_zone);
- } while (malloc_default_zone() != &zone);
+ /* Promote the custom zone to be default. */
+ zone_promote();
}
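The zone_force_lock/zone_force_unlock change records the locking thread's pid so that, after fork(), the unlock path can tell parent from child: the parent must genuinely unlock (to wake waiters), while the child must reinitialize its mutexes, since Darwin forbids unlocking a mutex locked in another process. A freestanding sketch of that dispatch, with hypothetical names; the real hooks call jemalloc_prefork() and jemalloc_postfork_parent()/jemalloc_postfork_child():

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static pid_t force_lock_pid = -1;

static void
force_lock_sketch(void) {
	force_lock_pid = getpid(); /* remember which process locked */
	/* ... all allocator mutexes would be locked here ... */
}

static void
force_unlock_sketch(void) {
	if (getpid() == force_lock_pid) {
		/* Parent: plain unlocks, so blocked waiters are woken. */
		printf("%d: parent unlock path\n", (int)getpid());
	} else {
		/*
		 * Child: reinitialize the mutexes instead; unlocking locks
		 * inherited from the parent would fail on Darwin.
		 */
		printf("%d: child reinit path\n", (int)getpid());
	}
	force_lock_pid = -1;
}

int
main(void) {
	pid_t pid;

	force_lock_sketch();
	pid = fork();
	force_unlock_sketch(); /* each side takes its own path */
	if (pid > 0) {
		waitpid(pid, NULL, 0);
	}
	return 0;
}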
diff --git a/deps/jemalloc/test/include/test/SFMT-alti.h b/deps/jemalloc/test/include/test/SFMT-alti.h
index 0005df6b4..a1885dbf2 100644
--- a/deps/jemalloc/test/include/test/SFMT-alti.h
+++ b/deps/jemalloc/test/include/test/SFMT-alti.h
@@ -33,8 +33,8 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/**
- * @file SFMT-alti.h
+/**
+ * @file SFMT-alti.h
*
* @brief SIMD oriented Fast Mersenne Twister(SFMT)
* pseudorandom number generator
@@ -95,7 +95,7 @@ vector unsigned int vec_recursion(vector unsigned int a,
* This function fills the internal state array with pseudorandom
* integers.
*/
-JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
+static inline void gen_rand_all(sfmt_t *ctx) {
int i;
vector unsigned int r, r1, r2;
@@ -119,10 +119,10 @@ JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
* This function fills the user-specified array with pseudorandom
* integers.
*
- * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param array a 128-bit array to be filled by pseudorandom numbers.
 * @param size number of 128-bit pseudorandom numbers to be generated.
*/
-JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
int i, j;
vector unsigned int r, r1, r2;
@@ -173,7 +173,7 @@ JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
 * @param array a 128-bit array to be swapped.
* @param size size of 128-bit array.
*/
-JEMALLOC_INLINE void swap(w128_t *array, int size) {
+static inline void swap(w128_t *array, int size) {
int i;
const vector unsigned char perm = ALTI_SWAP;
diff --git a/deps/jemalloc/test/include/test/SFMT-sse2.h b/deps/jemalloc/test/include/test/SFMT-sse2.h
index 0314a163d..169ad5581 100644
--- a/deps/jemalloc/test/include/test/SFMT-sse2.h
+++ b/deps/jemalloc/test/include/test/SFMT-sse2.h
@@ -33,7 +33,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/**
+/**
* @file SFMT-sse2.h
* @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2
*
@@ -60,10 +60,10 @@
* @param mask 128-bit mask
* @return output
*/
-JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
+JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
__m128i c, __m128i d, __m128i mask) {
__m128i v, x, y, z;
-
+
x = _mm_load_si128(a);
y = _mm_srli_epi32(*b, SR1);
z = _mm_srli_si128(c, SR2);
@@ -81,7 +81,7 @@ JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b,
* This function fills the internal state array with pseudorandom
* integers.
*/
-JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
+static inline void gen_rand_all(sfmt_t *ctx) {
int i;
__m128i r, r1, r2, mask;
mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
@@ -108,10 +108,10 @@ JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) {
* This function fills the user-specified array with pseudorandom
* integers.
*
- * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param array a 128-bit array to be filled by pseudorandom numbers.
 * @param size number of 128-bit pseudorandom numbers to be generated.
*/
-JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
int i, j;
__m128i r, r1, r2, mask;
mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1);
diff --git a/deps/jemalloc/test/include/test/SFMT.h b/deps/jemalloc/test/include/test/SFMT.h
index 09c1607dd..863fc55e8 100644
--- a/deps/jemalloc/test/include/test/SFMT.h
+++ b/deps/jemalloc/test/include/test/SFMT.h
@@ -81,91 +81,66 @@ const char *get_idstring(void);
int get_min_array_size32(void);
int get_min_array_size64(void);
-#ifndef JEMALLOC_ENABLE_INLINE
-double to_real1(uint32_t v);
-double genrand_real1(sfmt_t *ctx);
-double to_real2(uint32_t v);
-double genrand_real2(sfmt_t *ctx);
-double to_real3(uint32_t v);
-double genrand_real3(sfmt_t *ctx);
-double to_res53(uint64_t v);
-double to_res53_mix(uint32_t x, uint32_t y);
-double genrand_res53(sfmt_t *ctx);
-double genrand_res53_mix(sfmt_t *ctx);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(SFMT_C_))
/* These real versions are due to Isaku Wada */
/** generates a random number on [0,1]-real-interval */
-JEMALLOC_INLINE double to_real1(uint32_t v)
-{
+static inline double to_real1(uint32_t v) {
return v * (1.0/4294967295.0);
/* divided by 2^32-1 */
}
/** generates a random number on [0,1]-real-interval */
-JEMALLOC_INLINE double genrand_real1(sfmt_t *ctx)
-{
+static inline double genrand_real1(sfmt_t *ctx) {
return to_real1(gen_rand32(ctx));
}
/** generates a random number on [0,1)-real-interval */
-JEMALLOC_INLINE double to_real2(uint32_t v)
-{
+static inline double to_real2(uint32_t v) {
return v * (1.0/4294967296.0);
/* divided by 2^32 */
}
/** generates a random number on [0,1)-real-interval */
-JEMALLOC_INLINE double genrand_real2(sfmt_t *ctx)
-{
+static inline double genrand_real2(sfmt_t *ctx) {
return to_real2(gen_rand32(ctx));
}
/** generates a random number on (0,1)-real-interval */
-JEMALLOC_INLINE double to_real3(uint32_t v)
-{
+static inline double to_real3(uint32_t v) {
return (((double)v) + 0.5)*(1.0/4294967296.0);
/* divided by 2^32 */
}
/** generates a random number on (0,1)-real-interval */
-JEMALLOC_INLINE double genrand_real3(sfmt_t *ctx)
-{
+static inline double genrand_real3(sfmt_t *ctx) {
return to_real3(gen_rand32(ctx));
}
/** These real versions are due to Isaku Wada */
/** generates a random number on [0,1) with 53-bit resolution*/
-JEMALLOC_INLINE double to_res53(uint64_t v)
-{
+static inline double to_res53(uint64_t v) {
return v * (1.0/18446744073709551616.0L);
}
/** generates a random number on [0,1) with 53-bit resolution from two
* 32 bit integers */
-JEMALLOC_INLINE double to_res53_mix(uint32_t x, uint32_t y)
-{
+static inline double to_res53_mix(uint32_t x, uint32_t y) {
return to_res53(x | ((uint64_t)y << 32));
}
/** generates a random number on [0,1) with 53-bit resolution
*/
-JEMALLOC_INLINE double genrand_res53(sfmt_t *ctx)
-{
+static inline double genrand_res53(sfmt_t *ctx) {
return to_res53(gen_rand64(ctx));
-}
+}
/** generates a random number on [0,1) with 53-bit resolution
using 32bit integer.
*/
-JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx)
-{
+static inline double genrand_res53_mix(sfmt_t *ctx) {
uint32_t x, y;
x = gen_rand32(ctx);
y = gen_rand32(ctx);
return to_res53_mix(x, y);
-}
-#endif
+}
#endif
diff --git a/deps/jemalloc/test/include/test/btalloc.h b/deps/jemalloc/test/include/test/btalloc.h
new file mode 100644
index 000000000..5877ea77e
--- /dev/null
+++ b/deps/jemalloc/test/include/test/btalloc.h
@@ -0,0 +1,30 @@
+/* btalloc() provides a mechanism for allocating via permuted backtraces. */
+void *btalloc(size_t size, unsigned bits);
+
+#define btalloc_n_proto(n) \
+void *btalloc_##n(size_t size, unsigned bits);
+btalloc_n_proto(0)
+btalloc_n_proto(1)
+
+#define btalloc_n_gen(n) \
+void * \
+btalloc_##n(size_t size, unsigned bits) { \
+ void *p; \
+ \
+ if (bits == 0) { \
+ p = mallocx(size, 0); \
+ } else { \
+ switch (bits & 0x1U) { \
+ case 0: \
+ p = (btalloc_0(size, bits >> 1)); \
+ break; \
+ case 1: \
+ p = (btalloc_1(size, bits >> 1)); \
+ break; \
+ default: not_reached(); \
+ } \
+ } \
+ /* Intentionally sabotage tail call optimization. */ \
+ assert_ptr_not_null(p, "Unexpected mallocx() failure"); \
+ return p; \
+}
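Each expansion of btalloc_##n consumes one bit of bits before recursing, so every distinct bits value reaches mallocx() through a distinct call chain, which is what makes the helper useful for exercising backtrace-keyed heap profiling. The companion test/src/btalloc.c (not part of this hunk) instantiates the generator roughly as follows:

#include "test/jemalloc_test.h"

void *
btalloc(size_t size, unsigned bits) {
	return btalloc_0(size, bits);
}

/* Two mutually recursive instances; bit 0 of `bits` picks between them. */
btalloc_n_gen(0)
btalloc_n_gen(1)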
diff --git a/deps/jemalloc/test/include/test/extent_hooks.h b/deps/jemalloc/test/include/test/extent_hooks.h
new file mode 100644
index 000000000..1f0620154
--- /dev/null
+++ b/deps/jemalloc/test/include/test/extent_hooks.h
@@ -0,0 +1,289 @@
+/*
+ * Boilerplate code used for testing extent hooks via interception and
+ * passthrough.
+ */
+
+static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr,
+ size_t size, size_t alignment, bool *zero, bool *commit,
+ unsigned arena_ind);
+static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, bool committed, unsigned arena_ind);
+static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, bool committed, unsigned arena_ind);
+static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks,
+ void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind);
+static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr,
+ size_t size, size_t size_a, size_t size_b, bool committed,
+ unsigned arena_ind);
+static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a,
+ size_t size_a, void *addr_b, size_t size_b, bool committed,
+ unsigned arena_ind);
+
+static extent_hooks_t *default_hooks;
+static extent_hooks_t hooks = {
+ extent_alloc_hook,
+ extent_dalloc_hook,
+ extent_destroy_hook,
+ extent_commit_hook,
+ extent_decommit_hook,
+ extent_purge_lazy_hook,
+ extent_purge_forced_hook,
+ extent_split_hook,
+ extent_merge_hook
+};
+
+/* Control whether hook functions pass calls through to default hooks. */
+static bool try_alloc = true;
+static bool try_dalloc = true;
+static bool try_destroy = true;
+static bool try_commit = true;
+static bool try_decommit = true;
+static bool try_purge_lazy = true;
+static bool try_purge_forced = true;
+static bool try_split = true;
+static bool try_merge = true;
+
+/* Set to false prior to operations, then introspect after operations. */
+static bool called_alloc;
+static bool called_dalloc;
+static bool called_destroy;
+static bool called_commit;
+static bool called_decommit;
+static bool called_purge_lazy;
+static bool called_purge_forced;
+static bool called_split;
+static bool called_merge;
+
+/* Set to false prior to operations, then introspect after operations. */
+static bool did_alloc;
+static bool did_dalloc;
+static bool did_destroy;
+static bool did_commit;
+static bool did_decommit;
+static bool did_purge_lazy;
+static bool did_purge_forced;
+static bool did_split;
+static bool did_merge;
+
+#if 0
+# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__)
+#else
+# define TRACE_HOOK(fmt, ...)
+#endif
+
+static void *
+extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
+ void *ret;
+
+ TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, "
+ "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks,
+ new_addr, size, alignment, *zero ? "true" : "false", *commit ?
+ "true" : "false", arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->alloc, extent_alloc_hook,
+ "Wrong hook function");
+ called_alloc = true;
+ if (!try_alloc) {
+ return NULL;
+ }
+ ret = default_hooks->alloc(default_hooks, new_addr, size, alignment,
+ zero, commit, 0);
+ did_alloc = (ret != NULL);
+ return ret;
+}
+
+static bool
+extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ bool committed, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+ "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
+ "true" : "false", arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook,
+ "Wrong hook function");
+ called_dalloc = true;
+ if (!try_dalloc) {
+ return true;
+ }
+ err = default_hooks->dalloc(default_hooks, addr, size, committed, 0);
+ did_dalloc = !err;
+ return err;
+}
+
+static void
+extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ bool committed, unsigned arena_ind) {
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+ "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
+ "true" : "false", arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->destroy, extent_destroy_hook,
+ "Wrong hook function");
+ called_destroy = true;
+ if (!try_destroy) {
+ return;
+ }
+ default_hooks->destroy(default_hooks, addr, size, committed, 0);
+ did_destroy = true;
+}
+
+static bool
+extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+ "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size,
+ offset, length, arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->commit, extent_commit_hook,
+ "Wrong hook function");
+ called_commit = true;
+ if (!try_commit) {
+ return true;
+ }
+ err = default_hooks->commit(default_hooks, addr, size, offset, length,
+ 0);
+ did_commit = !err;
+ return err;
+}
+
+static bool
+extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+ "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size,
+ offset, length, arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->decommit, extent_decommit_hook,
+ "Wrong hook function");
+ called_decommit = true;
+ if (!try_decommit) {
+ return true;
+ }
+ err = default_hooks->decommit(default_hooks, addr, size, offset, length,
+ 0);
+ did_decommit = !err;
+ return err;
+}
+
+static bool
+extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+ "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size,
+ offset, length, arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook,
+ "Wrong hook function");
+ called_purge_lazy = true;
+ if (!try_purge_lazy) {
+ return true;
+ }
+ err = default_hooks->purge_lazy == NULL ||
+ default_hooks->purge_lazy(default_hooks, addr, size, offset, length,
+ 0);
+ did_purge_lazy = !err;
+ return err;
+}
+
+static bool
+extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t offset, size_t length, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, "
+ "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size,
+ offset, length, arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook,
+ "Wrong hook function");
+ called_purge_forced = true;
+ if (!try_purge_forced) {
+ return true;
+ }
+ err = default_hooks->purge_forced == NULL ||
+ default_hooks->purge_forced(default_hooks, addr, size, offset,
+ length, 0);
+ did_purge_forced = !err;
+ return err;
+}
+
+static bool
+extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ size_t size_a, size_t size_b, bool committed, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, "
+ "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks,
+ addr, size, size_a, size_b, committed ? "true" : "false",
+ arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->split, extent_split_hook,
+ "Wrong hook function");
+ called_split = true;
+ if (!try_split) {
+ return true;
+ }
+ err = (default_hooks->split == NULL ||
+ default_hooks->split(default_hooks, addr, size, size_a, size_b,
+ committed, 0));
+ did_split = !err;
+ return err;
+}
+
+static bool
+extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
+ void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
+ bool err;
+
+ TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p "
+ "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks,
+ addr_a, size_a, addr_b, size_b, committed ? "true" : "false",
+ arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->merge, extent_merge_hook,
+ "Wrong hook function");
+ assert_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b,
+ "Extents not mergeable");
+ called_merge = true;
+ if (!try_merge) {
+ return true;
+ }
+ err = (default_hooks->merge == NULL ||
+ default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b,
+ committed, 0));
+ did_merge = !err;
+ return err;
+}
+
+static void
+extent_hooks_prep(void) {
+ size_t sz;
+
+ sz = sizeof(default_hooks);
+ assert_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz,
+ NULL, 0), 0, "Unexpected mallctl() error");
+}
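A typical test first captures the default hooks with extent_hooks_prep(), installs the interception table via mallctl, then performs allocations and inspects the called_*/did_* flags. A hedged usage sketch follows; the allocation size is illustrative, and real tests usually create a dedicated arena rather than reusing arena 0:

static void
exercise_hooks_sketch(void) {
	extent_hooks_t *new_hooks = &hooks;
	size_t sz = sizeof(extent_hooks_t *);
	void *p;

	extent_hooks_prep(); /* capture defaults for passthrough */
	assert_d_eq(mallctl("arena.0.extent_hooks", NULL, NULL,
	    (void *)&new_hooks, sz), 0, "Unexpected mallctl() error");

	called_alloc = false;
	/* Large allocation, pinned to arena 0 and bypassing the tcache. */
	p = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
	assert_ptr_not_null(p, "Unexpected mallocx() failure");
	assert_true(called_alloc, "Expected the alloc hook to be invoked");
	dallocx(p, MALLOCX_TCACHE_NONE);
}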
diff --git a/deps/jemalloc/test/include/test/jemalloc_test.h.in b/deps/jemalloc/test/include/test/jemalloc_test.h.in
index 730a55dba..67caa86bf 100644
--- a/deps/jemalloc/test/include/test/jemalloc_test.h.in
+++ b/deps/jemalloc/test/include/test/jemalloc_test.h.in
@@ -1,50 +1,28 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#ifndef SIZE_T_MAX
+# define SIZE_T_MAX SIZE_MAX
+#endif
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <errno.h>
-#include <inttypes.h>
#include <math.h>
#include <string.h>
+#ifdef _WIN32
+# include "msvc_compat/strings.h"
+#endif
#ifdef _WIN32
# include <windows.h>
+# include "msvc_compat/windows_extra.h"
#else
# include <pthread.h>
#endif
-/******************************************************************************/
-/*
- * Define always-enabled assertion macros, so that test assertions execute even
- * if assertions are disabled in the library code. These definitions must
- * exist prior to including "jemalloc/internal/util.h".
- */
-#define assert(e) do { \
- if (!(e)) { \
- malloc_printf( \
- "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
- __FILE__, __LINE__, #e); \
- abort(); \
- } \
-} while (0)
-
-#define not_reached() do { \
- malloc_printf( \
- "<jemalloc>: %s:%d: Unreachable code reached\n", \
- __FILE__, __LINE__); \
- abort(); \
-} while (0)
-
-#define not_implemented() do { \
- malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \
- __FILE__, __LINE__); \
- abort(); \
-} while (0)
-
-#define assert_not_implemented(e) do { \
- if (!(e)) \
- not_implemented(); \
-} while (0)
-
#include "test/jemalloc_test_defs.h"
#ifdef JEMALLOC_OSSPIN
@@ -65,7 +43,8 @@
#ifdef JEMALLOC_UNIT_TEST
# define JEMALLOC_JET
# define JEMALLOC_MANGLE
-# include "jemalloc/internal/jemalloc_internal.h"
+# include "jemalloc/internal/jemalloc_preamble.h"
+# include "jemalloc/internal/jemalloc_internal_includes.h"
/******************************************************************************/
/*
@@ -73,26 +52,34 @@
* expose the minimum necessary internal utility code (to avoid re-implementing
* essentially identical code within the test infrastructure).
*/
-#elif defined(JEMALLOC_INTEGRATION_TEST)
+#elif defined(JEMALLOC_INTEGRATION_TEST) || \
+ defined(JEMALLOC_INTEGRATION_CPP_TEST)
# define JEMALLOC_MANGLE
# include "jemalloc/jemalloc@install_suffix@.h"
# include "jemalloc/internal/jemalloc_internal_defs.h"
# include "jemalloc/internal/jemalloc_internal_macros.h"
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+ true
+#else
+ false
+#endif
+ ;
+
# define JEMALLOC_N(n) @private_namespace@##n
# include "jemalloc/internal/private_namespace.h"
+# include "jemalloc/internal/hooks.h"
-# define JEMALLOC_H_TYPES
-# define JEMALLOC_H_STRUCTS
-# define JEMALLOC_H_EXTERNS
-# define JEMALLOC_H_INLINES
+/* Hermetic headers. */
+# include "jemalloc/internal/assert.h"
+# include "jemalloc/internal/malloc_io.h"
+# include "jemalloc/internal/nstime.h"
# include "jemalloc/internal/util.h"
+
+/* Non-hermetic headers. */
# include "jemalloc/internal/qr.h"
# include "jemalloc/internal/ql.h"
-# undef JEMALLOC_H_TYPES
-# undef JEMALLOC_H_STRUCTS
-# undef JEMALLOC_H_EXTERNS
-# undef JEMALLOC_H_INLINES
/******************************************************************************/
/*
@@ -107,7 +94,8 @@
# include "jemalloc/jemalloc_protos_jet.h"
# define JEMALLOC_JET
-# include "jemalloc/internal/jemalloc_internal.h"
+# include "jemalloc/internal/jemalloc_preamble.h"
+# include "jemalloc/internal/jemalloc_internal_includes.h"
# include "jemalloc/internal/public_unnamespace.h"
# undef JEMALLOC_JET
@@ -132,10 +120,54 @@
/*
* Common test utilities.
*/
+#include "test/btalloc.h"
#include "test/math.h"
#include "test/mtx.h"
#include "test/mq.h"
#include "test/test.h"
+#include "test/timer.h"
#include "test/thd.h"
-#define MEXP 19937
+#define MEXP 19937
#include "test/SFMT.h"
+
+/******************************************************************************/
+/*
+ * Define always-enabled assertion macros, so that test assertions execute even
+ * if assertions are disabled in the library code.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#undef assert_not_implemented
+
+#define assert(e) do { \
+ if (!(e)) { \
+ malloc_printf( \
+ "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n", \
+ __FILE__, __LINE__, #e); \
+ abort(); \
+ } \
+} while (0)
+
+#define not_reached() do { \
+ malloc_printf( \
+ "<jemalloc>: %s:%d: Unreachable code reached\n", \
+ __FILE__, __LINE__); \
+ abort(); \
+} while (0)
+
+#define not_implemented() do { \
+ malloc_printf("<jemalloc>: %s:%d: Not implemented\n", \
+ __FILE__, __LINE__); \
+ abort(); \
+} while (0)
+
+#define assert_not_implemented(e) do { \
+ if (!(e)) { \
+ not_implemented(); \
+ } \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in b/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
index 18a9773d7..5cc8532a3 100644
--- a/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
+++ b/deps/jemalloc/test/include/test/jemalloc_test_defs.h.in
@@ -1,5 +1,9 @@
#include "jemalloc/internal/jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
-/* For use by SFMT. */
+/*
+ * For use by SFMT. configure.ac doesn't actually define HAVE_SSE2 because its
+ * dependencies are notoriously unportable in practice.
+ */
#undef HAVE_SSE2
#undef HAVE_ALTIVEC
diff --git a/deps/jemalloc/test/include/test/math.h b/deps/jemalloc/test/include/test/math.h
index a862ed7db..efba086dd 100644
--- a/deps/jemalloc/test/include/test/math.h
+++ b/deps/jemalloc/test/include/test/math.h
@@ -1,12 +1,3 @@
-#ifndef JEMALLOC_ENABLE_INLINE
-double ln_gamma(double x);
-double i_gamma(double x, double p, double ln_gamma_p);
-double pt_norm(double p);
-double pt_chi2(double p, double df, double ln_gamma_df_2);
-double pt_gamma(double p, double shape, double scale, double ln_gamma_shape);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MATH_C_))
/*
* Compute the natural log of Gamma(x), accurate to 10 decimal places.
*
@@ -15,9 +6,8 @@ double pt_gamma(double p, double shape, double scale, double ln_gamma_shape);
* Pike, M.C., I.D. Hill (1966) Algorithm 291: Logarithm of Gamma function
* [S14]. Communications of the ACM 9(9):684.
*/
-JEMALLOC_INLINE double
-ln_gamma(double x)
-{
+static inline double
+ln_gamma(double x) {
double f, z;
assert(x > 0.0);
@@ -31,14 +21,15 @@ ln_gamma(double x)
}
x = z;
f = -log(f);
- } else
+ } else {
f = 0.0;
+ }
z = 1.0 / (x * x);
- return (f + (x-0.5) * log(x) - x + 0.918938533204673 +
+ return f + (x-0.5) * log(x) - x + 0.918938533204673 +
(((-0.000595238095238 * z + 0.000793650793651) * z -
- 0.002777777777778) * z + 0.083333333333333) / x);
+ 0.002777777777778) * z + 0.083333333333333) / x;
}
/*
@@ -50,9 +41,8 @@ ln_gamma(double x)
* Bhattacharjee, G.P. (1970) Algorithm AS 32: The incomplete Gamma integral.
* Applied Statistics 19:285-287.
*/
-JEMALLOC_INLINE double
-i_gamma(double x, double p, double ln_gamma_p)
-{
+static inline double
+i_gamma(double x, double p, double ln_gamma_p) {
double acu, factor, oflo, gin, term, rn, a, b, an, dif;
double pn[6];
unsigned i;
@@ -60,8 +50,9 @@ i_gamma(double x, double p, double ln_gamma_p)
assert(p > 0.0);
assert(x >= 0.0);
- if (x == 0.0)
- return (0.0);
+ if (x == 0.0) {
+ return 0.0;
+ }
acu = 1.0e-10;
oflo = 1.0e30;
@@ -80,7 +71,7 @@ i_gamma(double x, double p, double ln_gamma_p)
gin += term;
if (term <= acu) {
gin *= factor / p;
- return (gin);
+ return gin;
}
}
} else {
@@ -99,23 +90,26 @@ i_gamma(double x, double p, double ln_gamma_p)
b += 2.0;
term += 1.0;
an = a * term;
- for (i = 0; i < 2; i++)
+ for (i = 0; i < 2; i++) {
pn[i+4] = b * pn[i+2] - an * pn[i];
+ }
if (pn[5] != 0.0) {
rn = pn[4] / pn[5];
dif = fabs(gin - rn);
if (dif <= acu && dif <= acu * rn) {
gin = 1.0 - factor * gin;
- return (gin);
+ return gin;
}
gin = rn;
}
- for (i = 0; i < 4; i++)
+ for (i = 0; i < 4; i++) {
pn[i] = pn[i+2];
+ }
if (fabs(pn[4]) >= oflo) {
- for (i = 0; i < 4; i++)
+ for (i = 0; i < 4; i++) {
pn[i] /= oflo;
+ }
}
}
}
@@ -131,9 +125,8 @@ i_gamma(double x, double p, double ln_gamma_p)
* Wichura, M.J. (1988) Algorithm AS 241: The percentage points of the normal
* distribution. Applied Statistics 37(3):477-484.
*/
-JEMALLOC_INLINE double
-pt_norm(double p)
-{
+static inline double
+pt_norm(double p) {
double q, r, ret;
assert(p > 0.0 && p < 1.0);
@@ -142,7 +135,7 @@ pt_norm(double p)
if (fabs(q) <= 0.425) {
/* p close to 1/2. */
r = 0.180625 - q * q;
- return (q * (((((((2.5090809287301226727e3 * r +
+ return q * (((((((2.5090809287301226727e3 * r +
3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r
+ 4.5921953931549871457e4) * r + 1.3731693765509461125e4) *
r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2)
@@ -151,12 +144,13 @@ pt_norm(double p)
2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r
+ 2.1213794301586595867e4) * r + 5.3941960214247511077e3) *
r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1)
- * r + 1.0));
+ * r + 1.0);
} else {
- if (q < 0.0)
+ if (q < 0.0) {
r = p;
- else
+ } else {
r = 1.0 - p;
+ }
assert(r > 0.0);
r = sqrt(-log(r));
@@ -198,9 +192,10 @@ pt_norm(double p)
5.99832206555887937690e-1)
* r + 1.0));
}
- if (q < 0.0)
+ if (q < 0.0) {
ret = -ret;
- return (ret);
+ }
+ return ret;
}
}
@@ -218,9 +213,8 @@ pt_norm(double p)
* Shea, B.L. (1991) Algorithm AS R85: A remark on AS 91: The percentage
* points of the Chi^2 distribution. Applied Statistics 40(1):233-235.
*/
-JEMALLOC_INLINE double
-pt_chi2(double p, double df, double ln_gamma_df_2)
-{
+static inline double
+pt_chi2(double p, double df, double ln_gamma_df_2) {
double e, aa, xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6;
unsigned i;
@@ -236,8 +230,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2)
if (df < -1.24 * log(p)) {
/* Starting approximation for small Chi^2. */
ch = pow(p * xx * exp(ln_gamma_df_2 + xx * aa), 1.0 / xx);
- if (ch - e < 0.0)
- return (ch);
+ if (ch - e < 0.0) {
+ return ch;
+ }
} else {
if (df > 0.32) {
x = pt_norm(p);
@@ -263,8 +258,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2)
* (13.32 + 3.0 * ch)) / p2;
ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch +
c * aa) * p2 / p1) / t;
- if (fabs(q / ch - 1.0) - 0.01 <= 0.0)
+ if (fabs(q / ch - 1.0) - 0.01 <= 0.0) {
break;
+ }
}
}
}
@@ -273,8 +269,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2)
/* Calculation of seven-term Taylor series. */
q = ch;
p1 = 0.5 * ch;
- if (p1 < 0.0)
- return (-1.0);
+ if (p1 < 0.0) {
+ return -1.0;
+ }
p2 = p - i_gamma(p1, xx, ln_gamma_df_2);
t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch));
b = t / ch;
@@ -290,22 +287,20 @@ pt_chi2(double p, double df, double ln_gamma_df_2)
s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0;
ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3
- b * (s4 - b * (s5 - b * s6))))));
- if (fabs(q / ch - 1.0) <= e)
+ if (fabs(q / ch - 1.0) <= e) {
break;
+ }
}
- return (ch);
+ return ch;
}
/*
* Given a value p in [0..1] and Gamma distribution shape and scale parameters,
- * compute the upper limit on the definite integeral from [0..z] that satisfies
+ * compute the upper limit on the definite integral from [0..z] that satisfies
* p.
*/
-JEMALLOC_INLINE double
-pt_gamma(double p, double shape, double scale, double ln_gamma_shape)
-{
-
- return (pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale);
+static inline double
+pt_gamma(double p, double shape, double scale, double ln_gamma_shape) {
+ return pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale;
}
-#endif
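The converted helpers invert CDFs: pt_norm for the standard normal, pt_chi2 for the Chi^2 distribution, and pt_gamma for the Gamma distribution via the Chi^2 relation visible in its one-line body. A small usage sketch built on identities that any correct implementation satisfies (assumes the surrounding test headers, where assert() is always enabled):

#include <math.h>

static void
check_math_sketch(void) {
	double df = 4.0;
	double p = 0.9;
	double a, b;

	/* The median of the standard normal is 0. */
	assert(fabs(pt_norm(0.5)) < 1e-9);

	/* Chi^2 with df degrees of freedom equals Gamma(df/2, scale 2). */
	a = pt_chi2(p, df, ln_gamma(df / 2.0));
	b = pt_gamma(p, df / 2.0, 2.0, ln_gamma(df / 2.0));
	assert(fabs(a - b) < 1e-9);
}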
diff --git a/deps/jemalloc/test/include/test/mq.h b/deps/jemalloc/test/include/test/mq.h
index 11188653c..af2c078da 100644
--- a/deps/jemalloc/test/include/test/mq.h
+++ b/deps/jemalloc/test/include/test/mq.h
@@ -1,3 +1,5 @@
+void mq_nanosleep(unsigned ns);
+
/*
* Simple templated message queue implementation that relies on only mutexes for
* synchronization (which reduces portability issues). Given the following
@@ -24,9 +26,9 @@
* does not perform any cleanup of messages, since it knows nothing of their
* payloads.
*/
-#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type)
+#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type)
-#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \
+#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \
typedef struct { \
mtx_t lock; \
ql_head(a_mq_msg_type) msgs; \
@@ -35,31 +37,28 @@ typedef struct { \
a_attr bool \
a_prefix##init(a_mq_type *mq) { \
\
- if (mtx_init(&mq->lock)) \
- return (true); \
+ if (mtx_init(&mq->lock)) { \
+ return true; \
+ } \
ql_new(&mq->msgs); \
mq->count = 0; \
- return (false); \
+ return false; \
} \
a_attr void \
-a_prefix##fini(a_mq_type *mq) \
-{ \
- \
+a_prefix##fini(a_mq_type *mq) { \
mtx_fini(&mq->lock); \
} \
a_attr unsigned \
-a_prefix##count(a_mq_type *mq) \
-{ \
+a_prefix##count(a_mq_type *mq) { \
unsigned count; \
\
mtx_lock(&mq->lock); \
count = mq->count; \
mtx_unlock(&mq->lock); \
- return (count); \
+ return count; \
} \
a_attr a_mq_msg_type * \
-a_prefix##tryget(a_mq_type *mq) \
-{ \
+a_prefix##tryget(a_mq_type *mq) { \
a_mq_msg_type *msg; \
\
mtx_lock(&mq->lock); \
@@ -69,38 +68,36 @@ a_prefix##tryget(a_mq_type *mq) \
mq->count--; \
} \
mtx_unlock(&mq->lock); \
- return (msg); \
+ return msg; \
} \
a_attr a_mq_msg_type * \
-a_prefix##get(a_mq_type *mq) \
-{ \
+a_prefix##get(a_mq_type *mq) { \
a_mq_msg_type *msg; \
- struct timespec timeout; \
+ unsigned ns; \
\
msg = a_prefix##tryget(mq); \
- if (msg != NULL) \
- return (msg); \
+ if (msg != NULL) { \
+ return msg; \
+ } \
\
- timeout.tv_sec = 0; \
- timeout.tv_nsec = 1; \
+ ns = 1; \
while (true) { \
- nanosleep(&timeout, NULL); \
+ mq_nanosleep(ns); \
msg = a_prefix##tryget(mq); \
- if (msg != NULL) \
- return (msg); \
- if (timeout.tv_sec == 0) { \
+ if (msg != NULL) { \
+ return msg; \
+ } \
+ if (ns < 1000*1000*1000) { \
/* Double sleep time, up to max 1 second. */ \
- timeout.tv_nsec <<= 1; \
- if (timeout.tv_nsec >= 1000*1000*1000) { \
- timeout.tv_sec = 1; \
- timeout.tv_nsec = 0; \
+ ns <<= 1; \
+ if (ns > 1000*1000*1000) { \
+ ns = 1000*1000*1000; \
} \
} \
} \
} \
a_attr void \
-a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) \
-{ \
+a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \
\
mtx_lock(&mq->lock); \
ql_elm_new(msg, a_field); \
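The "Given the following..." usage example in the comment above falls outside this hunk's context, so for orientation: mq_gen() stamps out a complete queue implementation for one message type, and the generated get() now polls via mq_nanosleep() with exponentially doubling sleeps (1ns up to a 1s cap) instead of calling nanosleep() directly, keeping the portability surface to mutexes plus one sleep shim. A minimal instantiation sketch in the header's own style (identifiers illustrative):

	typedef struct mq_msg_s mq_msg_t;
	struct mq_msg_s {
		mq_msg(mq_msg_t) link;	/* Intrusive linkage used by the queue. */
	};
	mq_gen(static, mq_, mq_t, mq_msg_t, link)

	static void
	mq_example(void) {
		mq_t mq;
		mq_msg_t msg;

		assert_false(mq_init(&mq), "Unexpected mq_init() failure");
		mq_put(&mq, &msg);
		assert_u_eq(mq_count(&mq), 1, "mq should hold one message");
		assert_ptr_eq(mq_get(&mq), &msg, "Unexpected message");
		mq_fini(&mq);
	}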
diff --git a/deps/jemalloc/test/include/test/mtx.h b/deps/jemalloc/test/include/test/mtx.h
index bbe822f54..58afbc3d1 100644
--- a/deps/jemalloc/test/include/test/mtx.h
+++ b/deps/jemalloc/test/include/test/mtx.h
@@ -8,6 +8,8 @@
typedef struct {
#ifdef _WIN32
CRITICAL_SECTION lock;
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ os_unfair_lock lock;
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#else
diff --git a/deps/jemalloc/test/include/test/test.h b/deps/jemalloc/test/include/test/test.h
index a32ec07c4..fd0e5265d 100644
--- a/deps/jemalloc/test/include/test/test.h
+++ b/deps/jemalloc/test/include/test/test.h
@@ -1,6 +1,6 @@
-#define ASSERT_BUFSIZE 256
+#define ASSERT_BUFSIZE 256
-#define assert_cmp(t, a, b, cmp, neg_cmp, pri, fmt...) do { \
+#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \
t a_ = (a); \
t b_ = (b); \
if (!(a_ cmp b_)) { \
@@ -8,209 +8,209 @@
char message[ASSERT_BUFSIZE]; \
malloc_snprintf(prefix, sizeof(prefix), \
"%s:%s:%d: Failed assertion: " \
- "(%s) "#cmp" (%s) --> " \
- "%"pri" "#neg_cmp" %"pri": ", \
+ "(%s) " #cmp " (%s) --> " \
+ "%" pri " " #neg_cmp " %" pri ": ", \
__func__, __FILE__, __LINE__, \
#a, #b, a_, b_); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_ptr_eq(a, b, fmt...) assert_cmp(void *, a, b, ==, \
- !=, "p", fmt)
-#define assert_ptr_ne(a, b, fmt...) assert_cmp(void *, a, b, !=, \
- ==, "p", fmt)
-#define assert_ptr_null(a, fmt...) assert_cmp(void *, a, NULL, ==, \
- !=, "p", fmt)
-#define assert_ptr_not_null(a, fmt...) assert_cmp(void *, a, NULL, !=, \
- ==, "p", fmt)
-
-#define assert_c_eq(a, b, fmt...) assert_cmp(char, a, b, ==, !=, "c", fmt)
-#define assert_c_ne(a, b, fmt...) assert_cmp(char, a, b, !=, ==, "c", fmt)
-#define assert_c_lt(a, b, fmt...) assert_cmp(char, a, b, <, >=, "c", fmt)
-#define assert_c_le(a, b, fmt...) assert_cmp(char, a, b, <=, >, "c", fmt)
-#define assert_c_ge(a, b, fmt...) assert_cmp(char, a, b, >=, <, "c", fmt)
-#define assert_c_gt(a, b, fmt...) assert_cmp(char, a, b, >, <=, "c", fmt)
-
-#define assert_x_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "#x", fmt)
-#define assert_x_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "#x", fmt)
-#define assert_x_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "#x", fmt)
-#define assert_x_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "#x", fmt)
-#define assert_x_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "#x", fmt)
-#define assert_x_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "#x", fmt)
-
-#define assert_d_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "d", fmt)
-#define assert_d_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "d", fmt)
-#define assert_d_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "d", fmt)
-#define assert_d_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "d", fmt)
-#define assert_d_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "d", fmt)
-#define assert_d_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "d", fmt)
-
-#define assert_u_eq(a, b, fmt...) assert_cmp(int, a, b, ==, !=, "u", fmt)
-#define assert_u_ne(a, b, fmt...) assert_cmp(int, a, b, !=, ==, "u", fmt)
-#define assert_u_lt(a, b, fmt...) assert_cmp(int, a, b, <, >=, "u", fmt)
-#define assert_u_le(a, b, fmt...) assert_cmp(int, a, b, <=, >, "u", fmt)
-#define assert_u_ge(a, b, fmt...) assert_cmp(int, a, b, >=, <, "u", fmt)
-#define assert_u_gt(a, b, fmt...) assert_cmp(int, a, b, >, <=, "u", fmt)
-
-#define assert_ld_eq(a, b, fmt...) assert_cmp(long, a, b, ==, \
- !=, "ld", fmt)
-#define assert_ld_ne(a, b, fmt...) assert_cmp(long, a, b, !=, \
- ==, "ld", fmt)
-#define assert_ld_lt(a, b, fmt...) assert_cmp(long, a, b, <, \
- >=, "ld", fmt)
-#define assert_ld_le(a, b, fmt...) assert_cmp(long, a, b, <=, \
- >, "ld", fmt)
-#define assert_ld_ge(a, b, fmt...) assert_cmp(long, a, b, >=, \
- <, "ld", fmt)
-#define assert_ld_gt(a, b, fmt...) assert_cmp(long, a, b, >, \
- <=, "ld", fmt)
-
-#define assert_lu_eq(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, ==, !=, "lu", fmt)
-#define assert_lu_ne(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, !=, ==, "lu", fmt)
-#define assert_lu_lt(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, <, >=, "lu", fmt)
-#define assert_lu_le(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, <=, >, "lu", fmt)
-#define assert_lu_ge(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, >=, <, "lu", fmt)
-#define assert_lu_gt(a, b, fmt...) assert_cmp(unsigned long, \
- a, b, >, <=, "lu", fmt)
-
-#define assert_qd_eq(a, b, fmt...) assert_cmp(long long, a, b, ==, \
- !=, "qd", fmt)
-#define assert_qd_ne(a, b, fmt...) assert_cmp(long long, a, b, !=, \
- ==, "qd", fmt)
-#define assert_qd_lt(a, b, fmt...) assert_cmp(long long, a, b, <, \
- >=, "qd", fmt)
-#define assert_qd_le(a, b, fmt...) assert_cmp(long long, a, b, <=, \
- >, "qd", fmt)
-#define assert_qd_ge(a, b, fmt...) assert_cmp(long long, a, b, >=, \
- <, "qd", fmt)
-#define assert_qd_gt(a, b, fmt...) assert_cmp(long long, a, b, >, \
- <=, "qd", fmt)
-
-#define assert_qu_eq(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, ==, !=, "qu", fmt)
-#define assert_qu_ne(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, !=, ==, "qu", fmt)
-#define assert_qu_lt(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, <, >=, "qu", fmt)
-#define assert_qu_le(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, <=, >, "qu", fmt)
-#define assert_qu_ge(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, >=, <, "qu", fmt)
-#define assert_qu_gt(a, b, fmt...) assert_cmp(unsigned long long, \
- a, b, >, <=, "qu", fmt)
-
-#define assert_jd_eq(a, b, fmt...) assert_cmp(intmax_t, a, b, ==, \
- !=, "jd", fmt)
-#define assert_jd_ne(a, b, fmt...) assert_cmp(intmax_t, a, b, !=, \
- ==, "jd", fmt)
-#define assert_jd_lt(a, b, fmt...) assert_cmp(intmax_t, a, b, <, \
- >=, "jd", fmt)
-#define assert_jd_le(a, b, fmt...) assert_cmp(intmax_t, a, b, <=, \
- >, "jd", fmt)
-#define assert_jd_ge(a, b, fmt...) assert_cmp(intmax_t, a, b, >=, \
- <, "jd", fmt)
-#define assert_jd_gt(a, b, fmt...) assert_cmp(intmax_t, a, b, >, \
- <=, "jd", fmt)
-
-#define assert_ju_eq(a, b, fmt...) assert_cmp(uintmax_t, a, b, ==, \
- !=, "ju", fmt)
-#define assert_ju_ne(a, b, fmt...) assert_cmp(uintmax_t, a, b, !=, \
- ==, "ju", fmt)
-#define assert_ju_lt(a, b, fmt...) assert_cmp(uintmax_t, a, b, <, \
- >=, "ju", fmt)
-#define assert_ju_le(a, b, fmt...) assert_cmp(uintmax_t, a, b, <=, \
- >, "ju", fmt)
-#define assert_ju_ge(a, b, fmt...) assert_cmp(uintmax_t, a, b, >=, \
- <, "ju", fmt)
-#define assert_ju_gt(a, b, fmt...) assert_cmp(uintmax_t, a, b, >, \
- <=, "ju", fmt)
-
-#define assert_zd_eq(a, b, fmt...) assert_cmp(ssize_t, a, b, ==, \
- !=, "zd", fmt)
-#define assert_zd_ne(a, b, fmt...) assert_cmp(ssize_t, a, b, !=, \
- ==, "zd", fmt)
-#define assert_zd_lt(a, b, fmt...) assert_cmp(ssize_t, a, b, <, \
- >=, "zd", fmt)
-#define assert_zd_le(a, b, fmt...) assert_cmp(ssize_t, a, b, <=, \
- >, "zd", fmt)
-#define assert_zd_ge(a, b, fmt...) assert_cmp(ssize_t, a, b, >=, \
- <, "zd", fmt)
-#define assert_zd_gt(a, b, fmt...) assert_cmp(ssize_t, a, b, >, \
- <=, "zd", fmt)
-
-#define assert_zu_eq(a, b, fmt...) assert_cmp(size_t, a, b, ==, \
- !=, "zu", fmt)
-#define assert_zu_ne(a, b, fmt...) assert_cmp(size_t, a, b, !=, \
- ==, "zu", fmt)
-#define assert_zu_lt(a, b, fmt...) assert_cmp(size_t, a, b, <, \
- >=, "zu", fmt)
-#define assert_zu_le(a, b, fmt...) assert_cmp(size_t, a, b, <=, \
- >, "zu", fmt)
-#define assert_zu_ge(a, b, fmt...) assert_cmp(size_t, a, b, >=, \
- <, "zu", fmt)
-#define assert_zu_gt(a, b, fmt...) assert_cmp(size_t, a, b, >, \
- <=, "zu", fmt)
-
-#define assert_d32_eq(a, b, fmt...) assert_cmp(int32_t, a, b, ==, \
- !=, PRId32, fmt)
-#define assert_d32_ne(a, b, fmt...) assert_cmp(int32_t, a, b, !=, \
- ==, PRId32, fmt)
-#define assert_d32_lt(a, b, fmt...) assert_cmp(int32_t, a, b, <, \
- >=, PRId32, fmt)
-#define assert_d32_le(a, b, fmt...) assert_cmp(int32_t, a, b, <=, \
- >, PRId32, fmt)
-#define assert_d32_ge(a, b, fmt...) assert_cmp(int32_t, a, b, >=, \
- <, PRId32, fmt)
-#define assert_d32_gt(a, b, fmt...) assert_cmp(int32_t, a, b, >, \
- <=, PRId32, fmt)
-
-#define assert_u32_eq(a, b, fmt...) assert_cmp(uint32_t, a, b, ==, \
- !=, PRIu32, fmt)
-#define assert_u32_ne(a, b, fmt...) assert_cmp(uint32_t, a, b, !=, \
- ==, PRIu32, fmt)
-#define assert_u32_lt(a, b, fmt...) assert_cmp(uint32_t, a, b, <, \
- >=, PRIu32, fmt)
-#define assert_u32_le(a, b, fmt...) assert_cmp(uint32_t, a, b, <=, \
- >, PRIu32, fmt)
-#define assert_u32_ge(a, b, fmt...) assert_cmp(uint32_t, a, b, >=, \
- <, PRIu32, fmt)
-#define assert_u32_gt(a, b, fmt...) assert_cmp(uint32_t, a, b, >, \
- <=, PRIu32, fmt)
-
-#define assert_d64_eq(a, b, fmt...) assert_cmp(int64_t, a, b, ==, \
- !=, PRId64, fmt)
-#define assert_d64_ne(a, b, fmt...) assert_cmp(int64_t, a, b, !=, \
- ==, PRId64, fmt)
-#define assert_d64_lt(a, b, fmt...) assert_cmp(int64_t, a, b, <, \
- >=, PRId64, fmt)
-#define assert_d64_le(a, b, fmt...) assert_cmp(int64_t, a, b, <=, \
- >, PRId64, fmt)
-#define assert_d64_ge(a, b, fmt...) assert_cmp(int64_t, a, b, >=, \
- <, PRId64, fmt)
-#define assert_d64_gt(a, b, fmt...) assert_cmp(int64_t, a, b, >, \
- <=, PRId64, fmt)
-
-#define assert_u64_eq(a, b, fmt...) assert_cmp(uint64_t, a, b, ==, \
- !=, PRIu64, fmt)
-#define assert_u64_ne(a, b, fmt...) assert_cmp(uint64_t, a, b, !=, \
- ==, PRIu64, fmt)
-#define assert_u64_lt(a, b, fmt...) assert_cmp(uint64_t, a, b, <, \
- >=, PRIu64, fmt)
-#define assert_u64_le(a, b, fmt...) assert_cmp(uint64_t, a, b, <=, \
- >, PRIu64, fmt)
-#define assert_u64_ge(a, b, fmt...) assert_cmp(uint64_t, a, b, >=, \
- <, PRIu64, fmt)
-#define assert_u64_gt(a, b, fmt...) assert_cmp(uint64_t, a, b, >, \
- <=, PRIu64, fmt)
-
-#define assert_b_eq(a, b, fmt...) do { \
+#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \
+ !=, "p", __VA_ARGS__)
+#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \
+ ==, "p", __VA_ARGS__)
+#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \
+ !=, "p", __VA_ARGS__)
+#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \
+ ==, "p", __VA_ARGS__)
+
+#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__)
+#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__)
+#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__)
+#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__)
+#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__)
+#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__)
+
+#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__)
+#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__)
+#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__)
+#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__)
+#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__)
+#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__)
+
+#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__)
+#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__)
+#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__)
+#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__)
+#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__)
+#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__)
+
+#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__)
+#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__)
+#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__)
+#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__)
+#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__)
+#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__)
+
+#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \
+ !=, "ld", __VA_ARGS__)
+#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \
+ ==, "ld", __VA_ARGS__)
+#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \
+ >=, "ld", __VA_ARGS__)
+#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \
+ >, "ld", __VA_ARGS__)
+#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \
+ <, "ld", __VA_ARGS__)
+#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \
+ <=, "ld", __VA_ARGS__)
+
+#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \
+ a, b, ==, !=, "lu", __VA_ARGS__)
+#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \
+ a, b, !=, ==, "lu", __VA_ARGS__)
+#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \
+ a, b, <, >=, "lu", __VA_ARGS__)
+#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \
+ a, b, <=, >, "lu", __VA_ARGS__)
+#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \
+ a, b, >=, <, "lu", __VA_ARGS__)
+#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \
+ a, b, >, <=, "lu", __VA_ARGS__)
+
+#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \
+ !=, "qd", __VA_ARGS__)
+#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \
+ ==, "qd", __VA_ARGS__)
+#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \
+ >=, "qd", __VA_ARGS__)
+#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \
+ >, "qd", __VA_ARGS__)
+#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \
+ <, "qd", __VA_ARGS__)
+#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \
+ <=, "qd", __VA_ARGS__)
+
+#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, ==, !=, "qu", __VA_ARGS__)
+#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, !=, ==, "qu", __VA_ARGS__)
+#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, <, >=, "qu", __VA_ARGS__)
+#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, <=, >, "qu", __VA_ARGS__)
+#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, >=, <, "qu", __VA_ARGS__)
+#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \
+ a, b, >, <=, "qu", __VA_ARGS__)
+
+#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \
+ !=, "jd", __VA_ARGS__)
+#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \
+ ==, "jd", __VA_ARGS__)
+#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \
+ >=, "jd", __VA_ARGS__)
+#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \
+ >, "jd", __VA_ARGS__)
+#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \
+ <, "jd", __VA_ARGS__)
+#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \
+ <=, "jd", __VA_ARGS__)
+
+#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \
+ !=, "ju", __VA_ARGS__)
+#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \
+ ==, "ju", __VA_ARGS__)
+#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \
+ >=, "ju", __VA_ARGS__)
+#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \
+ >, "ju", __VA_ARGS__)
+#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \
+ <, "ju", __VA_ARGS__)
+#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \
+ <=, "ju", __VA_ARGS__)
+
+#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \
+ !=, "zd", __VA_ARGS__)
+#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \
+ ==, "zd", __VA_ARGS__)
+#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \
+ >=, "zd", __VA_ARGS__)
+#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \
+ >, "zd", __VA_ARGS__)
+#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \
+ <, "zd", __VA_ARGS__)
+#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \
+ <=, "zd", __VA_ARGS__)
+
+#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \
+ !=, "zu", __VA_ARGS__)
+#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \
+ ==, "zu", __VA_ARGS__)
+#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \
+ >=, "zu", __VA_ARGS__)
+#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \
+ >, "zu", __VA_ARGS__)
+#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \
+ <, "zu", __VA_ARGS__)
+#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \
+ <=, "zu", __VA_ARGS__)
+
+#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \
+ !=, FMTd32, __VA_ARGS__)
+#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \
+ ==, FMTd32, __VA_ARGS__)
+#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \
+ >=, FMTd32, __VA_ARGS__)
+#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \
+ >, FMTd32, __VA_ARGS__)
+#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \
+ <, FMTd32, __VA_ARGS__)
+#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \
+ <=, FMTd32, __VA_ARGS__)
+
+#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \
+ !=, FMTu32, __VA_ARGS__)
+#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \
+ ==, FMTu32, __VA_ARGS__)
+#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \
+ >=, FMTu32, __VA_ARGS__)
+#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \
+ >, FMTu32, __VA_ARGS__)
+#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \
+ <, FMTu32, __VA_ARGS__)
+#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \
+ <=, FMTu32, __VA_ARGS__)
+
+#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \
+ !=, FMTd64, __VA_ARGS__)
+#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \
+ ==, FMTd64, __VA_ARGS__)
+#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \
+ >=, FMTd64, __VA_ARGS__)
+#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \
+ >, FMTd64, __VA_ARGS__)
+#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \
+ <, FMTd64, __VA_ARGS__)
+#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \
+ <=, FMTd64, __VA_ARGS__)
+
+#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \
+ !=, FMTu64, __VA_ARGS__)
+#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \
+ ==, FMTu64, __VA_ARGS__)
+#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \
+ >=, FMTu64, __VA_ARGS__)
+#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \
+ >, FMTu64, __VA_ARGS__)
+#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \
+ <, FMTu64, __VA_ARGS__)
+#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \
+ <=, FMTu64, __VA_ARGS__)
+
+#define assert_b_eq(a, b, ...) do { \
bool a_ = (a); \
bool b_ = (b); \
if (!(a_ == b_)) { \
@@ -222,11 +222,11 @@
__func__, __FILE__, __LINE__, \
#a, #b, a_ ? "true" : "false", \
b_ ? "true" : "false"); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_b_ne(a, b, fmt...) do { \
+#define assert_b_ne(a, b, ...) do { \
bool a_ = (a); \
bool b_ = (b); \
if (!(a_ != b_)) { \
@@ -238,14 +238,14 @@
__func__, __FILE__, __LINE__, \
#a, #b, a_ ? "true" : "false", \
b_ ? "true" : "false"); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_true(a, fmt...) assert_b_eq(a, true, fmt)
-#define assert_false(a, fmt...) assert_b_eq(a, false, fmt)
+#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__)
+#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__)
-#define assert_str_eq(a, b, fmt...) do { \
+#define assert_str_eq(a, b, ...) do { \
if (strcmp((a), (b))) { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
@@ -254,11 +254,11 @@
"(%s) same as (%s) --> " \
"\"%s\" differs from \"%s\": ", \
__func__, __FILE__, __LINE__, #a, #b, a, b); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_str_ne(a, b, fmt...) do { \
+#define assert_str_ne(a, b, ...) do { \
if (!strcmp((a), (b))) { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
@@ -267,18 +267,18 @@
"(%s) differs from (%s) --> " \
"\"%s\" same as \"%s\": ", \
__func__, __FILE__, __LINE__, #a, #b, a, b); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} \
} while (0)
-#define assert_not_reached(fmt...) do { \
+#define assert_not_reached(...) do { \
char prefix[ASSERT_BUFSIZE]; \
char message[ASSERT_BUFSIZE]; \
malloc_snprintf(prefix, sizeof(prefix), \
"%s:%s:%d: Unreachable code reached: ", \
__func__, __FILE__, __LINE__); \
- malloc_snprintf(message, sizeof(message), fmt); \
+ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \
p_test_fail(prefix, message); \
} while (0)
@@ -296,22 +296,27 @@ typedef enum {
typedef void (test_t)(void);
-#define TEST_BEGIN(f) \
+#define TEST_BEGIN(f) \
static void \
-f(void) \
-{ \
+f(void) { \
p_test_init(#f);
-#define TEST_END \
+#define TEST_END \
goto label_test_end; \
label_test_end: \
p_test_fini(); \
}
-#define test(tests...) \
- p_test(tests, NULL)
+#define test(...) \
+ p_test(__VA_ARGS__, NULL)
+
+#define test_no_reentrancy(...) \
+ p_test_no_reentrancy(__VA_ARGS__, NULL)
-#define test_skip_if(e) do { \
+#define test_no_malloc_init(...) \
+ p_test_no_malloc_init(__VA_ARGS__, NULL)
+
+#define test_skip_if(e) do { \
if (e) { \
test_skip("%s:%s:%d: Test skipped: (%s)", \
__func__, __FILE__, __LINE__, #e); \
@@ -319,11 +324,15 @@ label_test_end: \
} \
} while (0)
-void test_skip(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2));
-void test_fail(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2));
+bool test_is_reentrant(void);
+
+void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
/* For private use by macros. */
-test_status_t p_test(test_t* t, ...);
+test_status_t p_test(test_t *t, ...);
+test_status_t p_test_no_reentrancy(test_t *t, ...);
+test_status_t p_test_no_malloc_init(test_t *t, ...);
void p_test_init(const char *name);
void p_test_fini(void);
void p_test_fail(const char *prefix, const char *message);
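The dominant change in this file is mechanical: every assertion macro moves from gcc's named-variadic extension (fmt...) to standard C99 variadics, so the test headers no longer depend on a GNU extension. The two spellings side by side, as a sketch:

	/* GNU extension: named variadic parameter; gcc/clang only. */
	#define fail_old(fmt...)	test_fail(fmt)
	/* C99: anonymous variadics; portable to any conforming compiler. */
	#define fail_new(...)		test_fail(__VA_ARGS__)

Plain C99 "..." requires at least one argument at each call site; the assert macros always receive a format string, so no ##__VA_ARGS__-style extension is needed.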
diff --git a/deps/jemalloc/test/include/test/thd.h b/deps/jemalloc/test/include/test/thd.h
index f941d7a75..47a51262e 100644
--- a/deps/jemalloc/test/include/test/thd.h
+++ b/deps/jemalloc/test/include/test/thd.h
@@ -1,4 +1,4 @@
-/* Abstraction layer for threading in tests */
+/* Abstraction layer for threading in tests. */
#ifdef _WIN32
typedef HANDLE thd_t;
#else
diff --git a/deps/jemalloc/test/include/test/timer.h b/deps/jemalloc/test/include/test/timer.h
new file mode 100644
index 000000000..ace6191b8
--- /dev/null
+++ b/deps/jemalloc/test/include/test/timer.h
@@ -0,0 +1,11 @@
+/* Simple timer, for use in benchmark reporting. */
+
+typedef struct {
+ nstime_t t0;
+ nstime_t t1;
+} timedelta_t;
+
+void timer_start(timedelta_t *timer);
+void timer_stop(timedelta_t *timer);
+uint64_t timer_usec(const timedelta_t *timer);
+void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen);
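The new timer API brackets a workload between two nstime_t snapshots; timer_usec() reports elapsed microseconds and timer_ratio() formats the a/b ratio of two runs into a caller-supplied buffer. A usage sketch (run_benchmark() is a hypothetical workload, not part of the patch):

	extern void run_benchmark(void);	/* Hypothetical workload. */

	timedelta_t timer;
	timer_start(&timer);
	run_benchmark();
	timer_stop(&timer);
	malloc_printf("elapsed: %"FMTu64" us\n", timer_usec(&timer));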
diff --git a/deps/jemalloc/test/integration/MALLOCX_ARENA.c b/deps/jemalloc/test/integration/MALLOCX_ARENA.c
index 71cf6f255..222164d69 100644
--- a/deps/jemalloc/test/integration/MALLOCX_ARENA.c
+++ b/deps/jemalloc/test/integration/MALLOCX_ARENA.c
@@ -1,41 +1,50 @@
#include "test/jemalloc_test.h"
-#define NTHREADS 10
+#define NTHREADS 10
+
+static bool have_dss =
+#ifdef JEMALLOC_DSS
+ true
+#else
+ false
+#endif
+ ;
void *
-thd_start(void *arg)
-{
+thd_start(void *arg) {
unsigned thread_ind = (unsigned)(uintptr_t)arg;
unsigned arena_ind;
void *p;
size_t sz;
sz = sizeof(arena_ind);
- assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0,
- "Error in arenas.extend");
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Error in arenas.create");
if (thread_ind % 4 != 3) {
size_t mib[3];
size_t miblen = sizeof(mib) / sizeof(size_t);
const char *dss_precs[] = {"disabled", "primary", "secondary"};
- const char *dss = dss_precs[thread_ind %
- (sizeof(dss_precs)/sizeof(char*))];
+ unsigned prec_ind = thread_ind %
+ (sizeof(dss_precs)/sizeof(char*));
+ const char *dss = dss_precs[prec_ind];
+ int expected_err = (have_dss || prec_ind == 0) ? 0 : EFAULT;
assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
"Error in mallctlnametomib()");
mib[1] = arena_ind;
assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss,
- sizeof(const char *)), 0, "Error in mallctlbymib()");
+ sizeof(const char *)), expected_err,
+ "Error in mallctlbymib()");
}
p = mallocx(1, MALLOCX_ARENA(arena_ind));
assert_ptr_not_null(p, "Unexpected mallocx() error");
dallocx(p, 0);
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_ALLOCM_ARENA)
-{
+TEST_BEGIN(test_MALLOCX_ARENA) {
thd_t thds[NTHREADS];
unsigned i;
@@ -44,15 +53,14 @@ TEST_BEGIN(test_ALLOCM_ARENA)
(void *)(uintptr_t)i);
}
- for (i = 0; i < NTHREADS; i++)
+ for (i = 0; i < NTHREADS; i++) {
thd_join(thds[i], NULL);
+ }
}
TEST_END
int
-main(void)
-{
-
- return (test(
- test_ALLOCM_ARENA));
+main(void) {
+ return test(
+ test_MALLOCX_ARENA);
}
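Beyond the brace churn, the substantive changes above are the mallctl rename (arenas.extend becomes arenas.create in jemalloc 5) and the new expected_err logic, which tolerates EFAULT when a dss precedence is requested on builds without DSS support. The create/allocate pairing, reduced to a sketch grounded in the test itself:

	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	void *p;

	/* Create a fresh arena and retrieve its index. */
	assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
	    0, "Error in arenas.create");
	/* Allocate explicitly from that arena. */
	p = mallocx(1, MALLOCX_ARENA(arena_ind));
	assert_ptr_not_null(p, "Unexpected mallocx() error");
	dallocx(p, 0);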
diff --git a/deps/jemalloc/test/integration/aligned_alloc.c b/deps/jemalloc/test/integration/aligned_alloc.c
index 609001487..536b67ea8 100644
--- a/deps/jemalloc/test/integration/aligned_alloc.c
+++ b/deps/jemalloc/test/integration/aligned_alloc.c
@@ -1,12 +1,19 @@
#include "test/jemalloc_test.h"
-#define CHUNK 0x400000
-/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
-#define MAXALIGN ((size_t)0x2000000LU)
-#define NITER 4
+#define MAXALIGN (((size_t)1) << 23)
-TEST_BEGIN(test_alignment_errors)
-{
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly. Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void) {
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl error");
+}
+
+TEST_BEGIN(test_alignment_errors) {
size_t alignment;
void *p;
@@ -27,8 +34,7 @@ TEST_BEGIN(test_alignment_errors)
}
TEST_END
-TEST_BEGIN(test_oom_errors)
-{
+TEST_BEGIN(test_oom_errors) {
size_t alignment, size;
void *p;
@@ -72,14 +78,15 @@ TEST_BEGIN(test_oom_errors)
}
TEST_END
-TEST_BEGIN(test_alignment_and_size)
-{
+TEST_BEGIN(test_alignment_and_size) {
+#define NITER 4
size_t alignment, size, total;
unsigned i;
void *ps[NITER];
- for (i = 0; i < NITER; i++)
+ for (i = 0; i < NITER; i++) {
ps[i] = NULL;
+ }
for (alignment = 8;
alignment <= MAXALIGN;
@@ -100,8 +107,9 @@ TEST_BEGIN(test_alignment_and_size)
alignment, size, size, buf);
}
total += malloc_usable_size(ps[i]);
- if (total >= (MAXALIGN << 1))
+ if (total >= (MAXALIGN << 1)) {
break;
+ }
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
@@ -110,16 +118,16 @@ TEST_BEGIN(test_alignment_and_size)
}
}
}
+ purge();
}
+#undef NITER
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_alignment_errors,
test_oom_errors,
- test_alignment_and_size));
+ test_alignment_and_size);
}
diff --git a/deps/jemalloc/test/integration/allocated.c b/deps/jemalloc/test/integration/allocated.c
index 3630e80ce..1425fd0aa 100644
--- a/deps/jemalloc/test/integration/allocated.c
+++ b/deps/jemalloc/test/integration/allocated.c
@@ -9,8 +9,7 @@ static const bool config_stats =
;
void *
-thd_start(void *arg)
-{
+thd_start(void *arg) {
int err;
void *p;
uint64_t a0, a1, d0, d1;
@@ -18,16 +17,18 @@ thd_start(void *arg)
size_t sz, usize;
sz = sizeof(a0);
- if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) {
- if (err == ENOENT)
+ if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) {
+ if (err == ENOENT) {
goto label_ENOENT;
+ }
test_fail("%s(): Error in mallctl(): %s", __func__,
strerror(err));
}
sz = sizeof(ap0);
- if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) {
- if (err == ENOENT)
+ if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) {
+ if (err == ENOENT) {
goto label_ENOENT;
+ }
test_fail("%s(): Error in mallctl(): %s", __func__,
strerror(err));
}
@@ -36,16 +37,19 @@ thd_start(void *arg)
"storage");
sz = sizeof(d0);
- if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) {
- if (err == ENOENT)
+ if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) {
+ if (err == ENOENT) {
goto label_ENOENT;
+ }
test_fail("%s(): Error in mallctl(): %s", __func__,
strerror(err));
}
sz = sizeof(dp0);
- if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) {
- if (err == ENOENT)
+ if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL,
+ 0))) {
+ if (err == ENOENT) {
goto label_ENOENT;
+ }
test_fail("%s(): Error in mallctl(): %s", __func__,
strerror(err));
}
@@ -57,9 +61,9 @@ thd_start(void *arg)
assert_ptr_not_null(p, "Unexpected malloc() error");
sz = sizeof(a1);
- mallctl("thread.allocated", &a1, &sz, NULL, 0);
+ mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0);
sz = sizeof(ap1);
- mallctl("thread.allocatedp", &ap1, &sz, NULL, 0);
+ mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0);
assert_u64_eq(*ap1, a1,
"Dereferenced \"thread.allocatedp\" value should equal "
"\"thread.allocated\" value");
@@ -74,9 +78,9 @@ thd_start(void *arg)
free(p);
sz = sizeof(d1);
- mallctl("thread.deallocated", &d1, &sz, NULL, 0);
+ mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0);
sz = sizeof(dp1);
- mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0);
+ mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0);
assert_u64_eq(*dp1, d1,
"Dereferenced \"thread.deallocatedp\" value should equal "
"\"thread.deallocated\" value");
@@ -87,23 +91,20 @@ thd_start(void *arg)
"Deallocated memory counter should increase by at least the amount "
"explicitly deallocated");
- return (NULL);
+ return NULL;
label_ENOENT:
assert_false(config_stats,
"ENOENT should only be returned if stats are disabled");
test_skip("\"thread.allocated\" mallctl not available");
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_main_thread)
-{
-
+TEST_BEGIN(test_main_thread) {
thd_start(NULL);
}
TEST_END
-TEST_BEGIN(test_subthread)
-{
+TEST_BEGIN(test_subthread) {
thd_t thd;
thd_create(&thd, thd_start, NULL);
@@ -112,14 +113,12 @@ TEST_BEGIN(test_subthread)
TEST_END
int
-main(void)
-{
-
+main(void) {
/* Run tests multiple times to check for bad interactions. */
- return (test(
+ return test(
test_main_thread,
test_subthread,
test_main_thread,
test_subthread,
- test_main_thread));
+ test_main_thread);
}
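The mallctl churn above is the explicit (void *) cast required by jemalloc 5's stricter mallctl usage; the counters themselves behave as before: thread.allocated and thread.deallocated are cumulative per-thread byte counts, and the *p variants return pointers to the underlying counters so they can be re-read without further mallctl calls. A sketch:

	uint64_t allocated, *allocatedp;
	size_t sz = sizeof(uint64_t);

	mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0);
	sz = sizeof(uint64_t *);
	mallctl("thread.allocatedp", (void *)&allocatedp, &sz, NULL, 0);
	/* From here on, *allocatedp tracks this thread with no mallctl call. */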
diff --git a/deps/jemalloc/test/integration/allocm.c b/deps/jemalloc/test/integration/allocm.c
deleted file mode 100644
index 7b4ea0c2c..000000000
--- a/deps/jemalloc/test/integration/allocm.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "test/jemalloc_test.h"
-
-#define CHUNK 0x400000
-#define MAXALIGN (((size_t)1) << 25)
-#define NITER 4
-
-TEST_BEGIN(test_basic)
-{
- size_t nsz, rsz, sz;
- void *p;
-
- sz = 42;
- nsz = 0;
- assert_d_eq(nallocm(&nsz, sz, 0), ALLOCM_SUCCESS,
- "Unexpected nallocm() error");
- rsz = 0;
- assert_d_eq(allocm(&p, &rsz, sz, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_zu_ge(rsz, sz, "Real size smaller than expected");
- assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-
- assert_d_eq(allocm(&p, NULL, sz, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-
- nsz = 0;
- assert_d_eq(nallocm(&nsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS,
- "Unexpected nallocm() error");
- rsz = 0;
- assert_d_eq(allocm(&p, &rsz, sz, ALLOCM_ZERO), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
- assert_zu_eq(nsz, rsz, "nallocm()/allocm() rsize mismatch");
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_alignment_and_size)
-{
- int r;
- size_t nsz, rsz, sz, alignment, total;
- unsigned i;
- void *ps[NITER];
-
- for (i = 0; i < NITER; i++)
- ps[i] = NULL;
-
- for (alignment = 8;
- alignment <= MAXALIGN;
- alignment <<= 1) {
- total = 0;
- for (sz = 1;
- sz < 3 * alignment && sz < (1U << 31);
- sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
- for (i = 0; i < NITER; i++) {
- nsz = 0;
- r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment) |
- ALLOCM_ZERO);
- assert_d_eq(r, ALLOCM_SUCCESS,
- "nallocm() error for alignment=%zu, "
- "size=%zu (%#zx): %d",
- alignment, sz, sz, r);
- rsz = 0;
- r = allocm(&ps[i], &rsz, sz,
- ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
- assert_d_eq(r, ALLOCM_SUCCESS,
- "allocm() error for alignment=%zu, "
- "size=%zu (%#zx): %d",
- alignment, sz, sz, r);
- assert_zu_ge(rsz, sz,
- "Real size smaller than expected for "
- "alignment=%zu, size=%zu", alignment, sz);
- assert_zu_eq(nsz, rsz,
- "nallocm()/allocm() rsize mismatch for "
- "alignment=%zu, size=%zu", alignment, sz);
- assert_ptr_null(
- (void *)((uintptr_t)ps[i] & (alignment-1)),
- "%p inadequately aligned for"
- " alignment=%zu, size=%zu", ps[i],
- alignment, sz);
- sallocm(ps[i], &rsz, 0);
- total += rsz;
- if (total >= (MAXALIGN << 1))
- break;
- }
- for (i = 0; i < NITER; i++) {
- if (ps[i] != NULL) {
- dallocm(ps[i], 0);
- ps[i] = NULL;
- }
- }
- }
- }
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_basic,
- test_alignment_and_size));
-}
diff --git a/deps/jemalloc/test/integration/cpp/basic.cpp b/deps/jemalloc/test/integration/cpp/basic.cpp
new file mode 100644
index 000000000..65890ecd5
--- /dev/null
+++ b/deps/jemalloc/test/integration/cpp/basic.cpp
@@ -0,0 +1,25 @@
+#include <memory>
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_basic) {
+ auto foo = new long(4);
+ assert_ptr_not_null(foo, "Unexpected new[] failure");
+ delete foo;
+ // Test nullptr handling.
+ foo = nullptr;
+ delete foo;
+
+ auto bar = new long;
+ assert_ptr_not_null(bar, "Unexpected new failure");
+ delete bar;
+ // Test nullptr handling.
+ bar = nullptr;
+ delete bar;
+}
+TEST_END
+
+int
+main() {
+ return test(
+ test_basic);
+}
diff --git a/deps/jemalloc/test/integration/extent.c b/deps/jemalloc/test/integration/extent.c
new file mode 100644
index 000000000..b5db08766
--- /dev/null
+++ b/deps/jemalloc/test/integration/extent.c
@@ -0,0 +1,248 @@
+#include "test/jemalloc_test.h"
+
+#include "test/extent_hooks.h"
+
+static bool
+check_background_thread_enabled(void) {
+ bool enabled;
+ size_t sz = sizeof(bool);
+ int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0);
+ if (ret == ENOENT) {
+ return false;
+ }
+ assert_d_eq(ret, 0, "Unexpected mallctl error");
+ return enabled;
+}
+
+static void
+test_extent_body(unsigned arena_ind) {
+ void *p;
+ size_t large0, large1, large2, sz;
+ size_t purge_mib[3];
+ size_t purge_miblen;
+ int flags;
+ bool xallocx_success_a, xallocx_success_b, xallocx_success_c;
+
+ flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+ /* Get large size classes. */
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
+ 0), 0, "Unexpected arenas.lextent.0.size failure");
+ assert_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL,
+ 0), 0, "Unexpected arenas.lextent.1.size failure");
+ assert_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL,
+ 0), 0, "Unexpected arenas.lextent.2.size failure");
+
+ /* Test dalloc/decommit/purge cascade. */
+ purge_miblen = sizeof(purge_mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen),
+ 0, "Unexpected mallctlnametomib() failure");
+ purge_mib[1] = (size_t)arena_ind;
+ called_alloc = false;
+ try_alloc = true;
+ try_dalloc = false;
+ try_decommit = false;
+ p = mallocx(large0 * 2, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ assert_true(called_alloc, "Expected alloc call");
+ called_dalloc = false;
+ called_decommit = false;
+ did_purge_lazy = false;
+ did_purge_forced = false;
+ called_split = false;
+ xallocx_success_a = (xallocx(p, large0, 0, flags) == large0);
+ assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0),
+ 0, "Unexpected arena.%u.purge error", arena_ind);
+ if (xallocx_success_a) {
+ assert_true(called_dalloc, "Expected dalloc call");
+ assert_true(called_decommit, "Expected decommit call");
+ assert_true(did_purge_lazy || did_purge_forced,
+ "Expected purge");
+ }
+ assert_true(called_split, "Expected split call");
+ dallocx(p, flags);
+ try_dalloc = true;
+
+ /* Test decommit/commit and observe split/merge. */
+ try_dalloc = false;
+ try_decommit = true;
+ p = mallocx(large0 * 2, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ did_decommit = false;
+ did_commit = false;
+ called_split = false;
+ did_split = false;
+ did_merge = false;
+ xallocx_success_b = (xallocx(p, large0, 0, flags) == large0);
+ assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0),
+ 0, "Unexpected arena.%u.purge error", arena_ind);
+ if (xallocx_success_b) {
+ assert_true(did_split, "Expected split");
+ }
+ xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2);
+ if (did_split) {
+ assert_b_eq(did_decommit, did_commit,
+ "Expected decommit/commit match");
+ }
+ if (xallocx_success_b && xallocx_success_c) {
+ assert_true(did_merge, "Expected merge");
+ }
+ dallocx(p, flags);
+ try_dalloc = true;
+ try_decommit = false;
+
+ /* Make sure non-large allocation succeeds. */
+ p = mallocx(42, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ dallocx(p, flags);
+}
+
+static void
+test_manual_hook_auto_arena(void) {
+ unsigned narenas;
+ size_t old_size, new_size, sz;
+ size_t hooks_mib[3];
+ size_t hooks_miblen;
+ extent_hooks_t *new_hooks, *old_hooks;
+
+ extent_hooks_prep();
+
+ sz = sizeof(unsigned);
+ /* Get number of auto arenas. */
+ assert_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+ if (narenas == 1) {
+ return;
+ }
+
+ /* Install custom extent hooks on arena 1 (might not be initialized). */
+ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib,
+ &hooks_miblen), 0, "Unexpected mallctlnametomib() failure");
+ hooks_mib[1] = 1;
+ old_size = sizeof(extent_hooks_t *);
+ new_hooks = &hooks;
+ new_size = sizeof(extent_hooks_t *);
+ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
+ &old_size, (void *)&new_hooks, new_size), 0,
+ "Unexpected extent_hooks error");
+ static bool auto_arena_created = false;
+ if (old_hooks != &hooks) {
+ assert_b_eq(auto_arena_created, false,
+ "Expected auto arena 1 created only once.");
+ auto_arena_created = true;
+ }
+}
+
+static void
+test_manual_hook_body(void) {
+ unsigned arena_ind;
+ size_t old_size, new_size, sz;
+ size_t hooks_mib[3];
+ size_t hooks_miblen;
+ extent_hooks_t *new_hooks, *old_hooks;
+
+ extent_hooks_prep();
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+
+ /* Install custom extent hooks. */
+ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib,
+ &hooks_miblen), 0, "Unexpected mallctlnametomib() failure");
+ hooks_mib[1] = (size_t)arena_ind;
+ old_size = sizeof(extent_hooks_t *);
+ new_hooks = &hooks;
+ new_size = sizeof(extent_hooks_t *);
+ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
+ &old_size, (void *)&new_hooks, new_size), 0,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->alloc, extent_alloc_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->dalloc, extent_dalloc_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->commit, extent_commit_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->decommit, extent_decommit_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->split, extent_split_hook,
+ "Unexpected extent_hooks error");
+ assert_ptr_ne(old_hooks->merge, extent_merge_hook,
+ "Unexpected extent_hooks error");
+
+ if (!check_background_thread_enabled()) {
+ test_extent_body(arena_ind);
+ }
+
+ /* Restore extent hooks. */
+ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL,
+ (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error");
+ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks,
+ &old_size, NULL, 0), 0, "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->alloc, default_hooks->alloc,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->dalloc, default_hooks->dalloc,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->commit, default_hooks->commit,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->decommit, default_hooks->decommit,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->split, default_hooks->split,
+ "Unexpected extent_hooks error");
+ assert_ptr_eq(old_hooks->merge, default_hooks->merge,
+ "Unexpected extent_hooks error");
+}
+
+TEST_BEGIN(test_extent_manual_hook) {
+ test_manual_hook_auto_arena();
+ test_manual_hook_body();
+
+ /* Test failure paths. */
+ try_split = false;
+ test_manual_hook_body();
+ try_merge = false;
+ test_manual_hook_body();
+ try_purge_lazy = false;
+ try_purge_forced = false;
+ test_manual_hook_body();
+
+ try_split = try_merge = try_purge_lazy = try_purge_forced = true;
+}
+TEST_END
+
+TEST_BEGIN(test_extent_auto_hook) {
+ unsigned arena_ind;
+ size_t new_size, sz;
+ extent_hooks_t *new_hooks;
+
+ extent_hooks_prep();
+
+ sz = sizeof(unsigned);
+ new_hooks = &hooks;
+ new_size = sizeof(extent_hooks_t *);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
+ (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure");
+
+ test_skip_if(check_background_thread_enabled());
+ test_extent_body(arena_ind);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_extent_manual_hook,
+ test_extent_auto_hook);
+}
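The two tests above cover both installation paths for custom extent hooks: test_extent_manual_hook swaps hooks into an existing arena through the arena.<i>.extent_hooks mib (then verifies the defaults on restore), while test_extent_auto_hook hands the hook table to arenas.create so the arena uses it from birth. The latter pattern, reduced to a sketch (hooks is the table defined in test/extent_hooks.h):

	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	extent_hooks_t *new_hooks = &hooks;

	/* Create an arena wired to the custom hook table from the start. */
	assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
	    (void *)&new_hooks, sizeof(extent_hooks_t *)), 0,
	    "Unexpected mallctl() failure");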
diff --git a/deps/jemalloc/test/integration/extent.sh b/deps/jemalloc/test/integration/extent.sh
new file mode 100644
index 000000000..0cc218737
--- /dev/null
+++ b/deps/jemalloc/test/integration/extent.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="junk:false"
+fi
diff --git a/deps/jemalloc/test/integration/mallocx.c b/deps/jemalloc/test/integration/mallocx.c
index 123e041fa..fd960f30c 100644
--- a/deps/jemalloc/test/integration/mallocx.c
+++ b/deps/jemalloc/test/integration/mallocx.c
@@ -1,46 +1,173 @@
#include "test/jemalloc_test.h"
-#define CHUNK 0x400000
-#define MAXALIGN (((size_t)1) << 25)
-#define NITER 4
-
-TEST_BEGIN(test_basic)
-{
- size_t nsz, rsz, sz;
- void *p;
-
- sz = 42;
- nsz = nallocx(sz, 0);
- assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
- p = mallocx(sz, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- rsz = sallocx(p, 0);
- assert_zu_ge(rsz, sz, "Real size smaller than expected");
- assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
- dallocx(p, 0);
-
- p = mallocx(sz, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- dallocx(p, 0);
-
- nsz = nallocx(sz, MALLOCX_ZERO);
- assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
- p = mallocx(sz, MALLOCX_ZERO);
- assert_ptr_not_null(p, "Unexpected mallocx() error");
- rsz = sallocx(p, 0);
- assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch");
- dallocx(p, 0);
+static unsigned
+get_nsizes_impl(const char *cmd) {
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return ret;
+}
+
+static unsigned
+get_nlarge(void) {
+ return get_nsizes_impl("arenas.nlextents");
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind) {
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return ret;
+}
+
+static size_t
+get_large_size(size_t ind) {
+ return get_size_impl("arenas.lextent.0.size", ind);
+}
+
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly. Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void) {
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl error");
+}
+
+TEST_BEGIN(test_overflow) {
+ size_t largemax;
+
+ largemax = get_large_size(get_nlarge()-1);
+
+ assert_ptr_null(mallocx(largemax+1, 0),
+ "Expected OOM for mallocx(size=%#zx, 0)", largemax+1);
+
+ assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0),
+ "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1);
+
+ assert_ptr_null(mallocx(SIZE_T_MAX, 0),
+ "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX);
+
+ assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
+ "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))",
+ ZU(PTRDIFF_MAX)+1);
+}
+TEST_END
+
+TEST_BEGIN(test_oom) {
+ size_t largemax;
+ bool oom;
+ void *ptrs[3];
+ unsigned i;
+
+ /*
+ * It should be impossible to allocate three objects that each consume
+ * nearly half the virtual address space.
+ */
+ largemax = get_large_size(get_nlarge()-1);
+ oom = false;
+ for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) {
+ ptrs[i] = mallocx(largemax, 0);
+ if (ptrs[i] == NULL) {
+ oom = true;
+ }
+ }
+ assert_true(oom,
+ "Expected OOM during series of calls to mallocx(size=%zu, 0)",
+ largemax);
+ for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) {
+ if (ptrs[i] != NULL) {
+ dallocx(ptrs[i], 0);
+ }
+ }
+ purge();
+
+#if LG_SIZEOF_PTR == 3
+ assert_ptr_null(mallocx(0x8000000000000000ULL,
+ MALLOCX_ALIGN(0x8000000000000000ULL)),
+ "Expected OOM for mallocx()");
+ assert_ptr_null(mallocx(0x8000000000000000ULL,
+ MALLOCX_ALIGN(0x80000000)),
+ "Expected OOM for mallocx()");
+#else
+ assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)),
+ "Expected OOM for mallocx()");
+#endif
+}
+TEST_END
+
+TEST_BEGIN(test_basic) {
+#define MAXSZ (((size_t)1) << 23)
+ size_t sz;
+
+ for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) {
+ size_t nsz, rsz;
+ void *p;
+ nsz = nallocx(sz, 0);
+ assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
+ p = mallocx(sz, 0);
+ assert_ptr_not_null(p,
+ "Unexpected mallocx(size=%zx, flags=0) error", sz);
+ rsz = sallocx(p, 0);
+ assert_zu_ge(rsz, sz, "Real size smaller than expected");
+ assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch");
+ dallocx(p, 0);
+
+ p = mallocx(sz, 0);
+ assert_ptr_not_null(p,
+ "Unexpected mallocx(size=%zx, flags=0) error", sz);
+ dallocx(p, 0);
+
+ nsz = nallocx(sz, MALLOCX_ZERO);
+ assert_zu_ne(nsz, 0, "Unexpected nallocx() error");
+ p = mallocx(sz, MALLOCX_ZERO);
+ assert_ptr_not_null(p,
+ "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error",
+ nsz);
+ rsz = sallocx(p, 0);
+ assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch");
+ dallocx(p, 0);
+ purge();
+ }
+#undef MAXSZ
}
TEST_END
-TEST_BEGIN(test_alignment_and_size)
-{
- size_t nsz, rsz, sz, alignment, total;
+TEST_BEGIN(test_alignment_and_size) {
+ const char *percpu_arena;
+ size_t sz = sizeof(percpu_arena);
+
+ if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) ||
+ strcmp(percpu_arena, "disabled") != 0) {
+ test_skip("test_alignment_and_size skipped: "
+ "not compatible with percpu arena.");
+ }
+#define MAXALIGN (((size_t)1) << 23)
+#define NITER 4
+ size_t nsz, rsz, alignment, total;
unsigned i;
void *ps[NITER];
- for (i = 0; i < NITER; i++)
+ for (i = 0; i < NITER; i++) {
ps[i] = NULL;
+ }
for (alignment = 8;
alignment <= MAXALIGN;
@@ -73,8 +200,9 @@ TEST_BEGIN(test_alignment_and_size)
" alignment=%zu, size=%zu", ps[i],
alignment, sz);
total += rsz;
- if (total >= (MAXALIGN << 1))
+ if (total >= (MAXALIGN << 1)) {
break;
+ }
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
@@ -83,15 +211,18 @@ TEST_BEGIN(test_alignment_and_size)
}
}
}
+ purge();
}
+#undef MAXALIGN
+#undef NITER
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
+ test_overflow,
+ test_oom,
test_basic,
- test_alignment_and_size));
+ test_alignment_and_size);
}
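The get_nsizes_impl()/get_size_impl() helpers added above follow the standard pattern for indexed mallctl reads: translate the dotted name to a mib once, patch the index element, then query by mib. Reduced to a sketch:

	size_t mib[4];
	size_t miblen = sizeof(mib) / sizeof(size_t);
	size_t lsize, sz = sizeof(size_t);

	/* "arenas.lextent.0.size" -> mib; element 2 is the class index. */
	assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
	    "Unexpected mallctlnametomib() failure");
	mib[2] = 3;	/* Query the fourth large size class. */
	assert_d_eq(mallctlbymib(mib, miblen, (void *)&lsize, &sz, NULL, 0), 0,
	    "Unexpected mallctlbymib() failure");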
diff --git a/deps/jemalloc/test/integration/mallocx.sh b/deps/jemalloc/test/integration/mallocx.sh
new file mode 100644
index 000000000..0cc218737
--- /dev/null
+++ b/deps/jemalloc/test/integration/mallocx.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="junk:false"
+fi
diff --git a/deps/jemalloc/test/integration/mremap.c b/deps/jemalloc/test/integration/mremap.c
deleted file mode 100644
index a7fb7ef0a..000000000
--- a/deps/jemalloc/test/integration/mremap.c
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "test/jemalloc_test.h"
-
-TEST_BEGIN(test_mremap)
-{
- int err;
- size_t sz, lg_chunk, chunksize, i;
- char *p, *q;
-
- sz = sizeof(lg_chunk);
- err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0);
- assert_d_eq(err, 0, "Error in mallctl(): %s", strerror(err));
- chunksize = ((size_t)1U) << lg_chunk;
-
- p = (char *)malloc(chunksize);
- assert_ptr_not_null(p, "malloc(%zu) --> %p", chunksize, p);
- memset(p, 'a', chunksize);
-
- q = (char *)realloc(p, chunksize * 2);
- assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize * 2,
- q);
- for (i = 0; i < chunksize; i++) {
- assert_c_eq(q[i], 'a',
- "realloc() should preserve existing bytes across copies");
- }
-
- p = q;
-
- q = (char *)realloc(p, chunksize);
- assert_ptr_not_null(q, "realloc(%p, %zu) --> %p", p, chunksize, q);
- for (i = 0; i < chunksize; i++) {
- assert_c_eq(q[i], 'a',
- "realloc() should preserve existing bytes across copies");
- }
-
- free(q);
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_mremap));
-}
diff --git a/deps/jemalloc/test/integration/overflow.c b/deps/jemalloc/test/integration/overflow.c
new file mode 100644
index 000000000..6a9785b2e
--- /dev/null
+++ b/deps/jemalloc/test/integration/overflow.c
@@ -0,0 +1,46 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_overflow) {
+ unsigned nlextents;
+ size_t mib[4];
+ size_t sz, miblen, max_size_class;
+ void *p;
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
+ 0), 0, "Unexpected mallctl() error");
+
+ miblen = sizeof(mib) / sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+ mib[2] = nlextents - 1;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz,
+ NULL, 0), 0, "Unexpected mallctlbymib() error");
+
+ assert_ptr_null(malloc(max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(malloc(SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+
+ assert_ptr_null(calloc(1, max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(calloc(1, SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+
+ p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() OOM");
+ assert_ptr_null(realloc(p, max_size_class + 1),
+ "Expected OOM due to over-sized allocation request");
+ assert_ptr_null(realloc(p, SIZE_T_MAX),
+ "Expected OOM due to over-sized allocation request");
+ free(p);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_overflow);
+}
diff --git a/deps/jemalloc/test/integration/posix_memalign.c b/deps/jemalloc/test/integration/posix_memalign.c
index 19741c6cb..2c2726de8 100644
--- a/deps/jemalloc/test/integration/posix_memalign.c
+++ b/deps/jemalloc/test/integration/posix_memalign.c
@@ -1,12 +1,19 @@
#include "test/jemalloc_test.h"
-#define CHUNK 0x400000
-/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
-#define MAXALIGN ((size_t)0x2000000LU)
-#define NITER 4
+#define MAXALIGN (((size_t)1) << 23)
-TEST_BEGIN(test_alignment_errors)
-{
+/*
+ * On systems which can't merge extents, tests that call this function generate
+ * a lot of dirty memory very quickly. Purging between cycles mitigates
+ * potential OOM on e.g. 32-bit Windows.
+ */
+static void
+purge(void) {
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl error");
+}
+
+TEST_BEGIN(test_alignment_errors) {
size_t alignment;
void *p;
@@ -25,8 +32,7 @@ TEST_BEGIN(test_alignment_errors)
}
TEST_END
-TEST_BEGIN(test_oom_errors)
-{
+TEST_BEGIN(test_oom_errors) {
size_t alignment, size;
void *p;
@@ -64,15 +70,16 @@ TEST_BEGIN(test_oom_errors)
}
TEST_END
-TEST_BEGIN(test_alignment_and_size)
-{
+TEST_BEGIN(test_alignment_and_size) {
+#define NITER 4
size_t alignment, size, total;
unsigned i;
int err;
void *ps[NITER];
- for (i = 0; i < NITER; i++)
+ for (i = 0; i < NITER; i++) {
ps[i] = NULL;
+ }
for (alignment = 8;
alignment <= MAXALIGN;
@@ -94,8 +101,9 @@ TEST_BEGIN(test_alignment_and_size)
alignment, size, size, buf);
}
total += malloc_usable_size(ps[i]);
- if (total >= (MAXALIGN << 1))
+ if (total >= (MAXALIGN << 1)) {
break;
+ }
}
for (i = 0; i < NITER; i++) {
if (ps[i] != NULL) {
@@ -104,16 +112,16 @@ TEST_BEGIN(test_alignment_and_size)
}
}
}
+ purge();
}
+#undef NITER
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_alignment_errors,
test_oom_errors,
- test_alignment_and_size));
+ test_alignment_and_size);
}
diff --git a/deps/jemalloc/test/integration/rallocm.c b/deps/jemalloc/test/integration/rallocm.c
deleted file mode 100644
index 33c11bb7c..000000000
--- a/deps/jemalloc/test/integration/rallocm.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "test/jemalloc_test.h"
-
-TEST_BEGIN(test_same_size)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE), ALLOCM_SUCCESS,
- "Unexpected rallocm() error");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_extra_no_move)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz, sz-42, ALLOCM_NO_MOVE),
- ALLOCM_SUCCESS, "Unexpected rallocm() error");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_no_move_fail)
-{
- void *p, *q;
- size_t sz, tsz;
-
- assert_d_eq(allocm(&p, &sz, 42, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- q = p;
- assert_d_eq(rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE),
- ALLOCM_ERR_NOT_MOVED, "Unexpected rallocm() result");
- assert_ptr_eq(q, p, "Unexpected object move");
- assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz);
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-TEST_BEGIN(test_grow_and_shrink)
-{
- void *p, *q;
- size_t tsz;
-#define NCYCLES 3
- unsigned i, j;
-#define NSZS 2500
- size_t szs[NSZS];
-#define MAXSZ ZU(12 * 1024 * 1024)
-
- assert_d_eq(allocm(&p, &szs[0], 1, 0), ALLOCM_SUCCESS,
- "Unexpected allocm() error");
-
- for (i = 0; i < NCYCLES; i++) {
- for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) {
- q = p;
- assert_d_eq(rallocm(&q, &szs[j], szs[j-1]+1, 0, 0),
- ALLOCM_SUCCESS,
- "Unexpected rallocm() error for size=%zu-->%zu",
- szs[j-1], szs[j-1]+1);
- assert_zu_ne(szs[j], szs[j-1]+1,
- "Expected size to at least: %zu", szs[j-1]+1);
- p = q;
- }
-
- for (j--; j > 0; j--) {
- q = p;
- assert_d_eq(rallocm(&q, &tsz, szs[j-1], 0, 0),
- ALLOCM_SUCCESS,
- "Unexpected rallocm() error for size=%zu-->%zu",
- szs[j], szs[j-1]);
- assert_zu_eq(tsz, szs[j-1],
- "Expected size=%zu, got size=%zu", szs[j-1], tsz);
- p = q;
- }
- }
-
- assert_d_eq(dallocm(p, 0), ALLOCM_SUCCESS,
- "Unexpected dallocm() error");
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_same_size,
- test_extra_no_move,
- test_no_move_fail,
- test_grow_and_shrink));
-}
diff --git a/deps/jemalloc/test/integration/rallocx.c b/deps/jemalloc/test/integration/rallocx.c
index ee21aedff..7821ca5f5 100644
--- a/deps/jemalloc/test/integration/rallocx.c
+++ b/deps/jemalloc/test/integration/rallocx.c
@@ -1,14 +1,53 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_grow_and_shrink)
-{
+static unsigned
+get_nsizes_impl(const char *cmd) {
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return ret;
+}
+
+static unsigned
+get_nlarge(void) {
+ return get_nsizes_impl("arenas.nlextents");
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind) {
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return ret;
+}
+
+static size_t
+get_large_size(size_t ind) {
+ return get_size_impl("arenas.lextent.0.size", ind);
+}
+
+TEST_BEGIN(test_grow_and_shrink) {
void *p, *q;
size_t tsz;
-#define NCYCLES 3
+#define NCYCLES 3
unsigned i, j;
-#define NSZS 2500
+#define NSZS 1024
size_t szs[NSZS];
-#define MAXSZ ZU(12 * 1024 * 1024)
+#define MAXSZ ZU(12 * 1024 * 1024)
p = mallocx(1, 0);
assert_ptr_not_null(p, "Unexpected mallocx() error");
@@ -22,7 +61,7 @@ TEST_BEGIN(test_grow_and_shrink)
szs[j-1], szs[j-1]+1);
szs[j] = sallocx(q, 0);
assert_zu_ne(szs[j], szs[j-1]+1,
- "Expected size to at least: %zu", szs[j-1]+1);
+ "Expected size to be at least: %zu", szs[j-1]+1);
p = q;
}
@@ -46,8 +85,7 @@ TEST_BEGIN(test_grow_and_shrink)
TEST_END
static bool
-validate_fill(const void *p, uint8_t c, size_t offset, size_t len)
-{
+validate_fill(const void *p, uint8_t c, size_t offset, size_t len) {
bool ret = false;
const uint8_t *buf = (const uint8_t *)p;
size_t i;
@@ -55,22 +93,22 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len)
for (i = 0; i < len; i++) {
uint8_t b = buf[offset+i];
if (b != c) {
- test_fail("Allocation at %p contains %#x rather than "
- "%#x at offset %zu", p, b, c, offset+i);
+ test_fail("Allocation at %p (len=%zu) contains %#x "
+ "rather than %#x at offset %zu", p, len, b, c,
+ offset+i);
ret = true;
}
}
- return (ret);
+ return ret;
}
-TEST_BEGIN(test_zero)
-{
+TEST_BEGIN(test_zero) {
void *p, *q;
size_t psz, qsz, i, j;
size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024};
-#define FILL_BYTE 0xaaU
-#define RANGE 2048
+#define FILL_BYTE 0xaaU
+#define RANGE 2048
for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) {
size_t start_size = start_sizes[i];
@@ -95,7 +133,8 @@ TEST_BEGIN(test_zero)
"Expected zeroed memory");
}
if (psz != qsz) {
- memset(q+psz, FILL_BYTE, qsz-psz);
+ memset((void *)((uintptr_t)q+psz), FILL_BYTE,
+ qsz-psz);
psz = qsz;
}
p = q;
@@ -108,11 +147,10 @@ TEST_BEGIN(test_zero)
}
TEST_END
-TEST_BEGIN(test_align)
-{
+TEST_BEGIN(test_align) {
void *p, *q;
size_t align;
-#define MAX_ALIGN (ZU(1) << 25)
+#define MAX_ALIGN (ZU(1) << 25)
align = ZU(1);
p = mallocx(1, MALLOCX_ALIGN(align));
@@ -133,25 +171,24 @@ TEST_BEGIN(test_align)
}
TEST_END
-TEST_BEGIN(test_lg_align_and_zero)
-{
+TEST_BEGIN(test_lg_align_and_zero) {
void *p, *q;
- size_t lg_align, sz;
-#define MAX_LG_ALIGN 25
-#define MAX_VALIDATE (ZU(1) << 22)
+ unsigned lg_align;
+ size_t sz;
+#define MAX_LG_ALIGN 25
+#define MAX_VALIDATE (ZU(1) << 22)
- lg_align = ZU(0);
+ lg_align = 0;
p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO);
assert_ptr_not_null(p, "Unexpected mallocx() error");
for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) {
q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO);
assert_ptr_not_null(q,
- "Unexpected rallocx() error for lg_align=%zu", lg_align);
+ "Unexpected rallocx() error for lg_align=%u", lg_align);
assert_ptr_null(
(void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)),
- "%p inadequately aligned for lg_align=%zu",
- q, lg_align);
+ "%p inadequately aligned for lg_align=%u", q, lg_align);
sz = sallocx(q, 0);
if ((sz << 1) <= MAX_VALIDATE) {
assert_false(validate_fill(q, 0, 0, sz),
@@ -159,8 +196,9 @@ TEST_BEGIN(test_lg_align_and_zero)
} else {
assert_false(validate_fill(q, 0, 0, MAX_VALIDATE),
"Expected zeroed memory");
- assert_false(validate_fill(q+sz-MAX_VALIDATE, 0, 0,
- MAX_VALIDATE), "Expected zeroed memory");
+ assert_false(validate_fill(
+ (void *)((uintptr_t)q+sz-MAX_VALIDATE),
+ 0, 0, MAX_VALIDATE), "Expected zeroed memory");
}
p = q;
}
@@ -170,13 +208,38 @@ TEST_BEGIN(test_lg_align_and_zero)
}
TEST_END
-int
-main(void)
-{
+TEST_BEGIN(test_overflow) {
+ size_t largemax;
+ void *p;
+
+ largemax = get_large_size(get_nlarge()-1);
- return (test(
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+
+ assert_ptr_null(rallocx(p, largemax+1, 0),
+ "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1);
+
+ assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0),
+ "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1);
+
+ assert_ptr_null(rallocx(p, SIZE_T_MAX, 0),
+ "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX);
+
+ assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)),
+ "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))",
+ ZU(PTRDIFF_MAX)+1);
+
+ dallocx(p, 0);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
test_grow_and_shrink,
test_zero,
test_align,
- test_lg_align_and_zero));
+ test_lg_align_and_zero,
+ test_overflow);
}
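
Aside (not part of the patch): the get_nsizes_impl()/get_size_impl() helpers added above use jemalloc's two-step mallctl MIB interface: translate the control name once with mallctlnametomib(), then rewrite the index component of the MIB for each query instead of re-parsing the name. A minimal standalone sketch of the same idiom, assuming only the standard <jemalloc/jemalloc.h> API:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        size_t mib[4];
        size_t miblen = sizeof(mib) / sizeof(mib[0]);

        /* Resolve the name to a MIB once. */
        if (mallctlnametomib("arenas.lextent.0.size", mib, &miblen) != 0) {
            return 1;
        }
        for (size_t ind = 0; ind < 4; ind++) {
            size_t sz;
            size_t sz_len = sizeof(sz);
            mib[2] = ind;  /* Patch in the size-class index. */
            if (mallctlbymib(mib, miblen, (void *)&sz, &sz_len,
                NULL, 0) == 0) {
                printf("lextent %zu: %zu bytes\n", ind, sz);
            }
        }
        return 0;
    }
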
diff --git a/deps/jemalloc/test/integration/sdallocx.c b/deps/jemalloc/test/integration/sdallocx.c
new file mode 100644
index 000000000..ca0144855
--- /dev/null
+++ b/deps/jemalloc/test/integration/sdallocx.c
@@ -0,0 +1,55 @@
+#include "test/jemalloc_test.h"
+
+#define MAXALIGN (((size_t)1) << 22)
+#define NITER 3
+
+TEST_BEGIN(test_basic) {
+ void *ptr = mallocx(64, 0);
+ sdallocx(ptr, 64, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_alignment_and_size) {
+ size_t nsz, sz, alignment, total;
+ unsigned i;
+ void *ps[NITER];
+
+ for (i = 0; i < NITER; i++) {
+ ps[i] = NULL;
+ }
+
+ for (alignment = 8;
+ alignment <= MAXALIGN;
+ alignment <<= 1) {
+ total = 0;
+ for (sz = 1;
+ sz < 3 * alignment && sz < (1U << 31);
+ sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+ for (i = 0; i < NITER; i++) {
+ nsz = nallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) |
+ MALLOCX_ZERO);
+ total += nsz;
+ if (total >= (MAXALIGN << 1)) {
+ break;
+ }
+ }
+ for (i = 0; i < NITER; i++) {
+ if (ps[i] != NULL) {
+ sdallocx(ps[i], sz,
+ MALLOCX_ALIGN(alignment));
+ ps[i] = NULL;
+ }
+ }
+ }
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_basic,
+ test_alignment_and_size);
+}
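
Aside (not part of the patch): sdallocx() is sized deallocation, as exercised by the new test above: the caller hands back the size and flags it allocated with, which lets the allocator skip the metadata lookup that plain free()/dallocx() must perform. A minimal sketch, assuming <jemalloc/jemalloc.h>:

    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        /* Allocate with a size/flags pair... */
        void *p = mallocx(64, MALLOCX_ALIGN(16));
        if (p == NULL) {
            return 1;
        }
        /*
         * ...and free with the same pair. The size hint must match the
         * original request (or its nallocx() size class) for the flags.
         */
        sdallocx(p, 64, MALLOCX_ALIGN(16));
        return 0;
    }
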
diff --git a/deps/jemalloc/test/integration/thread_arena.c b/deps/jemalloc/test/integration/thread_arena.c
index 67be53513..1e5ec05d8 100644
--- a/deps/jemalloc/test/integration/thread_arena.c
+++ b/deps/jemalloc/test/integration/thread_arena.c
@@ -1,10 +1,9 @@
#include "test/jemalloc_test.h"
-#define NTHREADS 10
+#define NTHREADS 10
void *
-thd_start(void *arg)
-{
+thd_start(void *arg) {
unsigned main_arena_ind = *(unsigned *)arg;
void *p;
unsigned arena_ind;
@@ -16,8 +15,8 @@ thd_start(void *arg)
free(p);
size = sizeof(arena_ind);
- if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind,
- sizeof(main_arena_ind)))) {
+ if ((err = mallctl("thread.arena", (void *)&arena_ind, &size,
+ (void *)&main_arena_ind, sizeof(main_arena_ind)))) {
char buf[BUFERROR_BUF];
buferror(err, buf, sizeof(buf));
@@ -25,7 +24,8 @@ thd_start(void *arg)
}
size = sizeof(arena_ind);
- if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) {
+ if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL,
+ 0))) {
char buf[BUFERROR_BUF];
buferror(err, buf, sizeof(buf));
@@ -34,14 +34,19 @@ thd_start(void *arg)
assert_u_eq(arena_ind, main_arena_ind,
"Arena index should be same as for main thread");
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_thread_arena)
-{
+static void
+mallctl_failure(int err) {
+ char buf[BUFERROR_BUF];
+
+ buferror(err, buf, sizeof(buf));
+ test_fail("Error in mallctl(): %s", buf);
+}
+
+TEST_BEGIN(test_thread_arena) {
void *p;
- unsigned arena_ind;
- size_t size;
int err;
thd_t thds[NTHREADS];
unsigned i;
@@ -49,12 +54,15 @@ TEST_BEGIN(test_thread_arena)
p = malloc(1);
assert_ptr_not_null(p, "Error in malloc()");
- size = sizeof(arena_ind);
- if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) {
- char buf[BUFERROR_BUF];
+ unsigned arena_ind, old_arena_ind;
+ size_t sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Arena creation failure");
- buferror(err, buf, sizeof(buf));
- test_fail("Error in mallctl(): %s", buf);
+ size_t size = sizeof(arena_ind);
+ if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size,
+ (void *)&arena_ind, sizeof(arena_ind))) != 0) {
+ mallctl_failure(err);
}
for (i = 0; i < NTHREADS; i++) {
@@ -67,13 +75,12 @@ TEST_BEGIN(test_thread_arena)
thd_join(thds[i], (void *)&join_ret);
assert_zd_eq(join_ret, 0, "Unexpected thread join error");
}
+ free(p);
}
TEST_END
int
-main(void)
-{
-
- return (test(
- test_thread_arena));
+main(void) {
+ return test(
+ test_thread_arena);
}
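
Aside (not part of the patch): the rewritten test first creates a fresh arena with "arenas.create" and then binds the thread to it through the read/write form of mallctl(), which returns the previous binding while installing the new one in a single call. A sketch of that idiom, assuming <jemalloc/jemalloc.h>:

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        unsigned new_ind, old_ind;
        size_t sz = sizeof(unsigned);

        /* Create a fresh arena; its index comes back through oldp. */
        if (mallctl("arenas.create", (void *)&new_ind, &sz, NULL, 0) != 0) {
            return 1;
        }
        /* Read the old binding and write the new one in one call. */
        if (mallctl("thread.arena", (void *)&old_ind, &sz,
            (void *)&new_ind, sizeof(new_ind)) != 0) {
            return 1;
        }
        printf("thread moved from arena %u to arena %u\n", old_ind,
            new_ind);
        return 0;
    }
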
diff --git a/deps/jemalloc/test/integration/thread_tcache_enabled.c b/deps/jemalloc/test/integration/thread_tcache_enabled.c
index f4e89c682..95c9acc13 100644
--- a/deps/jemalloc/test/integration/thread_tcache_enabled.c
+++ b/deps/jemalloc/test/integration/thread_tcache_enabled.c
@@ -1,97 +1,73 @@
#include "test/jemalloc_test.h"
-static const bool config_tcache =
-#ifdef JEMALLOC_TCACHE
- true
-#else
- false
-#endif
- ;
-
void *
-thd_start(void *arg)
-{
- int err;
- size_t sz;
+thd_start(void *arg) {
bool e0, e1;
-
- sz = sizeof(bool);
- if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) {
- if (err == ENOENT) {
- assert_false(config_tcache,
- "ENOENT should only be returned if tcache is "
- "disabled");
- }
- goto label_ENOENT;
- }
+ size_t sz = sizeof(bool);
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure");
if (e0) {
e1 = false;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz),
- 0, "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_true(e0, "tcache should be enabled");
}
e1 = true;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_false(e0, "tcache should be disabled");
e1 = true;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_true(e0, "tcache should be enabled");
e1 = false;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_true(e0, "tcache should be enabled");
e1 = false;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_false(e0, "tcache should be disabled");
free(malloc(1));
e1 = true;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_false(e0, "tcache should be disabled");
free(malloc(1));
e1 = true;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_true(e0, "tcache should be enabled");
free(malloc(1));
e1 = false;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_true(e0, "tcache should be enabled");
free(malloc(1));
e1 = false;
- assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0,
- "Unexpected mallctl() error");
+ assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz,
+ (void *)&e1, sz), 0, "Unexpected mallctl() error");
assert_false(e0, "tcache should be disabled");
free(malloc(1));
- return (NULL);
-label_ENOENT:
- test_skip("\"thread.tcache.enabled\" mallctl not available");
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_main_thread)
-{
-
+TEST_BEGIN(test_main_thread) {
thd_start(NULL);
}
TEST_END
-TEST_BEGIN(test_subthread)
-{
+TEST_BEGIN(test_subthread) {
thd_t thd;
thd_create(&thd, thd_start, NULL);
@@ -100,14 +76,12 @@ TEST_BEGIN(test_subthread)
TEST_END
int
-main(void)
-{
-
+main(void) {
/* Run tests multiple times to check for bad interactions. */
- return (test(
+ return test(
test_main_thread,
test_subthread,
test_main_thread,
test_subthread,
- test_main_thread));
+ test_main_thread);
}
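
Aside (not part of the patch): the simplified thd_start() above leans on the same read-while-write mallctl() idiom, flipping "thread.tcache.enabled" and observing its previous value in one call. A sketch of a small wrapper around it, assuming <jemalloc/jemalloc.h>:

    #include <stdbool.h>
    #include <stdlib.h>
    #include <jemalloc/jemalloc.h>

    /* Set this thread's tcache state; returns the previous state. */
    static bool
    tcache_set(bool enable) {
        bool old = false;
        size_t sz = sizeof(bool);

        /* May fail (e.g. ENOENT) if tcache support is compiled out. */
        (void)mallctl("thread.tcache.enabled", (void *)&old, &sz,
            (void *)&enable, sz);
        return old;
    }

    int
    main(void) {
        bool was_enabled = tcache_set(false);
        free(malloc(1));  /* This round trip bypasses the tcache. */
        tcache_set(was_enabled);  /* Restore the original state. */
        return 0;
    }
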
diff --git a/deps/jemalloc/test/integration/xallocx.c b/deps/jemalloc/test/integration/xallocx.c
index ab4cf945e..cd0ca048d 100644
--- a/deps/jemalloc/test/integration/xallocx.c
+++ b/deps/jemalloc/test/integration/xallocx.c
@@ -1,7 +1,24 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_same_size)
-{
+/*
+ * Use a separate arena for xallocx() extension/contraction tests so that
+ * internal allocation e.g. by heap profiling can't interpose allocations where
+ * xallocx() would ordinarily be able to extend.
+ */
+static unsigned
+arena_ind(void) {
+ static unsigned ind = 0;
+
+ if (ind == 0) {
+ size_t sz = sizeof(ind);
+ assert_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure creating arena");
+ }
+
+ return ind;
+}
+
+TEST_BEGIN(test_same_size) {
void *p;
size_t sz, tsz;
@@ -16,8 +33,7 @@ TEST_BEGIN(test_same_size)
}
TEST_END
-TEST_BEGIN(test_extra_no_move)
-{
+TEST_BEGIN(test_extra_no_move) {
void *p;
size_t sz, tsz;
@@ -32,8 +48,7 @@ TEST_BEGIN(test_extra_no_move)
}
TEST_END
-TEST_BEGIN(test_no_move_fail)
-{
+TEST_BEGIN(test_no_move_fail) {
void *p;
size_t sz, tsz;
@@ -48,12 +63,322 @@ TEST_BEGIN(test_no_move_fail)
}
TEST_END
-int
-main(void)
-{
+static unsigned
+get_nsizes_impl(const char *cmd) {
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return ret;
+}
+
+static unsigned
+get_nsmall(void) {
+ return get_nsizes_impl("arenas.nbins");
+}
+
+static unsigned
+get_nlarge(void) {
+ return get_nsizes_impl("arenas.nlextents");
+}
- return (test(
+static size_t
+get_size_impl(const char *cmd, size_t ind) {
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return ret;
+}
+
+static size_t
+get_small_size(size_t ind) {
+ return get_size_impl("arenas.bin.0.size", ind);
+}
+
+static size_t
+get_large_size(size_t ind) {
+ return get_size_impl("arenas.lextent.0.size", ind);
+}
+
+TEST_BEGIN(test_size) {
+ size_t small0, largemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ largemax = get_large_size(get_nlarge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ /* Test smallest supported size. */
+ assert_zu_eq(xallocx(p, 1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ /* Test largest supported size. */
+ assert_zu_le(xallocx(p, largemax, 0, 0), largemax,
+ "Unexpected xallocx() behavior");
+
+ /* Test size overflow. */
+ assert_zu_le(xallocx(p, largemax+1, 0, 0), largemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_size_extra_overflow) {
+ size_t small0, largemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ largemax = get_large_size(get_nlarge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ /* Test overflows that can be resolved by clamping extra. */
+ assert_zu_le(xallocx(p, largemax-1, 2, 0), largemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, largemax, 1, 0), largemax,
+ "Unexpected xallocx() behavior");
+
+ /* Test overflow such that largemax-size underflows. */
+ assert_zu_le(xallocx(p, largemax+1, 2, 0), largemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, largemax+2, 3, 0), largemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_extra_small) {
+ size_t small0, small1, largemax;
+ void *p;
+
+ /* Get size classes. */
+ small0 = get_small_size(0);
+ small1 = get_small_size(1);
+ largemax = get_large_size(get_nlarge()-1);
+
+ p = mallocx(small0, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ assert_zu_eq(xallocx(p, small1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, small1, 0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_eq(xallocx(p, small0, small1 - small0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ /* Test size+extra overflow. */
+ assert_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_extra_large) {
+ int flags = MALLOCX_ARENA(arena_ind());
+ size_t smallmax, large1, large2, large3, largemax;
+ void *p;
+
+ /* Get size classes. */
+ smallmax = get_small_size(get_nsmall()-1);
+ large1 = get_large_size(1);
+ large2 = get_large_size(2);
+ large3 = get_large_size(3);
+ largemax = get_large_size(get_nlarge()-1);
+
+ p = mallocx(large3, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+
+ assert_zu_eq(xallocx(p, large3, 0, flags), large3,
+ "Unexpected xallocx() behavior");
+ /* Test size decrease with zero extra. */
+ assert_zu_ge(xallocx(p, large1, 0, flags), large1,
+ "Unexpected xallocx() behavior");
+ assert_zu_ge(xallocx(p, smallmax, 0, flags), large1,
+ "Unexpected xallocx() behavior");
+
+ if (xallocx(p, large3, 0, flags) != large3) {
+ p = rallocx(p, large3, flags);
+ assert_ptr_not_null(p, "Unexpected rallocx() failure");
+ }
+ /* Test size decrease with non-zero extra. */
+ assert_zu_eq(xallocx(p, large1, large3 - large1, flags), large3,
+ "Unexpected xallocx() behavior");
+ assert_zu_eq(xallocx(p, large2, large3 - large2, flags), large3,
+ "Unexpected xallocx() behavior");
+ assert_zu_ge(xallocx(p, large1, large2 - large1, flags), large2,
+ "Unexpected xallocx() behavior");
+ assert_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, large1, 0, flags), large1,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with zero extra. */
+ assert_zu_le(xallocx(p, large3, 0, flags), large3,
+ "Unexpected xallocx() behavior");
+ assert_zu_le(xallocx(p, largemax+1, 0, flags), large3,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, large1, 0, flags), large1,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax,
+ "Unexpected xallocx() behavior");
+
+ assert_zu_ge(xallocx(p, large1, 0, flags), large1,
+ "Unexpected xallocx() behavior");
+ /* Test size increase with non-zero extra. */
+ assert_zu_le(xallocx(p, large1, large3 - large1, flags), large3,
+ "Unexpected xallocx() behavior");
+
+ if (xallocx(p, large3, 0, flags) != large3) {
+ p = rallocx(p, large3, flags);
+ assert_ptr_not_null(p, "Unexpected rallocx() failure");
+ }
+ /* Test size+extra overflow. */
+ assert_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax,
+ "Unexpected xallocx() behavior");
+
+ dallocx(p, flags);
+}
+TEST_END
+
+static void
+print_filled_extents(const void *p, uint8_t c, size_t len) {
+ const uint8_t *pc = (const uint8_t *)p;
+ size_t i, range0;
+ uint8_t c0;
+
+ malloc_printf(" p=%p, c=%#x, len=%zu:", p, c, len);
+ range0 = 0;
+ c0 = pc[0];
+ for (i = 0; i < len; i++) {
+ if (pc[i] != c0) {
+ malloc_printf(" %#x[%zu..%zu)", c0, range0, i);
+ range0 = i;
+ c0 = pc[i];
+ }
+ }
+ malloc_printf(" %#x[%zu..%zu)\n", c0, range0, i);
+}
+
+static bool
+validate_fill(const void *p, uint8_t c, size_t offset, size_t len) {
+ const uint8_t *pc = (const uint8_t *)p;
+ bool err;
+ size_t i;
+
+ for (i = offset, err = false; i < offset+len; i++) {
+ if (pc[i] != c) {
+ err = true;
+ }
+ }
+
+ if (err) {
+ print_filled_extents(p, c, offset + len);
+ }
+
+ return err;
+}
+
+static void
+test_zero(size_t szmin, size_t szmax) {
+ int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO;
+ size_t sz, nsz;
+ void *p;
+#define FILL_BYTE 0x7aU
+
+ sz = szmax;
+ p = mallocx(sz, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() error");
+ assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu",
+ sz);
+
+ /*
+ * Fill with non-zero so that non-debug builds are more likely to detect
+ * errors.
+ */
+ memset(p, FILL_BYTE, sz);
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+
+ /* Shrink in place so that we can expect growing in place to succeed. */
+ sz = szmin;
+ if (xallocx(p, sz, 0, flags) != sz) {
+ p = rallocx(p, sz, flags);
+ assert_ptr_not_null(p, "Unexpected rallocx() failure");
+ }
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+
+ for (sz = szmin; sz < szmax; sz = nsz) {
+ nsz = nallocx(sz+1, flags);
+ if (xallocx(p, sz+1, 0, flags) != nsz) {
+ p = rallocx(p, sz+1, flags);
+ assert_ptr_not_null(p, "Unexpected rallocx() failure");
+ }
+ assert_false(validate_fill(p, FILL_BYTE, 0, sz),
+ "Memory not filled: sz=%zu", sz);
+ assert_false(validate_fill(p, 0x00, sz, nsz-sz),
+ "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz);
+ memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz);
+ assert_false(validate_fill(p, FILL_BYTE, 0, nsz),
+ "Memory not filled: nsz=%zu", nsz);
+ }
+
+ dallocx(p, flags);
+}
+
+TEST_BEGIN(test_zero_large) {
+ size_t large0, large1;
+
+ /* Get size classes. */
+ large0 = get_large_size(0);
+ large1 = get_large_size(1);
+
+ test_zero(large1, large0 * 2);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
test_same_size,
test_extra_no_move,
- test_no_move_fail));
+ test_no_move_fail,
+ test_size,
+ test_size_extra_overflow,
+ test_extra_small,
+ test_extra_large,
+ test_zero_large);
}
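
Aside (not part of the patch): the new tests pin down the xallocx() contract: it only ever resizes in place, and its return value is the resulting usable size, which may fall short of the request. That is why the try-xallocx-then-fall-back-to-rallocx() pattern recurs above. A minimal sketch, assuming <jemalloc/jemalloc.h>:

    #include <string.h>
    #include <jemalloc/jemalloc.h>

    /*
     * Grow *pp to at least size bytes, moving it only when in-place
     * extension fails. Returns the new usable size, or 0 on OOM.
     */
    static size_t
    grow(void **pp, size_t size) {
        size_t usize = xallocx(*pp, size, 0, 0);  /* Never moves. */
        if (usize < size) {
            void *q = rallocx(*pp, size, 0);  /* May move. */
            if (q == NULL) {
                return 0;
            }
            *pp = q;
            usize = sallocx(q, 0);
        }
        return usize;
    }

    int
    main(void) {
        void *p = mallocx(1, 0);
        if (p == NULL || grow(&p, 4096) == 0) {
            return 1;
        }
        memset(p, 0, 4096);
        dallocx(p, 0);
        return 0;
    }
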
diff --git a/deps/jemalloc/test/integration/xallocx.sh b/deps/jemalloc/test/integration/xallocx.sh
new file mode 100644
index 000000000..0cc218737
--- /dev/null
+++ b/deps/jemalloc/test/integration/xallocx.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="junk:false"
+fi
diff --git a/deps/jemalloc/test/src/SFMT.c b/deps/jemalloc/test/src/SFMT.c
index e6f8deecb..c05e2183b 100644
--- a/deps/jemalloc/test/src/SFMT.c
+++ b/deps/jemalloc/test/src/SFMT.c
@@ -33,7 +33,7 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/**
+/**
* @file SFMT.c
* @brief SIMD oriented Fast Mersenne Twister(SFMT)
*
@@ -45,7 +45,7 @@
*
* The new BSD License is applied to this software, see LICENSE.txt
*/
-#define SFMT_C_
+#define SFMT_C_
#include "test/jemalloc_test.h"
#include "test/SFMT-params.h"
@@ -108,7 +108,7 @@ struct sfmt_s {
/*--------------------------------------
FILE GLOBAL VARIABLES
- internal state, index counter and flag
+ internal state, index counter and flag
--------------------------------------*/
/** a parity check vector which certificate the period of 2^{MEXP} */
@@ -117,18 +117,18 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4};
/*----------------
STATIC FUNCTIONS
----------------*/
-JEMALLOC_INLINE_C int idxof(int i);
+static inline int idxof(int i);
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
-JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift);
-JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift);
+static inline void rshift128(w128_t *out, w128_t const *in, int shift);
+static inline void lshift128(w128_t *out, w128_t const *in, int shift);
#endif
-JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx);
-JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size);
-JEMALLOC_INLINE_C uint32_t func1(uint32_t x);
-JEMALLOC_INLINE_C uint32_t func2(uint32_t x);
+static inline void gen_rand_all(sfmt_t *ctx);
+static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size);
+static inline uint32_t func1(uint32_t x);
+static inline uint32_t func2(uint32_t x);
static void period_certification(sfmt_t *ctx);
#if defined(BIG_ENDIAN64) && !defined(ONLY64)
-JEMALLOC_INLINE_C void swap(w128_t *array, int size);
+static inline void swap(w128_t *array, int size);
#endif
#if defined(HAVE_ALTIVEC)
@@ -138,15 +138,15 @@ JEMALLOC_INLINE_C void swap(w128_t *array, int size);
#endif
/**
- * This function simulate a 64-bit index of LITTLE ENDIAN
+ * This function simulate a 64-bit index of LITTLE ENDIAN
+ * This function simulate a 64-bit index of LITTLE ENDIAN
* in BIG ENDIAN machine.
*/
#ifdef ONLY64
-JEMALLOC_INLINE_C int idxof(int i) {
+static inline int idxof(int i) {
return i ^ 1;
}
#else
-JEMALLOC_INLINE_C int idxof(int i) {
+static inline int idxof(int i) {
return i;
}
#endif
@@ -160,7 +160,7 @@ JEMALLOC_INLINE_C int idxof(int i) {
*/
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
#ifdef ONLY64
-JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) {
+static inline void rshift128(w128_t *out, w128_t const *in, int shift) {
uint64_t th, tl, oh, ol;
th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
@@ -175,7 +175,7 @@ JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) {
out->u[3] = (uint32_t)oh;
}
#else
-JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) {
+static inline void rshift128(w128_t *out, w128_t const *in, int shift) {
uint64_t th, tl, oh, ol;
th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
@@ -199,7 +199,7 @@ JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) {
* @param shift the shift value
*/
#ifdef ONLY64
-JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) {
+static inline void lshift128(w128_t *out, w128_t const *in, int shift) {
uint64_t th, tl, oh, ol;
th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]);
@@ -214,7 +214,7 @@ JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) {
out->u[3] = (uint32_t)oh;
}
#else
-JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) {
+static inline void lshift128(w128_t *out, w128_t const *in, int shift) {
uint64_t th, tl, oh, ol;
th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]);
@@ -241,37 +241,37 @@ JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) {
*/
#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))
#ifdef ONLY64
-JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
+static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
w128_t *d) {
w128_t x;
w128_t y;
lshift128(&x, a, SL2);
rshift128(&y, c, SR2);
- r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0]
+ r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0]
^ (d->u[0] << SL1);
- r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1]
+ r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1]
^ (d->u[1] << SL1);
- r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2]
+ r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2]
^ (d->u[2] << SL1);
- r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3]
+ r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3]
^ (d->u[3] << SL1);
}
#else
-JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
+static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
w128_t *d) {
w128_t x;
w128_t y;
lshift128(&x, a, SL2);
rshift128(&y, c, SR2);
- r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0]
+ r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0]
^ (d->u[0] << SL1);
- r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1]
+ r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1]
^ (d->u[1] << SL1);
- r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2]
+ r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2]
^ (d->u[2] << SL1);
- r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3]
+ r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3]
^ (d->u[3] << SL1);
}
#endif
@@ -282,7 +282,7 @@ JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c,
* This function fills the internal state array with pseudorandom
* integers.
*/
-JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx) {
+static inline void gen_rand_all(sfmt_t *ctx) {
int i;
w128_t *r1, *r2;
@@ -306,10 +306,10 @@ JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx) {
* This function fills the user-specified array with pseudorandom
* integers.
*
- * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
* @param size number of 128-bit pseudorandom numbers to be generated.
*/
-JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
+static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
int i, j;
w128_t *r1, *r2;
@@ -343,7 +343,7 @@ JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) {
#endif
#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)
-JEMALLOC_INLINE_C void swap(w128_t *array, int size) {
+static inline void swap(w128_t *array, int size) {
int i;
uint32_t x, y;
@@ -463,11 +463,11 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
above = 0xffffffffU - (0xffffffffU % limit);
while (1) {
- ret = gen_rand32(ctx);
- if (ret < above) {
- ret %= limit;
- break;
- }
+ ret = gen_rand32(ctx);
+ if (ret < above) {
+ ret %= limit;
+ break;
+ }
}
return ret;
}
@@ -476,7 +476,7 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) {
* This function generates and returns 64-bit pseudorandom number.
* init_gen_rand or init_by_array must be called before this function.
* The function gen_rand64 should not be called after gen_rand32,
- * unless an initialization is again executed.
- * unless an initialization is again executed.
+ * unless an initialization is again executed.
* @return 64-bit pseudorandom number
*/
uint64_t gen_rand64(sfmt_t *ctx) {
@@ -511,13 +511,13 @@ uint64_t gen_rand64(sfmt_t *ctx) {
uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) {
uint64_t ret, above;
- above = 0xffffffffffffffffLLU - (0xffffffffffffffffLLU % limit);
+ above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit);
while (1) {
- ret = gen_rand64(ctx);
- if (ret < above) {
- ret %= limit;
- break;
- }
+ ret = gen_rand64(ctx);
+ if (ret < above) {
+ ret %= limit;
+ break;
+ }
}
return ret;
}
@@ -618,7 +618,7 @@ sfmt_t *init_gen_rand(uint32_t seed) {
psfmt32[idxof(0)] = seed;
for (i = 1; i < N32; i++) {
- psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]
+ psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]
^ (psfmt32[idxof(i - 1)] >> 30))
+ i;
}
@@ -668,7 +668,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) {
} else {
count = N32;
}
- r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]
+ r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]
^ psfmt32[idxof(N32 - 1)]);
psfmt32[idxof(mid)] += r;
r += key_length;
@@ -677,7 +677,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) {
count--;
for (i = 1, j = 0; (j < count) && (j < key_length); j++) {
- r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
+ r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
^ psfmt32[idxof((i + N32 - 1) % N32)]);
psfmt32[idxof((i + mid) % N32)] += r;
r += init_key[j] + i;
@@ -686,7 +686,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) {
i = (i + 1) % N32;
}
for (; j < count; j++) {
- r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
+ r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)]
^ psfmt32[idxof((i + N32 - 1) % N32)]);
psfmt32[idxof((i + mid) % N32)] += r;
r += i;
@@ -695,7 +695,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) {
i = (i + 1) % N32;
}
for (j = 0; j < N32; j++) {
- r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)]
+ r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)]
+ psfmt32[idxof((i + N32 - 1) % N32)]);
psfmt32[idxof((i + mid) % N32)] ^= r;
r -= i;
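
Aside (not part of the patch): the re-indented gen_rand32_range()/gen_rand64_range() bodies above are rejection sampling. A bare `ret % limit` would bias small results whenever the generator's range is not a multiple of limit, so draws at or beyond the largest multiple of limit are discarded and redrawn. A generic sketch of the technique in plain C:

    #include <stdint.h>

    /*
     * Map a uniform 32-bit generator onto [0, limit) without modulo
     * bias. "above" is a multiple of limit, so every residue class is
     * hit equally often by the accepted draws. Acceptance probability
     * always exceeds 1/2, so the expected number of retries is < 1.
     */
    static uint32_t
    rand32_range(uint32_t (*gen)(void), uint32_t limit) {
        uint32_t above = 0xffffffffU - (0xffffffffU % limit);
        uint32_t r;

        do {
            r = gen();
        } while (r >= above);
        return r % limit;
    }
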
diff --git a/deps/jemalloc/test/src/btalloc.c b/deps/jemalloc/test/src/btalloc.c
new file mode 100644
index 000000000..d570952ce
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc.c
@@ -0,0 +1,6 @@
+#include "test/jemalloc_test.h"
+
+void *
+btalloc(size_t size, unsigned bits) {
+ return btalloc_0(size, bits);
+}
diff --git a/deps/jemalloc/test/src/btalloc_0.c b/deps/jemalloc/test/src/btalloc_0.c
new file mode 100644
index 000000000..77d8904ea
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc_0.c
@@ -0,0 +1,3 @@
+#include "test/jemalloc_test.h"
+
+btalloc_n_gen(0)
diff --git a/deps/jemalloc/test/src/btalloc_1.c b/deps/jemalloc/test/src/btalloc_1.c
new file mode 100644
index 000000000..4c126c309
--- /dev/null
+++ b/deps/jemalloc/test/src/btalloc_1.c
@@ -0,0 +1,3 @@
+#include "test/jemalloc_test.h"
+
+btalloc_n_gen(1)
diff --git a/deps/jemalloc/test/src/math.c b/deps/jemalloc/test/src/math.c
index 887a36390..1758c6778 100644
--- a/deps/jemalloc/test/src/math.c
+++ b/deps/jemalloc/test/src/math.c
@@ -1,2 +1,2 @@
-#define MATH_C_
+#define MATH_C_
#include "test/jemalloc_test.h"
diff --git a/deps/jemalloc/test/src/mq.c b/deps/jemalloc/test/src/mq.c
new file mode 100644
index 000000000..9b5f672d6
--- /dev/null
+++ b/deps/jemalloc/test/src/mq.c
@@ -0,0 +1,27 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Sleep for approximately ns nanoseconds. No lower *or* upper bound on sleep
+ * time is guaranteed.
+ */
+void
+mq_nanosleep(unsigned ns) {
+ assert(ns <= 1000*1000*1000);
+
+#ifdef _WIN32
+ Sleep(ns / 1000);
+#else
+ {
+ struct timespec timeout;
+
+ if (ns < 1000*1000*1000) {
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = ns;
+ } else {
+ timeout.tv_sec = 1;
+ timeout.tv_nsec = 0;
+ }
+ nanosleep(&timeout, NULL);
+ }
+#endif
+}
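
Aside (not part of the patch): mq_nanosleep() papers over the Windows/POSIX split; Sleep() takes milliseconds while nanosleep() takes a timespec whose tv_nsec field must stay below one second, hence the clamp above. A usage sketch built on that helper:

    /* Capped exponential backoff for a polling loop; stays far below
     * the 1-second limit that mq_nanosleep() asserts. */
    static void
    backoff(unsigned *ns) {
        mq_nanosleep(*ns);
        if (*ns < 1000 * 1000) {
            *ns *= 2;
        }
    }
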
diff --git a/deps/jemalloc/test/src/mtx.c b/deps/jemalloc/test/src/mtx.c
index 41b95d59d..a393c01fc 100644
--- a/deps/jemalloc/test/src/mtx.c
+++ b/deps/jemalloc/test/src/mtx.c
@@ -1,34 +1,40 @@
#include "test/jemalloc_test.h"
-bool
-mtx_init(mtx_t *mtx)
-{
+#ifndef _CRT_SPINCOUNT
+#define _CRT_SPINCOUNT 4000
+#endif
+bool
+mtx_init(mtx_t *mtx) {
#ifdef _WIN32
- if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT))
- return (true);
+ if (!InitializeCriticalSectionAndSpinCount(&mtx->lock,
+ _CRT_SPINCOUNT)) {
+ return true;
+ }
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ mtx->lock = OS_UNFAIR_LOCK_INIT;
#elif (defined(JEMALLOC_OSSPIN))
mtx->lock = 0;
#else
pthread_mutexattr_t attr;
- if (pthread_mutexattr_init(&attr) != 0)
- return (true);
+ if (pthread_mutexattr_init(&attr) != 0) {
+ return true;
+ }
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
if (pthread_mutex_init(&mtx->lock, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
- return (true);
+ return true;
}
pthread_mutexattr_destroy(&attr);
#endif
- return (false);
+ return false;
}
void
-mtx_fini(mtx_t *mtx)
-{
-
+mtx_fini(mtx_t *mtx) {
#ifdef _WIN32
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
#elif (defined(JEMALLOC_OSSPIN))
#else
pthread_mutex_destroy(&mtx->lock);
@@ -36,11 +42,11 @@ mtx_fini(mtx_t *mtx)
}
void
-mtx_lock(mtx_t *mtx)
-{
-
+mtx_lock(mtx_t *mtx) {
#ifdef _WIN32
EnterCriticalSection(&mtx->lock);
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ os_unfair_lock_lock(&mtx->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockLock(&mtx->lock);
#else
@@ -49,11 +55,11 @@ mtx_lock(mtx_t *mtx)
}
void
-mtx_unlock(mtx_t *mtx)
-{
-
+mtx_unlock(mtx_t *mtx) {
#ifdef _WIN32
LeaveCriticalSection(&mtx->lock);
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+ os_unfair_lock_unlock(&mtx->lock);
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLockUnlock(&mtx->lock);
#else
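
Aside (not part of the patch): the new JEMALLOC_OS_UNFAIR_LOCK branch prefers macOS 10.12's os_unfair_lock over the deprecated OSSpinLock; callers are unaffected because every backend hides behind the same four functions. A usage sketch, assuming the test framework's mtx and thd wrappers:

    #include "test/jemalloc_test.h"

    static mtx_t lock;
    static int counter;

    static void *
    worker(void *arg) {
        (void)arg;
        for (int i = 0; i < 1000; i++) {
            mtx_lock(&lock);  /* Same call on every backend. */
            counter++;
            mtx_unlock(&lock);
        }
        return NULL;
    }

    int
    main(void) {
        thd_t thds[4];

        if (mtx_init(&lock)) {  /* Returns true on failure. */
            return 1;
        }
        for (int i = 0; i < 4; i++) {
            thd_create(&thds[i], worker, NULL);
        }
        for (int i = 0; i < 4; i++) {
            thd_join(thds[i], NULL);
        }
        mtx_fini(&lock);
        return counter != 4000;
    }
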
diff --git a/deps/jemalloc/test/src/test.c b/deps/jemalloc/test/src/test.c
index 528d85831..01a4d7380 100644
--- a/deps/jemalloc/test/src/test.c
+++ b/deps/jemalloc/test/src/test.c
@@ -1,14 +1,70 @@
#include "test/jemalloc_test.h"
+/* Test status state. */
+
static unsigned test_count = 0;
static test_status_t test_counts[test_status_count] = {0, 0, 0};
static test_status_t test_status = test_status_pass;
static const char * test_name = "";
-JEMALLOC_ATTR(format(printf, 1, 2))
+/* Reentrancy testing helpers. */
+
+#define NUM_REENTRANT_ALLOCS 20
+typedef enum {
+ non_reentrant = 0,
+ libc_reentrant = 1,
+ arena_new_reentrant = 2
+} reentrancy_t;
+static reentrancy_t reentrancy;
+
+static bool libc_hook_ran = false;
+static bool arena_new_hook_ran = false;
+
+static const char *
+reentrancy_t_str(reentrancy_t r) {
+ switch (r) {
+ case non_reentrant:
+ return "non-reentrant";
+ case libc_reentrant:
+ return "libc-reentrant";
+ case arena_new_reentrant:
+ return "arena_new-reentrant";
+ default:
+ unreachable();
+ }
+}
+
+static void
+do_hook(bool *hook_ran, void (**hook)()) {
+ *hook_ran = true;
+ *hook = NULL;
+
+ size_t alloc_size = 1;
+ for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) {
+ free(malloc(alloc_size));
+ alloc_size *= 2;
+ }
+}
+
+static void
+libc_reentrancy_hook() {
+ do_hook(&libc_hook_ran, &hooks_libc_hook);
+}
+
+static void
+arena_new_reentrancy_hook() {
+ do_hook(&arena_new_hook_ran, &hooks_arena_new_hook);
+}
+
+/* Actual test infrastructure. */
+bool
+test_is_reentrant() {
+ return reentrancy != non_reentrant;
+}
+
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
-test_skip(const char *format, ...)
-{
+test_skip(const char *format, ...) {
va_list ap;
va_start(ap, format);
@@ -18,10 +74,9 @@ test_skip(const char *format, ...)
test_status = test_status_skip;
}
-JEMALLOC_ATTR(format(printf, 1, 2))
+JEMALLOC_FORMAT_PRINTF(1, 2)
void
-test_fail(const char *format, ...)
-{
+test_fail(const char *format, ...) {
va_list ap;
va_start(ap, format);
@@ -32,9 +87,7 @@ test_fail(const char *format, ...)
}
static const char *
-test_status_string(test_status_t test_status)
-{
-
+test_status_string(test_status_t test_status) {
switch (test_status) {
case test_status_pass: return "pass";
case test_status_skip: return "skip";
@@ -44,35 +97,64 @@ test_status_string(test_status_t test_status)
}
void
-p_test_init(const char *name)
-{
-
+p_test_init(const char *name) {
test_count++;
test_status = test_status_pass;
test_name = name;
}
void
-p_test_fini(void)
-{
-
+p_test_fini(void) {
test_counts[test_status]++;
- malloc_printf("%s: %s\n", test_name, test_status_string(test_status));
+ malloc_printf("%s (%s): %s\n", test_name, reentrancy_t_str(reentrancy),
+ test_status_string(test_status));
}
-test_status_t
-p_test(test_t* t, ...)
-{
- test_status_t ret = test_status_pass;
- va_list ap;
+static test_status_t
+p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) {
+ test_status_t ret;
+
+ if (do_malloc_init) {
+ /*
+ * Make sure initialization occurs prior to running tests.
+ * Tests are special because they may use internal facilities
+ * prior to triggering initialization as a side effect of
+ * calling into the public API.
+ */
+ if (nallocx(1, 0) == 0) {
+ malloc_printf("Initialization error");
+ return test_status_fail;
+ }
+ }
- va_start(ap, t);
- for (; t != NULL; t = va_arg(ap, test_t*)) {
+ ret = test_status_pass;
+ for (; t != NULL; t = va_arg(ap, test_t *)) {
+ /* Non-reentrant run. */
+ reentrancy = non_reentrant;
+ hooks_arena_new_hook = hooks_libc_hook = NULL;
t();
- if (test_status > ret)
+ if (test_status > ret) {
ret = test_status;
+ }
+ /* Reentrant run. */
+ if (do_reentrant) {
+ reentrancy = libc_reentrant;
+ hooks_arena_new_hook = NULL;
+ hooks_libc_hook = &libc_reentrancy_hook;
+ t();
+ if (test_status > ret) {
+ ret = test_status;
+ }
+
+ reentrancy = arena_new_reentrant;
+ hooks_libc_hook = NULL;
+ hooks_arena_new_hook = &arena_new_reentrancy_hook;
+ t();
+ if (test_status > ret) {
+ ret = test_status;
+ }
+ }
}
- va_end(ap);
malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n",
test_status_string(test_status_pass),
@@ -82,13 +164,54 @@ p_test(test_t* t, ...)
test_status_string(test_status_fail),
test_counts[test_status_fail], test_count);
- return (ret);
+ return ret;
}
-void
-p_test_fail(const char *prefix, const char *message)
-{
+test_status_t
+p_test(test_t *t, ...) {
+ test_status_t ret;
+ va_list ap;
+ ret = test_status_pass;
+ va_start(ap, t);
+ ret = p_test_impl(true, true, t, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+test_status_t
+p_test_no_reentrancy(test_t *t, ...) {
+ test_status_t ret;
+ va_list ap;
+
+ ret = test_status_pass;
+ va_start(ap, t);
+ ret = p_test_impl(true, false, t, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+test_status_t
+p_test_no_malloc_init(test_t *t, ...) {
+ test_status_t ret;
+ va_list ap;
+
+ ret = test_status_pass;
+ va_start(ap, t);
+ /*
+ * We also omit reentrancy from bootstrapping tests, since we don't
+ * (yet) care about general reentrancy during bootstrapping.
+ */
+ ret = p_test_impl(false, false, t, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+void
+p_test_fail(const char *prefix, const char *message) {
malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message);
test_status = test_status_fail;
}
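
Aside (not part of the patch): the reworked harness runs each test up to three times, once normally and once per reentrancy hook, and the hooks terminate only because do_hook() clears the hook pointer before re-entering the allocator. A generic sketch of that self-clearing hook pattern (the hook name here is hypothetical; the real harness uses jemalloc's internal hooks_libc_hook and hooks_arena_new_hook):

    #include <stdio.h>
    #include <stdlib.h>

    static void (*on_alloc_hook)(void);  /* Hypothetical hook slot. */

    static void
    traced_alloc(size_t size) {
        if (on_alloc_hook != NULL) {
            on_alloc_hook();
        }
        free(malloc(size));
    }

    static void
    reentrancy_hook(void) {
        /* Clear the hook first so the nested calls below cannot
         * re-trigger it and recurse without bound. */
        on_alloc_hook = NULL;
        for (size_t sz = 1; sz <= 1024; sz *= 2) {
            traced_alloc(sz);  /* Re-enter mid-operation. */
        }
    }

    int
    main(void) {
        on_alloc_hook = reentrancy_hook;
        traced_alloc(64);  /* The first call fires the hook once. */
        printf("survived reentrant allocation\n");
        return 0;
    }
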
diff --git a/deps/jemalloc/test/src/thd.c b/deps/jemalloc/test/src/thd.c
index 233242a16..9a15eabbf 100644
--- a/deps/jemalloc/test/src/thd.c
+++ b/deps/jemalloc/test/src/thd.c
@@ -2,34 +2,33 @@
#ifdef _WIN32
void
-thd_create(thd_t *thd, void *(*proc)(void *), void *arg)
-{
+thd_create(thd_t *thd, void *(*proc)(void *), void *arg) {
LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc;
*thd = CreateThread(NULL, 0, routine, arg, 0, NULL);
- if (*thd == NULL)
+ if (*thd == NULL) {
test_fail("Error in CreateThread()\n");
+ }
}
void
-thd_join(thd_t thd, void **ret)
-{
-
- WaitForSingleObject(thd, INFINITE);
+thd_join(thd_t thd, void **ret) {
+ if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) {
+ DWORD exit_code;
+ GetExitCodeThread(thd, (LPDWORD) &exit_code);
+ *ret = (void *)(uintptr_t)exit_code;
+ }
}
#else
void
-thd_create(thd_t *thd, void *(*proc)(void *), void *arg)
-{
-
- if (pthread_create(thd, NULL, proc, arg) != 0)
+thd_create(thd_t *thd, void *(*proc)(void *), void *arg) {
+ if (pthread_create(thd, NULL, proc, arg) != 0) {
test_fail("Error in pthread_create()\n");
+ }
}
void
-thd_join(thd_t thd, void **ret)
-{
-
+thd_join(thd_t thd, void **ret) {
pthread_join(thd, ret);
}
#endif
diff --git a/deps/jemalloc/test/src/timer.c b/deps/jemalloc/test/src/timer.c
new file mode 100644
index 000000000..c451c6391
--- /dev/null
+++ b/deps/jemalloc/test/src/timer.c
@@ -0,0 +1,56 @@
+#include "test/jemalloc_test.h"
+
+void
+timer_start(timedelta_t *timer) {
+ nstime_init(&timer->t0, 0);
+ nstime_update(&timer->t0);
+}
+
+void
+timer_stop(timedelta_t *timer) {
+ nstime_copy(&timer->t1, &timer->t0);
+ nstime_update(&timer->t1);
+}
+
+uint64_t
+timer_usec(const timedelta_t *timer) {
+ nstime_t delta;
+
+ nstime_copy(&delta, &timer->t1);
+ nstime_subtract(&delta, &timer->t0);
+ return nstime_ns(&delta) / 1000;
+}
+
+void
+timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) {
+ uint64_t t0 = timer_usec(a);
+ uint64_t t1 = timer_usec(b);
+ uint64_t mult;
+ size_t i = 0;
+ size_t j, n;
+
+ /* Whole. */
+ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1);
+ i += n;
+ if (i >= buflen) {
+ return;
+ }
+ mult = 1;
+ for (j = 0; j < n; j++) {
+ mult *= 10;
+ }
+
+ /* Decimal. */
+ n = malloc_snprintf(&buf[i], buflen-i, ".");
+ i += n;
+
+ /* Fraction. */
+ while (i < buflen-1) {
+ uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10
+ >= 5)) ? 1 : 0;
+ n = malloc_snprintf(&buf[i], buflen-i,
+ "%"FMTu64, (t0 * mult / t1) % 10 + round);
+ i += n;
+ mult *= 10;
+ }
+}
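
Aside (not part of the patch): timer_ratio() renders t0/t1 as a fixed-point decimal using only integer arithmetic; after the whole part consumes n characters, mult is 10^n and each fractional digit is (t0 * mult / t1) % 10, with the final digit rounded. For example, t0=2500us and t1=2000us in a 6-byte buffer produce "1.250". A usage sketch built on the functions defined above, assuming the test framework's timedelta_t and malloc_printf():

    #include "test/jemalloc_test.h"

    static void
    report_pair(const char *name_a, void (*a)(void), const char *name_b,
        void (*b)(void)) {
        timedelta_t ta, tb;
        char ratio[6];  /* Room for e.g. "1.250" plus the NUL. */

        timer_start(&ta);
        a();
        timer_stop(&ta);

        timer_start(&tb);
        b();
        timer_stop(&tb);

        timer_ratio(&ta, &tb, ratio, sizeof(ratio));
        malloc_printf("%s=%"FMTu64"us %s=%"FMTu64"us ratio=1:%s\n",
            name_a, timer_usec(&ta), name_b, timer_usec(&tb), ratio);
    }
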
diff --git a/deps/jemalloc/test/stress/microbench.c b/deps/jemalloc/test/stress/microbench.c
new file mode 100644
index 000000000..988b7938f
--- /dev/null
+++ b/deps/jemalloc/test/stress/microbench.c
@@ -0,0 +1,165 @@
+#include "test/jemalloc_test.h"
+
+static inline void
+time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter,
+ void (*func)(void)) {
+ uint64_t i;
+
+ for (i = 0; i < nwarmup; i++) {
+ func();
+ }
+ timer_start(timer);
+ for (i = 0; i < niter; i++) {
+ func();
+ }
+ timer_stop(timer);
+}
+
+void
+compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a,
+ void (*func_a), const char *name_b, void (*func_b)) {
+ timedelta_t timer_a, timer_b;
+ char ratio_buf[6];
+ void *p;
+
+ p = mallocx(1, 0);
+ if (p == NULL) {
+ test_fail("Unexpected mallocx() failure");
+ return;
+ }
+
+ time_func(&timer_a, nwarmup, niter, func_a);
+ time_func(&timer_b, nwarmup, niter, func_b);
+
+ timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf));
+ malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, "
+ "%s=%"FMTu64"us, ratio=1:%s\n",
+ niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b),
+ ratio_buf);
+
+ dallocx(p, 0);
+}
+
+static void
+malloc_free(void) {
+ /* The compiler can optimize away free(malloc(1))! */
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ free(p);
+}
+
+static void
+mallocx_free(void) {
+ void *p = mallocx(1, 0);
+ if (p == NULL) {
+ test_fail("Unexpected mallocx() failure");
+ return;
+ }
+ free(p);
+}
+
+TEST_BEGIN(test_malloc_vs_mallocx) {
+ compare_funcs(10*1000*1000, 100*1000*1000, "malloc",
+ malloc_free, "mallocx", mallocx_free);
+}
+TEST_END
+
+static void
+malloc_dallocx(void) {
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ dallocx(p, 0);
+}
+
+static void
+malloc_sdallocx(void) {
+ void *p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ sdallocx(p, 1, 0);
+}
+
+TEST_BEGIN(test_free_vs_dallocx) {
+ compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free,
+ "dallocx", malloc_dallocx);
+}
+TEST_END
+
+TEST_BEGIN(test_dallocx_vs_sdallocx) {
+ compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx,
+ "sdallocx", malloc_sdallocx);
+}
+TEST_END
+
+static void
+malloc_mus_free(void) {
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ malloc_usable_size(p);
+ free(p);
+}
+
+static void
+malloc_sallocx_free(void) {
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ if (sallocx(p, 0) < 1) {
+ test_fail("Unexpected sallocx() failure");
+ }
+ free(p);
+}
+
+TEST_BEGIN(test_mus_vs_sallocx) {
+ compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size",
+ malloc_mus_free, "sallocx", malloc_sallocx_free);
+}
+TEST_END
+
+static void
+malloc_nallocx_free(void) {
+ void *p;
+
+ p = malloc(1);
+ if (p == NULL) {
+ test_fail("Unexpected malloc() failure");
+ return;
+ }
+ if (nallocx(1, 0) < 1) {
+ test_fail("Unexpected nallocx() failure");
+ }
+ free(p);
+}
+
+TEST_BEGIN(test_sallocx_vs_nallocx) {
+ compare_funcs(10*1000*1000, 100*1000*1000, "sallocx",
+ malloc_sallocx_free, "nallocx", malloc_nallocx_free);
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_malloc_vs_mallocx,
+ test_free_vs_dallocx,
+ test_dallocx_vs_sdallocx,
+ test_mus_vs_sallocx,
+ test_sallocx_vs_nallocx);
+}
diff --git a/deps/jemalloc/test/test.sh.in b/deps/jemalloc/test/test.sh.in
index a39f99f6b..39302fff4 100644
--- a/deps/jemalloc/test/test.sh.in
+++ b/deps/jemalloc/test/test.sh.in
@@ -11,6 +11,18 @@ case @abi@ in
;;
esac
+# Make a copy of the @JEMALLOC_CPREFIX@MALLOC_CONF passed in to this script, so
+# it can be repeatedly concatenated with per test settings.
+export MALLOC_CONF_ALL=${@JEMALLOC_CPREFIX@MALLOC_CONF}
+# Concatenate the individual test's MALLOC_CONF and MALLOC_CONF_ALL.
+export_malloc_conf() {
+ if [ "x${MALLOC_CONF}" != "x" -a "x${MALLOC_CONF_ALL}" != "x" ] ; then
+ export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF},${MALLOC_CONF_ALL}"
+ else
+ export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF}${MALLOC_CONF_ALL}"
+ fi
+}
+
# Corresponds to test_status_t.
pass_code=0
skip_code=1
@@ -24,7 +36,21 @@ for t in $@; do
echo
fi
echo "=== ${t} ==="
- ${t}@exe@ @abs_srcroot@ @abs_objroot@
+ if [ -e "@srcroot@${t}.sh" ] ; then
+ # Source the shell script corresponding to the test in a subshell and
+ # execute the test. This allows the shell script to set MALLOC_CONF, which
+ # is then used to set @JEMALLOC_CPREFIX@MALLOC_CONF (thus allowing the
+ # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail).
+ enable_fill=@enable_fill@ \
+ enable_prof=@enable_prof@ \
+ . @srcroot@${t}.sh && \
+ export_malloc_conf && \
+ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@
+ else
+ export MALLOC_CONF= && \
+ export_malloc_conf && \
+ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@
+ fi
result_code=$?
case ${result_code} in
${pass_code})
@@ -37,7 +63,8 @@ for t in $@; do
fail_count=$((fail_count+1))
;;
*)
- echo "Test harness error" 1>&2
+ echo "Test harness error: ${t} w/ MALLOC_CONF=\"${MALLOC_CONF}\"" 1>&2
+ echo "Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh ${t}" 1>&2
exit 1
esac
done
diff --git a/deps/jemalloc/test/unit/SFMT.c b/deps/jemalloc/test/unit/SFMT.c
index c57bd68df..1fc8cf1bc 100644
--- a/deps/jemalloc/test/unit/SFMT.c
+++ b/deps/jemalloc/test/unit/SFMT.c
@@ -35,10 +35,10 @@
*/
#include "test/jemalloc_test.h"
-#define BLOCK_SIZE 10000
-#define BLOCK_SIZE64 (BLOCK_SIZE / 2)
-#define COUNT_1 1000
-#define COUNT_2 700
+#define BLOCK_SIZE 10000
+#define BLOCK_SIZE64 (BLOCK_SIZE / 2)
+#define COUNT_1 1000
+#define COUNT_2 700
static const uint32_t init_gen_rand_32_expected[] = {
3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U,
@@ -445,1012 +445,1011 @@ static const uint32_t init_by_array_32_expected[] = {
2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U
};
static const uint64_t init_gen_rand_64_expected[] = {
- QU(16924766246869039260LLU), QU( 8201438687333352714LLU),
- QU( 2265290287015001750LLU), QU(18397264611805473832LLU),
- QU( 3375255223302384358LLU), QU( 6345559975416828796LLU),
- QU(18229739242790328073LLU), QU( 7596792742098800905LLU),
- QU( 255338647169685981LLU), QU( 2052747240048610300LLU),
- QU(18328151576097299343LLU), QU(12472905421133796567LLU),
- QU(11315245349717600863LLU), QU(16594110197775871209LLU),
- QU(15708751964632456450LLU), QU(10452031272054632535LLU),
- QU(11097646720811454386LLU), QU( 4556090668445745441LLU),
- QU(17116187693090663106LLU), QU(14931526836144510645LLU),
- QU( 9190752218020552591LLU), QU( 9625800285771901401LLU),
- QU(13995141077659972832LLU), QU( 5194209094927829625LLU),
- QU( 4156788379151063303LLU), QU( 8523452593770139494LLU),
- QU(14082382103049296727LLU), QU( 2462601863986088483LLU),
- QU( 3030583461592840678LLU), QU( 5221622077872827681LLU),
- QU( 3084210671228981236LLU), QU(13956758381389953823LLU),
- QU(13503889856213423831LLU), QU(15696904024189836170LLU),
- QU( 4612584152877036206LLU), QU( 6231135538447867881LLU),
- QU(10172457294158869468LLU), QU( 6452258628466708150LLU),
- QU(14044432824917330221LLU), QU( 370168364480044279LLU),
- QU(10102144686427193359LLU), QU( 667870489994776076LLU),
- QU( 2732271956925885858LLU), QU(18027788905977284151LLU),
- QU(15009842788582923859LLU), QU( 7136357960180199542LLU),
- QU(15901736243475578127LLU), QU(16951293785352615701LLU),
- QU(10551492125243691632LLU), QU(17668869969146434804LLU),
- QU(13646002971174390445LLU), QU( 9804471050759613248LLU),
- QU( 5511670439655935493LLU), QU(18103342091070400926LLU),
- QU(17224512747665137533LLU), QU(15534627482992618168LLU),
- QU( 1423813266186582647LLU), QU(15821176807932930024LLU),
- QU( 30323369733607156LLU), QU(11599382494723479403LLU),
- QU( 653856076586810062LLU), QU( 3176437395144899659LLU),
- QU(14028076268147963917LLU), QU(16156398271809666195LLU),
- QU( 3166955484848201676LLU), QU( 5746805620136919390LLU),
- QU(17297845208891256593LLU), QU(11691653183226428483LLU),
- QU(17900026146506981577LLU), QU(15387382115755971042LLU),
- QU(16923567681040845943LLU), QU( 8039057517199388606LLU),
- QU(11748409241468629263LLU), QU( 794358245539076095LLU),
- QU(13438501964693401242LLU), QU(14036803236515618962LLU),
- QU( 5252311215205424721LLU), QU(17806589612915509081LLU),
- QU( 6802767092397596006LLU), QU(14212120431184557140LLU),
- QU( 1072951366761385712LLU), QU(13098491780722836296LLU),
- QU( 9466676828710797353LLU), QU(12673056849042830081LLU),
- QU(12763726623645357580LLU), QU(16468961652999309493LLU),
- QU(15305979875636438926LLU), QU(17444713151223449734LLU),
- QU( 5692214267627883674LLU), QU(13049589139196151505LLU),
- QU( 880115207831670745LLU), QU( 1776529075789695498LLU),
- QU(16695225897801466485LLU), QU(10666901778795346845LLU),
- QU( 6164389346722833869LLU), QU( 2863817793264300475LLU),
- QU( 9464049921886304754LLU), QU( 3993566636740015468LLU),
- QU( 9983749692528514136LLU), QU(16375286075057755211LLU),
- QU(16042643417005440820LLU), QU(11445419662923489877LLU),
- QU( 7999038846885158836LLU), QU( 6721913661721511535LLU),
- QU( 5363052654139357320LLU), QU( 1817788761173584205LLU),
- QU(13290974386445856444LLU), QU( 4650350818937984680LLU),
- QU( 8219183528102484836LLU), QU( 1569862923500819899LLU),
- QU( 4189359732136641860LLU), QU(14202822961683148583LLU),
- QU( 4457498315309429058LLU), QU(13089067387019074834LLU),
- QU(11075517153328927293LLU), QU(10277016248336668389LLU),
- QU( 7070509725324401122LLU), QU(17808892017780289380LLU),
- QU(13143367339909287349LLU), QU( 1377743745360085151LLU),
- QU( 5749341807421286485LLU), QU(14832814616770931325LLU),
- QU( 7688820635324359492LLU), QU(10960474011539770045LLU),
- QU( 81970066653179790LLU), QU(12619476072607878022LLU),
- QU( 4419566616271201744LLU), QU(15147917311750568503LLU),
- QU( 5549739182852706345LLU), QU( 7308198397975204770LLU),
- QU(13580425496671289278LLU), QU(17070764785210130301LLU),
- QU( 8202832846285604405LLU), QU( 6873046287640887249LLU),
- QU( 6927424434308206114LLU), QU( 6139014645937224874LLU),
- QU(10290373645978487639LLU), QU(15904261291701523804LLU),
- QU( 9628743442057826883LLU), QU(18383429096255546714LLU),
- QU( 4977413265753686967LLU), QU( 7714317492425012869LLU),
- QU( 9025232586309926193LLU), QU(14627338359776709107LLU),
- QU(14759849896467790763LLU), QU(10931129435864423252LLU),
- QU( 4588456988775014359LLU), QU(10699388531797056724LLU),
- QU( 468652268869238792LLU), QU( 5755943035328078086LLU),
- QU( 2102437379988580216LLU), QU( 9986312786506674028LLU),
- QU( 2654207180040945604LLU), QU( 8726634790559960062LLU),
- QU( 100497234871808137LLU), QU( 2800137176951425819LLU),
- QU( 6076627612918553487LLU), QU( 5780186919186152796LLU),
- QU( 8179183595769929098LLU), QU( 6009426283716221169LLU),
- QU( 2796662551397449358LLU), QU( 1756961367041986764LLU),
- QU( 6972897917355606205LLU), QU(14524774345368968243LLU),
- QU( 2773529684745706940LLU), QU( 4853632376213075959LLU),
- QU( 4198177923731358102LLU), QU( 8271224913084139776LLU),
- QU( 2741753121611092226LLU), QU(16782366145996731181LLU),
- QU(15426125238972640790LLU), QU(13595497100671260342LLU),
- QU( 3173531022836259898LLU), QU( 6573264560319511662LLU),
- QU(18041111951511157441LLU), QU( 2351433581833135952LLU),
- QU( 3113255578908173487LLU), QU( 1739371330877858784LLU),
- QU(16046126562789165480LLU), QU( 8072101652214192925LLU),
- QU(15267091584090664910LLU), QU( 9309579200403648940LLU),
- QU( 5218892439752408722LLU), QU(14492477246004337115LLU),
- QU(17431037586679770619LLU), QU( 7385248135963250480LLU),
- QU( 9580144956565560660LLU), QU( 4919546228040008720LLU),
- QU(15261542469145035584LLU), QU(18233297270822253102LLU),
- QU( 5453248417992302857LLU), QU( 9309519155931460285LLU),
- QU(10342813012345291756LLU), QU(15676085186784762381LLU),
- QU(15912092950691300645LLU), QU( 9371053121499003195LLU),
- QU( 9897186478226866746LLU), QU(14061858287188196327LLU),
- QU( 122575971620788119LLU), QU(12146750969116317754LLU),
- QU( 4438317272813245201LLU), QU( 8332576791009527119LLU),
- QU(13907785691786542057LLU), QU(10374194887283287467LLU),
- QU( 2098798755649059566LLU), QU( 3416235197748288894LLU),
- QU( 8688269957320773484LLU), QU( 7503964602397371571LLU),
- QU(16724977015147478236LLU), QU( 9461512855439858184LLU),
- QU(13259049744534534727LLU), QU( 3583094952542899294LLU),
- QU( 8764245731305528292LLU), QU(13240823595462088985LLU),
- QU(13716141617617910448LLU), QU(18114969519935960955LLU),
- QU( 2297553615798302206LLU), QU( 4585521442944663362LLU),
- QU(17776858680630198686LLU), QU( 4685873229192163363LLU),
- QU( 152558080671135627LLU), QU(15424900540842670088LLU),
- QU(13229630297130024108LLU), QU(17530268788245718717LLU),
- QU(16675633913065714144LLU), QU( 3158912717897568068LLU),
- QU(15399132185380087288LLU), QU( 7401418744515677872LLU),
- QU(13135412922344398535LLU), QU( 6385314346100509511LLU),
- QU(13962867001134161139LLU), QU(10272780155442671999LLU),
- QU(12894856086597769142LLU), QU(13340877795287554994LLU),
- QU(12913630602094607396LLU), QU(12543167911119793857LLU),
- QU(17343570372251873096LLU), QU(10959487764494150545LLU),
- QU( 6966737953093821128LLU), QU(13780699135496988601LLU),
- QU( 4405070719380142046LLU), QU(14923788365607284982LLU),
- QU( 2869487678905148380LLU), QU( 6416272754197188403LLU),
- QU(15017380475943612591LLU), QU( 1995636220918429487LLU),
- QU( 3402016804620122716LLU), QU(15800188663407057080LLU),
- QU(11362369990390932882LLU), QU(15262183501637986147LLU),
- QU(10239175385387371494LLU), QU( 9352042420365748334LLU),
- QU( 1682457034285119875LLU), QU( 1724710651376289644LLU),
- QU( 2038157098893817966LLU), QU( 9897825558324608773LLU),
- QU( 1477666236519164736LLU), QU(16835397314511233640LLU),
- QU(10370866327005346508LLU), QU(10157504370660621982LLU),
- QU(12113904045335882069LLU), QU(13326444439742783008LLU),
- QU(11302769043000765804LLU), QU(13594979923955228484LLU),
- QU(11779351762613475968LLU), QU( 3786101619539298383LLU),
- QU( 8021122969180846063LLU), QU(15745904401162500495LLU),
- QU(10762168465993897267LLU), QU(13552058957896319026LLU),
- QU(11200228655252462013LLU), QU( 5035370357337441226LLU),
- QU( 7593918984545500013LLU), QU( 5418554918361528700LLU),
- QU( 4858270799405446371LLU), QU( 9974659566876282544LLU),
- QU(18227595922273957859LLU), QU( 2772778443635656220LLU),
- QU(14285143053182085385LLU), QU( 9939700992429600469LLU),
- QU(12756185904545598068LLU), QU( 2020783375367345262LLU),
- QU( 57026775058331227LLU), QU( 950827867930065454LLU),
- QU( 6602279670145371217LLU), QU( 2291171535443566929LLU),
- QU( 5832380724425010313LLU), QU( 1220343904715982285LLU),
- QU(17045542598598037633LLU), QU(15460481779702820971LLU),
- QU(13948388779949365130LLU), QU(13975040175430829518LLU),
- QU(17477538238425541763LLU), QU(11104663041851745725LLU),
- QU(15860992957141157587LLU), QU(14529434633012950138LLU),
- QU( 2504838019075394203LLU), QU( 7512113882611121886LLU),
- QU( 4859973559980886617LLU), QU( 1258601555703250219LLU),
- QU(15594548157514316394LLU), QU( 4516730171963773048LLU),
- QU(11380103193905031983LLU), QU( 6809282239982353344LLU),
- QU(18045256930420065002LLU), QU( 2453702683108791859LLU),
- QU( 977214582986981460LLU), QU( 2006410402232713466LLU),
- QU( 6192236267216378358LLU), QU( 3429468402195675253LLU),
- QU(18146933153017348921LLU), QU(17369978576367231139LLU),
- QU( 1246940717230386603LLU), QU(11335758870083327110LLU),
- QU(14166488801730353682LLU), QU( 9008573127269635732LLU),
- QU(10776025389820643815LLU), QU(15087605441903942962LLU),
- QU( 1359542462712147922LLU), QU(13898874411226454206LLU),
- QU(17911176066536804411LLU), QU( 9435590428600085274LLU),
- QU( 294488509967864007LLU), QU( 8890111397567922046LLU),
- QU( 7987823476034328778LLU), QU(13263827582440967651LLU),
- QU( 7503774813106751573LLU), QU(14974747296185646837LLU),
- QU( 8504765037032103375LLU), QU(17340303357444536213LLU),
- QU( 7704610912964485743LLU), QU( 8107533670327205061LLU),
- QU( 9062969835083315985LLU), QU(16968963142126734184LLU),
- QU(12958041214190810180LLU), QU( 2720170147759570200LLU),
- QU( 2986358963942189566LLU), QU(14884226322219356580LLU),
- QU( 286224325144368520LLU), QU(11313800433154279797LLU),
- QU(18366849528439673248LLU), QU(17899725929482368789LLU),
- QU( 3730004284609106799LLU), QU( 1654474302052767205LLU),
- QU( 5006698007047077032LLU), QU( 8196893913601182838LLU),
- QU(15214541774425211640LLU), QU(17391346045606626073LLU),
- QU( 8369003584076969089LLU), QU( 3939046733368550293LLU),
- QU(10178639720308707785LLU), QU( 2180248669304388697LLU),
- QU( 62894391300126322LLU), QU( 9205708961736223191LLU),
- QU( 6837431058165360438LLU), QU( 3150743890848308214LLU),
- QU(17849330658111464583LLU), QU(12214815643135450865LLU),
- QU(13410713840519603402LLU), QU( 3200778126692046802LLU),
- QU(13354780043041779313LLU), QU( 800850022756886036LLU),
- QU(15660052933953067433LLU), QU( 6572823544154375676LLU),
- QU(11030281857015819266LLU), QU(12682241941471433835LLU),
- QU(11654136407300274693LLU), QU( 4517795492388641109LLU),
- QU( 9757017371504524244LLU), QU(17833043400781889277LLU),
- QU(12685085201747792227LLU), QU(10408057728835019573LLU),
- QU( 98370418513455221LLU), QU( 6732663555696848598LLU),
- QU(13248530959948529780LLU), QU( 3530441401230622826LLU),
- QU(18188251992895660615LLU), QU( 1847918354186383756LLU),
- QU( 1127392190402660921LLU), QU(11293734643143819463LLU),
- QU( 3015506344578682982LLU), QU(13852645444071153329LLU),
- QU( 2121359659091349142LLU), QU( 1294604376116677694LLU),
- QU( 5616576231286352318LLU), QU( 7112502442954235625LLU),
- QU(11676228199551561689LLU), QU(12925182803007305359LLU),
- QU( 7852375518160493082LLU), QU( 1136513130539296154LLU),
- QU( 5636923900916593195LLU), QU( 3221077517612607747LLU),
- QU(17784790465798152513LLU), QU( 3554210049056995938LLU),
- QU(17476839685878225874LLU), QU( 3206836372585575732LLU),
- QU( 2765333945644823430LLU), QU(10080070903718799528LLU),
- QU( 5412370818878286353LLU), QU( 9689685887726257728LLU),
- QU( 8236117509123533998LLU), QU( 1951139137165040214LLU),
- QU( 4492205209227980349LLU), QU(16541291230861602967LLU),
- QU( 1424371548301437940LLU), QU( 9117562079669206794LLU),
- QU(14374681563251691625LLU), QU(13873164030199921303LLU),
- QU( 6680317946770936731LLU), QU(15586334026918276214LLU),
- QU(10896213950976109802LLU), QU( 9506261949596413689LLU),
- QU( 9903949574308040616LLU), QU( 6038397344557204470LLU),
- QU( 174601465422373648LLU), QU(15946141191338238030LLU),
- QU(17142225620992044937LLU), QU( 7552030283784477064LLU),
- QU( 2947372384532947997LLU), QU( 510797021688197711LLU),
- QU( 4962499439249363461LLU), QU( 23770320158385357LLU),
- QU( 959774499105138124LLU), QU( 1468396011518788276LLU),
- QU( 2015698006852312308LLU), QU( 4149400718489980136LLU),
- QU( 5992916099522371188LLU), QU(10819182935265531076LLU),
- QU(16189787999192351131LLU), QU( 342833961790261950LLU),
- QU(12470830319550495336LLU), QU(18128495041912812501LLU),
- QU( 1193600899723524337LLU), QU( 9056793666590079770LLU),
- QU( 2154021227041669041LLU), QU( 4963570213951235735LLU),
- QU( 4865075960209211409LLU), QU( 2097724599039942963LLU),
- QU( 2024080278583179845LLU), QU(11527054549196576736LLU),
- QU(10650256084182390252LLU), QU( 4808408648695766755LLU),
- QU( 1642839215013788844LLU), QU(10607187948250398390LLU),
- QU( 7076868166085913508LLU), QU( 730522571106887032LLU),
- QU(12500579240208524895LLU), QU( 4484390097311355324LLU),
- QU(15145801330700623870LLU), QU( 8055827661392944028LLU),
- QU( 5865092976832712268LLU), QU(15159212508053625143LLU),
- QU( 3560964582876483341LLU), QU( 4070052741344438280LLU),
- QU( 6032585709886855634LLU), QU(15643262320904604873LLU),
- QU( 2565119772293371111LLU), QU( 318314293065348260LLU),
- QU(15047458749141511872LLU), QU( 7772788389811528730LLU),
- QU( 7081187494343801976LLU), QU( 6465136009467253947LLU),
- QU(10425940692543362069LLU), QU( 554608190318339115LLU),
- QU(14796699860302125214LLU), QU( 1638153134431111443LLU),
- QU(10336967447052276248LLU), QU( 8412308070396592958LLU),
- QU( 4004557277152051226LLU), QU( 8143598997278774834LLU),
- QU(16413323996508783221LLU), QU(13139418758033994949LLU),
- QU( 9772709138335006667LLU), QU( 2818167159287157659LLU),
- QU(17091740573832523669LLU), QU(14629199013130751608LLU),
- QU(18268322711500338185LLU), QU( 8290963415675493063LLU),
- QU( 8830864907452542588LLU), QU( 1614839084637494849LLU),
- QU(14855358500870422231LLU), QU( 3472996748392519937LLU),
- QU(15317151166268877716LLU), QU( 5825895018698400362LLU),
- QU(16730208429367544129LLU), QU(10481156578141202800LLU),
- QU( 4746166512382823750LLU), QU(12720876014472464998LLU),
- QU( 8825177124486735972LLU), QU(13733447296837467838LLU),
- QU( 6412293741681359625LLU), QU( 8313213138756135033LLU),
- QU(11421481194803712517LLU), QU( 7997007691544174032LLU),
- QU( 6812963847917605930LLU), QU( 9683091901227558641LLU),
- QU(14703594165860324713LLU), QU( 1775476144519618309LLU),
- QU( 2724283288516469519LLU), QU( 717642555185856868LLU),
- QU( 8736402192215092346LLU), QU(11878800336431381021LLU),
- QU( 4348816066017061293LLU), QU( 6115112756583631307LLU),
- QU( 9176597239667142976LLU), QU(12615622714894259204LLU),
- QU(10283406711301385987LLU), QU( 5111762509485379420LLU),
- QU( 3118290051198688449LLU), QU( 7345123071632232145LLU),
- QU( 9176423451688682359LLU), QU( 4843865456157868971LLU),
- QU(12008036363752566088LLU), QU(12058837181919397720LLU),
- QU( 2145073958457347366LLU), QU( 1526504881672818067LLU),
- QU( 3488830105567134848LLU), QU(13208362960674805143LLU),
- QU( 4077549672899572192LLU), QU( 7770995684693818365LLU),
- QU( 1398532341546313593LLU), QU(12711859908703927840LLU),
- QU( 1417561172594446813LLU), QU(17045191024194170604LLU),
- QU( 4101933177604931713LLU), QU(14708428834203480320LLU),
- QU(17447509264469407724LLU), QU(14314821973983434255LLU),
- QU(17990472271061617265LLU), QU( 5087756685841673942LLU),
- QU(12797820586893859939LLU), QU( 1778128952671092879LLU),
- QU( 3535918530508665898LLU), QU( 9035729701042481301LLU),
- QU(14808661568277079962LLU), QU(14587345077537747914LLU),
- QU(11920080002323122708LLU), QU( 6426515805197278753LLU),
- QU( 3295612216725984831LLU), QU(11040722532100876120LLU),
- QU(12305952936387598754LLU), QU(16097391899742004253LLU),
- QU( 4908537335606182208LLU), QU(12446674552196795504LLU),
- QU(16010497855816895177LLU), QU( 9194378874788615551LLU),
- QU( 3382957529567613384LLU), QU( 5154647600754974077LLU),
- QU( 9801822865328396141LLU), QU( 9023662173919288143LLU),
- QU(17623115353825147868LLU), QU( 8238115767443015816LLU),
- QU(15811444159859002560LLU), QU( 9085612528904059661LLU),
- QU( 6888601089398614254LLU), QU( 258252992894160189LLU),
- QU( 6704363880792428622LLU), QU( 6114966032147235763LLU),
- QU(11075393882690261875LLU), QU( 8797664238933620407LLU),
- QU( 5901892006476726920LLU), QU( 5309780159285518958LLU),
- QU(14940808387240817367LLU), QU(14642032021449656698LLU),
- QU( 9808256672068504139LLU), QU( 3670135111380607658LLU),
- QU(11211211097845960152LLU), QU( 1474304506716695808LLU),
- QU(15843166204506876239LLU), QU( 7661051252471780561LLU),
- QU(10170905502249418476LLU), QU( 7801416045582028589LLU),
- QU( 2763981484737053050LLU), QU( 9491377905499253054LLU),
- QU(16201395896336915095LLU), QU( 9256513756442782198LLU),
- QU( 5411283157972456034LLU), QU( 5059433122288321676LLU),
- QU( 4327408006721123357LLU), QU( 9278544078834433377LLU),
- QU( 7601527110882281612LLU), QU(11848295896975505251LLU),
- QU(12096998801094735560LLU), QU(14773480339823506413LLU),
- QU(15586227433895802149LLU), QU(12786541257830242872LLU),
- QU( 6904692985140503067LLU), QU( 5309011515263103959LLU),
- QU(12105257191179371066LLU), QU(14654380212442225037LLU),
- QU( 2556774974190695009LLU), QU( 4461297399927600261LLU),
- QU(14888225660915118646LLU), QU(14915459341148291824LLU),
- QU( 2738802166252327631LLU), QU( 6047155789239131512LLU),
- QU(12920545353217010338LLU), QU(10697617257007840205LLU),
- QU( 2751585253158203504LLU), QU(13252729159780047496LLU),
- QU(14700326134672815469LLU), QU(14082527904374600529LLU),
- QU(16852962273496542070LLU), QU(17446675504235853907LLU),
- QU(15019600398527572311LLU), QU(12312781346344081551LLU),
- QU(14524667935039810450LLU), QU( 5634005663377195738LLU),
- QU(11375574739525000569LLU), QU( 2423665396433260040LLU),
- QU( 5222836914796015410LLU), QU( 4397666386492647387LLU),
- QU( 4619294441691707638LLU), QU( 665088602354770716LLU),
- QU(13246495665281593610LLU), QU( 6564144270549729409LLU),
- QU(10223216188145661688LLU), QU( 3961556907299230585LLU),
- QU(11543262515492439914LLU), QU(16118031437285993790LLU),
- QU( 7143417964520166465LLU), QU(13295053515909486772LLU),
- QU( 40434666004899675LLU), QU(17127804194038347164LLU),
- QU( 8599165966560586269LLU), QU( 8214016749011284903LLU),
- QU(13725130352140465239LLU), QU( 5467254474431726291LLU),
- QU( 7748584297438219877LLU), QU(16933551114829772472LLU),
- QU( 2169618439506799400LLU), QU( 2169787627665113463LLU),
- QU(17314493571267943764LLU), QU(18053575102911354912LLU),
- QU(11928303275378476973LLU), QU(11593850925061715550LLU),
- QU(17782269923473589362LLU), QU( 3280235307704747039LLU),
- QU( 6145343578598685149LLU), QU(17080117031114086090LLU),
- QU(18066839902983594755LLU), QU( 6517508430331020706LLU),
- QU( 8092908893950411541LLU), QU(12558378233386153732LLU),
- QU( 4476532167973132976LLU), QU(16081642430367025016LLU),
- QU( 4233154094369139361LLU), QU( 8693630486693161027LLU),
- QU(11244959343027742285LLU), QU(12273503967768513508LLU),
- QU(14108978636385284876LLU), QU( 7242414665378826984LLU),
- QU( 6561316938846562432LLU), QU( 8601038474994665795LLU),
- QU(17532942353612365904LLU), QU(17940076637020912186LLU),
- QU( 7340260368823171304LLU), QU( 7061807613916067905LLU),
- QU(10561734935039519326LLU), QU(17990796503724650862LLU),
- QU( 6208732943911827159LLU), QU( 359077562804090617LLU),
- QU(14177751537784403113LLU), QU(10659599444915362902LLU),
- QU(15081727220615085833LLU), QU(13417573895659757486LLU),
- QU(15513842342017811524LLU), QU(11814141516204288231LLU),
- QU( 1827312513875101814LLU), QU( 2804611699894603103LLU),
- QU(17116500469975602763LLU), QU(12270191815211952087LLU),
- QU(12256358467786024988LLU), QU(18435021722453971267LLU),
- QU( 671330264390865618LLU), QU( 476504300460286050LLU),
- QU(16465470901027093441LLU), QU( 4047724406247136402LLU),
- QU( 1322305451411883346LLU), QU( 1388308688834322280LLU),
- QU( 7303989085269758176LLU), QU( 9323792664765233642LLU),
- QU( 4542762575316368936LLU), QU(17342696132794337618LLU),
- QU( 4588025054768498379LLU), QU(13415475057390330804LLU),
- QU(17880279491733405570LLU), QU(10610553400618620353LLU),
- QU( 3180842072658960139LLU), QU(13002966655454270120LLU),
- QU( 1665301181064982826LLU), QU( 7083673946791258979LLU),
- QU( 190522247122496820LLU), QU(17388280237250677740LLU),
- QU( 8430770379923642945LLU), QU(12987180971921668584LLU),
- QU( 2311086108365390642LLU), QU( 2870984383579822345LLU),
- QU(14014682609164653318LLU), QU(14467187293062251484LLU),
- QU( 192186361147413298LLU), QU(15171951713531796524LLU),
- QU( 9900305495015948728LLU), QU(17958004775615466344LLU),
- QU(14346380954498606514LLU), QU(18040047357617407096LLU),
- QU( 5035237584833424532LLU), QU(15089555460613972287LLU),
- QU( 4131411873749729831LLU), QU( 1329013581168250330LLU),
- QU(10095353333051193949LLU), QU(10749518561022462716LLU),
- QU( 9050611429810755847LLU), QU(15022028840236655649LLU),
- QU( 8775554279239748298LLU), QU(13105754025489230502LLU),
- QU(15471300118574167585LLU), QU( 89864764002355628LLU),
- QU( 8776416323420466637LLU), QU( 5280258630612040891LLU),
- QU( 2719174488591862912LLU), QU( 7599309137399661994LLU),
- QU(15012887256778039979LLU), QU(14062981725630928925LLU),
- QU(12038536286991689603LLU), QU( 7089756544681775245LLU),
- QU(10376661532744718039LLU), QU( 1265198725901533130LLU),
- QU(13807996727081142408LLU), QU( 2935019626765036403LLU),
- QU( 7651672460680700141LLU), QU( 3644093016200370795LLU),
- QU( 2840982578090080674LLU), QU(17956262740157449201LLU),
- QU(18267979450492880548LLU), QU(11799503659796848070LLU),
- QU( 9942537025669672388LLU), QU(11886606816406990297LLU),
- QU( 5488594946437447576LLU), QU( 7226714353282744302LLU),
- QU( 3784851653123877043LLU), QU( 878018453244803041LLU),
- QU(12110022586268616085LLU), QU( 734072179404675123LLU),
- QU(11869573627998248542LLU), QU( 469150421297783998LLU),
- QU( 260151124912803804LLU), QU(11639179410120968649LLU),
- QU( 9318165193840846253LLU), QU(12795671722734758075LLU),
- QU(15318410297267253933LLU), QU( 691524703570062620LLU),
- QU( 5837129010576994601LLU), QU(15045963859726941052LLU),
- QU( 5850056944932238169LLU), QU(12017434144750943807LLU),
- QU( 7447139064928956574LLU), QU( 3101711812658245019LLU),
- QU(16052940704474982954LLU), QU(18195745945986994042LLU),
- QU( 8932252132785575659LLU), QU(13390817488106794834LLU),
- QU(11582771836502517453LLU), QU( 4964411326683611686LLU),
- QU( 2195093981702694011LLU), QU(14145229538389675669LLU),
- QU(16459605532062271798LLU), QU( 866316924816482864LLU),
- QU( 4593041209937286377LLU), QU( 8415491391910972138LLU),
- QU( 4171236715600528969LLU), QU(16637569303336782889LLU),
- QU( 2002011073439212680LLU), QU(17695124661097601411LLU),
- QU( 4627687053598611702LLU), QU( 7895831936020190403LLU),
- QU( 8455951300917267802LLU), QU( 2923861649108534854LLU),
- QU( 8344557563927786255LLU), QU( 6408671940373352556LLU),
- QU(12210227354536675772LLU), QU(14294804157294222295LLU),
- QU(10103022425071085127LLU), QU(10092959489504123771LLU),
- QU( 6554774405376736268LLU), QU(12629917718410641774LLU),
- QU( 6260933257596067126LLU), QU( 2460827021439369673LLU),
- QU( 2541962996717103668LLU), QU( 597377203127351475LLU),
- QU( 5316984203117315309LLU), QU( 4811211393563241961LLU),
- QU(13119698597255811641LLU), QU( 8048691512862388981LLU),
- QU(10216818971194073842LLU), QU( 4612229970165291764LLU),
- QU(10000980798419974770LLU), QU( 6877640812402540687LLU),
- QU( 1488727563290436992LLU), QU( 2227774069895697318LLU),
- QU(11237754507523316593LLU), QU(13478948605382290972LLU),
- QU( 1963583846976858124LLU), QU( 5512309205269276457LLU),
- QU( 3972770164717652347LLU), QU( 3841751276198975037LLU),
- QU(10283343042181903117LLU), QU( 8564001259792872199LLU),
- QU(16472187244722489221LLU), QU( 8953493499268945921LLU),
- QU( 3518747340357279580LLU), QU( 4003157546223963073LLU),
- QU( 3270305958289814590LLU), QU( 3966704458129482496LLU),
- QU( 8122141865926661939LLU), QU(14627734748099506653LLU),
- QU(13064426990862560568LLU), QU( 2414079187889870829LLU),
- QU( 5378461209354225306LLU), QU(10841985740128255566LLU),
- QU( 538582442885401738LLU), QU( 7535089183482905946LLU),
- QU(16117559957598879095LLU), QU( 8477890721414539741LLU),
- QU( 1459127491209533386LLU), QU(17035126360733620462LLU),
- QU( 8517668552872379126LLU), QU(10292151468337355014LLU),
- QU(17081267732745344157LLU), QU(13751455337946087178LLU),
- QU(14026945459523832966LLU), QU( 6653278775061723516LLU),
- QU(10619085543856390441LLU), QU( 2196343631481122885LLU),
- QU(10045966074702826136LLU), QU(10082317330452718282LLU),
- QU( 5920859259504831242LLU), QU( 9951879073426540617LLU),
- QU( 7074696649151414158LLU), QU(15808193543879464318LLU),
- QU( 7385247772746953374LLU), QU( 3192003544283864292LLU),
- QU(18153684490917593847LLU), QU(12423498260668568905LLU),
- QU(10957758099756378169LLU), QU(11488762179911016040LLU),
- QU( 2099931186465333782LLU), QU(11180979581250294432LLU),
- QU( 8098916250668367933LLU), QU( 3529200436790763465LLU),
- QU(12988418908674681745LLU), QU( 6147567275954808580LLU),
- QU( 3207503344604030989LLU), QU(10761592604898615360LLU),
- QU( 229854861031893504LLU), QU( 8809853962667144291LLU),
- QU(13957364469005693860LLU), QU( 7634287665224495886LLU),
- QU(12353487366976556874LLU), QU( 1134423796317152034LLU),
- QU( 2088992471334107068LLU), QU( 7393372127190799698LLU),
- QU( 1845367839871058391LLU), QU( 207922563987322884LLU),
- QU(11960870813159944976LLU), QU(12182120053317317363LLU),
- QU(17307358132571709283LLU), QU(13871081155552824936LLU),
- QU(18304446751741566262LLU), QU( 7178705220184302849LLU),
- QU(10929605677758824425LLU), QU(16446976977835806844LLU),
- QU(13723874412159769044LLU), QU( 6942854352100915216LLU),
- QU( 1726308474365729390LLU), QU( 2150078766445323155LLU),
- QU(15345558947919656626LLU), QU(12145453828874527201LLU),
- QU( 2054448620739726849LLU), QU( 2740102003352628137LLU),
- QU(11294462163577610655LLU), QU( 756164283387413743LLU),
- QU(17841144758438810880LLU), QU(10802406021185415861LLU),
- QU( 8716455530476737846LLU), QU( 6321788834517649606LLU),
- QU(14681322910577468426LLU), QU(17330043563884336387LLU),
- QU(12701802180050071614LLU), QU(14695105111079727151LLU),
- QU( 5112098511654172830LLU), QU( 4957505496794139973LLU),
- QU( 8270979451952045982LLU), QU(12307685939199120969LLU),
- QU(12425799408953443032LLU), QU( 8376410143634796588LLU),
- QU(16621778679680060464LLU), QU( 3580497854566660073LLU),
- QU( 1122515747803382416LLU), QU( 857664980960597599LLU),
- QU( 6343640119895925918LLU), QU(12878473260854462891LLU),
- QU(10036813920765722626LLU), QU(14451335468363173812LLU),
- QU( 5476809692401102807LLU), QU(16442255173514366342LLU),
- QU(13060203194757167104LLU), QU(14354124071243177715LLU),
- QU(15961249405696125227LLU), QU(13703893649690872584LLU),
- QU( 363907326340340064LLU), QU( 6247455540491754842LLU),
- QU(12242249332757832361LLU), QU( 156065475679796717LLU),
- QU( 9351116235749732355LLU), QU( 4590350628677701405LLU),
- QU( 1671195940982350389LLU), QU(13501398458898451905LLU),
- QU( 6526341991225002255LLU), QU( 1689782913778157592LLU),
- QU( 7439222350869010334LLU), QU(13975150263226478308LLU),
- QU(11411961169932682710LLU), QU(17204271834833847277LLU),
- QU( 541534742544435367LLU), QU( 6591191931218949684LLU),
- QU( 2645454775478232486LLU), QU( 4322857481256485321LLU),
- QU( 8477416487553065110LLU), QU(12902505428548435048LLU),
- QU( 971445777981341415LLU), QU(14995104682744976712LLU),
- QU( 4243341648807158063LLU), QU( 8695061252721927661LLU),
- QU( 5028202003270177222LLU), QU( 2289257340915567840LLU),
- QU(13870416345121866007LLU), QU(13994481698072092233LLU),
- QU( 6912785400753196481LLU), QU( 2278309315841980139LLU),
- QU( 4329765449648304839LLU), QU( 5963108095785485298LLU),
- QU( 4880024847478722478LLU), QU(16015608779890240947LLU),
- QU( 1866679034261393544LLU), QU( 914821179919731519LLU),
- QU( 9643404035648760131LLU), QU( 2418114953615593915LLU),
- QU( 944756836073702374LLU), QU(15186388048737296834LLU),
- QU( 7723355336128442206LLU), QU( 7500747479679599691LLU),
- QU(18013961306453293634LLU), QU( 2315274808095756456LLU),
- QU(13655308255424029566LLU), QU(17203800273561677098LLU),
- QU( 1382158694422087756LLU), QU( 5090390250309588976LLU),
- QU( 517170818384213989LLU), QU( 1612709252627729621LLU),
- QU( 1330118955572449606LLU), QU( 300922478056709885LLU),
- QU(18115693291289091987LLU), QU(13491407109725238321LLU),
- QU(15293714633593827320LLU), QU( 5151539373053314504LLU),
- QU( 5951523243743139207LLU), QU(14459112015249527975LLU),
- QU( 5456113959000700739LLU), QU( 3877918438464873016LLU),
- QU(12534071654260163555LLU), QU(15871678376893555041LLU),
- QU(11005484805712025549LLU), QU(16353066973143374252LLU),
- QU( 4358331472063256685LLU), QU( 8268349332210859288LLU),
- QU(12485161590939658075LLU), QU(13955993592854471343LLU),
- QU( 5911446886848367039LLU), QU(14925834086813706974LLU),
- QU( 6590362597857994805LLU), QU( 1280544923533661875LLU),
- QU( 1637756018947988164LLU), QU( 4734090064512686329LLU),
- QU(16693705263131485912LLU), QU( 6834882340494360958LLU),
- QU( 8120732176159658505LLU), QU( 2244371958905329346LLU),
- QU(10447499707729734021LLU), QU( 7318742361446942194LLU),
- QU( 8032857516355555296LLU), QU(14023605983059313116LLU),
- QU( 1032336061815461376LLU), QU( 9840995337876562612LLU),
- QU( 9869256223029203587LLU), QU(12227975697177267636LLU),
- QU(12728115115844186033LLU), QU( 7752058479783205470LLU),
- QU( 729733219713393087LLU), QU(12954017801239007622LLU)
+ KQU(16924766246869039260), KQU( 8201438687333352714),
+ KQU( 2265290287015001750), KQU(18397264611805473832),
+ KQU( 3375255223302384358), KQU( 6345559975416828796),
+ KQU(18229739242790328073), KQU( 7596792742098800905),
+ KQU( 255338647169685981), KQU( 2052747240048610300),
+ KQU(18328151576097299343), KQU(12472905421133796567),
+ KQU(11315245349717600863), KQU(16594110197775871209),
+ KQU(15708751964632456450), KQU(10452031272054632535),
+ KQU(11097646720811454386), KQU( 4556090668445745441),
+ KQU(17116187693090663106), KQU(14931526836144510645),
+ KQU( 9190752218020552591), KQU( 9625800285771901401),
+ KQU(13995141077659972832), KQU( 5194209094927829625),
+ KQU( 4156788379151063303), KQU( 8523452593770139494),
+ KQU(14082382103049296727), KQU( 2462601863986088483),
+ KQU( 3030583461592840678), KQU( 5221622077872827681),
+ KQU( 3084210671228981236), KQU(13956758381389953823),
+ KQU(13503889856213423831), KQU(15696904024189836170),
+ KQU( 4612584152877036206), KQU( 6231135538447867881),
+ KQU(10172457294158869468), KQU( 6452258628466708150),
+ KQU(14044432824917330221), KQU( 370168364480044279),
+ KQU(10102144686427193359), KQU( 667870489994776076),
+ KQU( 2732271956925885858), KQU(18027788905977284151),
+ KQU(15009842788582923859), KQU( 7136357960180199542),
+ KQU(15901736243475578127), KQU(16951293785352615701),
+ KQU(10551492125243691632), KQU(17668869969146434804),
+ KQU(13646002971174390445), KQU( 9804471050759613248),
+ KQU( 5511670439655935493), KQU(18103342091070400926),
+ KQU(17224512747665137533), KQU(15534627482992618168),
+ KQU( 1423813266186582647), KQU(15821176807932930024),
+ KQU( 30323369733607156), KQU(11599382494723479403),
+ KQU( 653856076586810062), KQU( 3176437395144899659),
+ KQU(14028076268147963917), KQU(16156398271809666195),
+ KQU( 3166955484848201676), KQU( 5746805620136919390),
+ KQU(17297845208891256593), KQU(11691653183226428483),
+ KQU(17900026146506981577), KQU(15387382115755971042),
+ KQU(16923567681040845943), KQU( 8039057517199388606),
+ KQU(11748409241468629263), KQU( 794358245539076095),
+ KQU(13438501964693401242), KQU(14036803236515618962),
+ KQU( 5252311215205424721), KQU(17806589612915509081),
+ KQU( 6802767092397596006), KQU(14212120431184557140),
+ KQU( 1072951366761385712), KQU(13098491780722836296),
+ KQU( 9466676828710797353), KQU(12673056849042830081),
+ KQU(12763726623645357580), KQU(16468961652999309493),
+ KQU(15305979875636438926), KQU(17444713151223449734),
+ KQU( 5692214267627883674), KQU(13049589139196151505),
+ KQU( 880115207831670745), KQU( 1776529075789695498),
+ KQU(16695225897801466485), KQU(10666901778795346845),
+ KQU( 6164389346722833869), KQU( 2863817793264300475),
+ KQU( 9464049921886304754), KQU( 3993566636740015468),
+ KQU( 9983749692528514136), KQU(16375286075057755211),
+ KQU(16042643417005440820), KQU(11445419662923489877),
+ KQU( 7999038846885158836), KQU( 6721913661721511535),
+ KQU( 5363052654139357320), KQU( 1817788761173584205),
+ KQU(13290974386445856444), KQU( 4650350818937984680),
+ KQU( 8219183528102484836), KQU( 1569862923500819899),
+ KQU( 4189359732136641860), KQU(14202822961683148583),
+ KQU( 4457498315309429058), KQU(13089067387019074834),
+ KQU(11075517153328927293), KQU(10277016248336668389),
+ KQU( 7070509725324401122), KQU(17808892017780289380),
+ KQU(13143367339909287349), KQU( 1377743745360085151),
+ KQU( 5749341807421286485), KQU(14832814616770931325),
+ KQU( 7688820635324359492), KQU(10960474011539770045),
+ KQU( 81970066653179790), KQU(12619476072607878022),
+ KQU( 4419566616271201744), KQU(15147917311750568503),
+ KQU( 5549739182852706345), KQU( 7308198397975204770),
+ KQU(13580425496671289278), KQU(17070764785210130301),
+ KQU( 8202832846285604405), KQU( 6873046287640887249),
+ KQU( 6927424434308206114), KQU( 6139014645937224874),
+ KQU(10290373645978487639), KQU(15904261291701523804),
+ KQU( 9628743442057826883), KQU(18383429096255546714),
+ KQU( 4977413265753686967), KQU( 7714317492425012869),
+ KQU( 9025232586309926193), KQU(14627338359776709107),
+ KQU(14759849896467790763), KQU(10931129435864423252),
+ KQU( 4588456988775014359), KQU(10699388531797056724),
+ KQU( 468652268869238792), KQU( 5755943035328078086),
+ KQU( 2102437379988580216), KQU( 9986312786506674028),
+ KQU( 2654207180040945604), KQU( 8726634790559960062),
+ KQU( 100497234871808137), KQU( 2800137176951425819),
+ KQU( 6076627612918553487), KQU( 5780186919186152796),
+ KQU( 8179183595769929098), KQU( 6009426283716221169),
+ KQU( 2796662551397449358), KQU( 1756961367041986764),
+ KQU( 6972897917355606205), KQU(14524774345368968243),
+ KQU( 2773529684745706940), KQU( 4853632376213075959),
+ KQU( 4198177923731358102), KQU( 8271224913084139776),
+ KQU( 2741753121611092226), KQU(16782366145996731181),
+ KQU(15426125238972640790), KQU(13595497100671260342),
+ KQU( 3173531022836259898), KQU( 6573264560319511662),
+ KQU(18041111951511157441), KQU( 2351433581833135952),
+ KQU( 3113255578908173487), KQU( 1739371330877858784),
+ KQU(16046126562789165480), KQU( 8072101652214192925),
+ KQU(15267091584090664910), KQU( 9309579200403648940),
+ KQU( 5218892439752408722), KQU(14492477246004337115),
+ KQU(17431037586679770619), KQU( 7385248135963250480),
+ KQU( 9580144956565560660), KQU( 4919546228040008720),
+ KQU(15261542469145035584), KQU(18233297270822253102),
+ KQU( 5453248417992302857), KQU( 9309519155931460285),
+ KQU(10342813012345291756), KQU(15676085186784762381),
+ KQU(15912092950691300645), KQU( 9371053121499003195),
+ KQU( 9897186478226866746), KQU(14061858287188196327),
+ KQU( 122575971620788119), KQU(12146750969116317754),
+ KQU( 4438317272813245201), KQU( 8332576791009527119),
+ KQU(13907785691786542057), KQU(10374194887283287467),
+ KQU( 2098798755649059566), KQU( 3416235197748288894),
+ KQU( 8688269957320773484), KQU( 7503964602397371571),
+ KQU(16724977015147478236), KQU( 9461512855439858184),
+ KQU(13259049744534534727), KQU( 3583094952542899294),
+ KQU( 8764245731305528292), KQU(13240823595462088985),
+ KQU(13716141617617910448), KQU(18114969519935960955),
+ KQU( 2297553615798302206), KQU( 4585521442944663362),
+ KQU(17776858680630198686), KQU( 4685873229192163363),
+ KQU( 152558080671135627), KQU(15424900540842670088),
+ KQU(13229630297130024108), KQU(17530268788245718717),
+ KQU(16675633913065714144), KQU( 3158912717897568068),
+ KQU(15399132185380087288), KQU( 7401418744515677872),
+ KQU(13135412922344398535), KQU( 6385314346100509511),
+ KQU(13962867001134161139), KQU(10272780155442671999),
+ KQU(12894856086597769142), KQU(13340877795287554994),
+ KQU(12913630602094607396), KQU(12543167911119793857),
+ KQU(17343570372251873096), KQU(10959487764494150545),
+ KQU( 6966737953093821128), KQU(13780699135496988601),
+ KQU( 4405070719380142046), KQU(14923788365607284982),
+ KQU( 2869487678905148380), KQU( 6416272754197188403),
+ KQU(15017380475943612591), KQU( 1995636220918429487),
+ KQU( 3402016804620122716), KQU(15800188663407057080),
+ KQU(11362369990390932882), KQU(15262183501637986147),
+ KQU(10239175385387371494), KQU( 9352042420365748334),
+ KQU( 1682457034285119875), KQU( 1724710651376289644),
+ KQU( 2038157098893817966), KQU( 9897825558324608773),
+ KQU( 1477666236519164736), KQU(16835397314511233640),
+ KQU(10370866327005346508), KQU(10157504370660621982),
+ KQU(12113904045335882069), KQU(13326444439742783008),
+ KQU(11302769043000765804), KQU(13594979923955228484),
+ KQU(11779351762613475968), KQU( 3786101619539298383),
+ KQU( 8021122969180846063), KQU(15745904401162500495),
+ KQU(10762168465993897267), KQU(13552058957896319026),
+ KQU(11200228655252462013), KQU( 5035370357337441226),
+ KQU( 7593918984545500013), KQU( 5418554918361528700),
+ KQU( 4858270799405446371), KQU( 9974659566876282544),
+ KQU(18227595922273957859), KQU( 2772778443635656220),
+ KQU(14285143053182085385), KQU( 9939700992429600469),
+ KQU(12756185904545598068), KQU( 2020783375367345262),
+ KQU( 57026775058331227), KQU( 950827867930065454),
+ KQU( 6602279670145371217), KQU( 2291171535443566929),
+ KQU( 5832380724425010313), KQU( 1220343904715982285),
+ KQU(17045542598598037633), KQU(15460481779702820971),
+ KQU(13948388779949365130), KQU(13975040175430829518),
+ KQU(17477538238425541763), KQU(11104663041851745725),
+ KQU(15860992957141157587), KQU(14529434633012950138),
+ KQU( 2504838019075394203), KQU( 7512113882611121886),
+ KQU( 4859973559980886617), KQU( 1258601555703250219),
+ KQU(15594548157514316394), KQU( 4516730171963773048),
+ KQU(11380103193905031983), KQU( 6809282239982353344),
+ KQU(18045256930420065002), KQU( 2453702683108791859),
+ KQU( 977214582986981460), KQU( 2006410402232713466),
+ KQU( 6192236267216378358), KQU( 3429468402195675253),
+ KQU(18146933153017348921), KQU(17369978576367231139),
+ KQU( 1246940717230386603), KQU(11335758870083327110),
+ KQU(14166488801730353682), KQU( 9008573127269635732),
+ KQU(10776025389820643815), KQU(15087605441903942962),
+ KQU( 1359542462712147922), KQU(13898874411226454206),
+ KQU(17911176066536804411), KQU( 9435590428600085274),
+ KQU( 294488509967864007), KQU( 8890111397567922046),
+ KQU( 7987823476034328778), KQU(13263827582440967651),
+ KQU( 7503774813106751573), KQU(14974747296185646837),
+ KQU( 8504765037032103375), KQU(17340303357444536213),
+ KQU( 7704610912964485743), KQU( 8107533670327205061),
+ KQU( 9062969835083315985), KQU(16968963142126734184),
+ KQU(12958041214190810180), KQU( 2720170147759570200),
+ KQU( 2986358963942189566), KQU(14884226322219356580),
+ KQU( 286224325144368520), KQU(11313800433154279797),
+ KQU(18366849528439673248), KQU(17899725929482368789),
+ KQU( 3730004284609106799), KQU( 1654474302052767205),
+ KQU( 5006698007047077032), KQU( 8196893913601182838),
+ KQU(15214541774425211640), KQU(17391346045606626073),
+ KQU( 8369003584076969089), KQU( 3939046733368550293),
+ KQU(10178639720308707785), KQU( 2180248669304388697),
+ KQU( 62894391300126322), KQU( 9205708961736223191),
+ KQU( 6837431058165360438), KQU( 3150743890848308214),
+ KQU(17849330658111464583), KQU(12214815643135450865),
+ KQU(13410713840519603402), KQU( 3200778126692046802),
+ KQU(13354780043041779313), KQU( 800850022756886036),
+ KQU(15660052933953067433), KQU( 6572823544154375676),
+ KQU(11030281857015819266), KQU(12682241941471433835),
+ KQU(11654136407300274693), KQU( 4517795492388641109),
+ KQU( 9757017371504524244), KQU(17833043400781889277),
+ KQU(12685085201747792227), KQU(10408057728835019573),
+ KQU( 98370418513455221), KQU( 6732663555696848598),
+ KQU(13248530959948529780), KQU( 3530441401230622826),
+ KQU(18188251992895660615), KQU( 1847918354186383756),
+ KQU( 1127392190402660921), KQU(11293734643143819463),
+ KQU( 3015506344578682982), KQU(13852645444071153329),
+ KQU( 2121359659091349142), KQU( 1294604376116677694),
+ KQU( 5616576231286352318), KQU( 7112502442954235625),
+ KQU(11676228199551561689), KQU(12925182803007305359),
+ KQU( 7852375518160493082), KQU( 1136513130539296154),
+ KQU( 5636923900916593195), KQU( 3221077517612607747),
+ KQU(17784790465798152513), KQU( 3554210049056995938),
+ KQU(17476839685878225874), KQU( 3206836372585575732),
+ KQU( 2765333945644823430), KQU(10080070903718799528),
+ KQU( 5412370818878286353), KQU( 9689685887726257728),
+ KQU( 8236117509123533998), KQU( 1951139137165040214),
+ KQU( 4492205209227980349), KQU(16541291230861602967),
+ KQU( 1424371548301437940), KQU( 9117562079669206794),
+ KQU(14374681563251691625), KQU(13873164030199921303),
+ KQU( 6680317946770936731), KQU(15586334026918276214),
+ KQU(10896213950976109802), KQU( 9506261949596413689),
+ KQU( 9903949574308040616), KQU( 6038397344557204470),
+ KQU( 174601465422373648), KQU(15946141191338238030),
+ KQU(17142225620992044937), KQU( 7552030283784477064),
+ KQU( 2947372384532947997), KQU( 510797021688197711),
+ KQU( 4962499439249363461), KQU( 23770320158385357),
+ KQU( 959774499105138124), KQU( 1468396011518788276),
+ KQU( 2015698006852312308), KQU( 4149400718489980136),
+ KQU( 5992916099522371188), KQU(10819182935265531076),
+ KQU(16189787999192351131), KQU( 342833961790261950),
+ KQU(12470830319550495336), KQU(18128495041912812501),
+ KQU( 1193600899723524337), KQU( 9056793666590079770),
+ KQU( 2154021227041669041), KQU( 4963570213951235735),
+ KQU( 4865075960209211409), KQU( 2097724599039942963),
+ KQU( 2024080278583179845), KQU(11527054549196576736),
+ KQU(10650256084182390252), KQU( 4808408648695766755),
+ KQU( 1642839215013788844), KQU(10607187948250398390),
+ KQU( 7076868166085913508), KQU( 730522571106887032),
+ KQU(12500579240208524895), KQU( 4484390097311355324),
+ KQU(15145801330700623870), KQU( 8055827661392944028),
+ KQU( 5865092976832712268), KQU(15159212508053625143),
+ KQU( 3560964582876483341), KQU( 4070052741344438280),
+ KQU( 6032585709886855634), KQU(15643262320904604873),
+ KQU( 2565119772293371111), KQU( 318314293065348260),
+ KQU(15047458749141511872), KQU( 7772788389811528730),
+ KQU( 7081187494343801976), KQU( 6465136009467253947),
+ KQU(10425940692543362069), KQU( 554608190318339115),
+ KQU(14796699860302125214), KQU( 1638153134431111443),
+ KQU(10336967447052276248), KQU( 8412308070396592958),
+ KQU( 4004557277152051226), KQU( 8143598997278774834),
+ KQU(16413323996508783221), KQU(13139418758033994949),
+ KQU( 9772709138335006667), KQU( 2818167159287157659),
+ KQU(17091740573832523669), KQU(14629199013130751608),
+ KQU(18268322711500338185), KQU( 8290963415675493063),
+ KQU( 8830864907452542588), KQU( 1614839084637494849),
+ KQU(14855358500870422231), KQU( 3472996748392519937),
+ KQU(15317151166268877716), KQU( 5825895018698400362),
+ KQU(16730208429367544129), KQU(10481156578141202800),
+ KQU( 4746166512382823750), KQU(12720876014472464998),
+ KQU( 8825177124486735972), KQU(13733447296837467838),
+ KQU( 6412293741681359625), KQU( 8313213138756135033),
+ KQU(11421481194803712517), KQU( 7997007691544174032),
+ KQU( 6812963847917605930), KQU( 9683091901227558641),
+ KQU(14703594165860324713), KQU( 1775476144519618309),
+ KQU( 2724283288516469519), KQU( 717642555185856868),
+ KQU( 8736402192215092346), KQU(11878800336431381021),
+ KQU( 4348816066017061293), KQU( 6115112756583631307),
+ KQU( 9176597239667142976), KQU(12615622714894259204),
+ KQU(10283406711301385987), KQU( 5111762509485379420),
+ KQU( 3118290051198688449), KQU( 7345123071632232145),
+ KQU( 9176423451688682359), KQU( 4843865456157868971),
+ KQU(12008036363752566088), KQU(12058837181919397720),
+ KQU( 2145073958457347366), KQU( 1526504881672818067),
+ KQU( 3488830105567134848), KQU(13208362960674805143),
+ KQU( 4077549672899572192), KQU( 7770995684693818365),
+ KQU( 1398532341546313593), KQU(12711859908703927840),
+ KQU( 1417561172594446813), KQU(17045191024194170604),
+ KQU( 4101933177604931713), KQU(14708428834203480320),
+ KQU(17447509264469407724), KQU(14314821973983434255),
+ KQU(17990472271061617265), KQU( 5087756685841673942),
+ KQU(12797820586893859939), KQU( 1778128952671092879),
+ KQU( 3535918530508665898), KQU( 9035729701042481301),
+ KQU(14808661568277079962), KQU(14587345077537747914),
+ KQU(11920080002323122708), KQU( 6426515805197278753),
+ KQU( 3295612216725984831), KQU(11040722532100876120),
+ KQU(12305952936387598754), KQU(16097391899742004253),
+ KQU( 4908537335606182208), KQU(12446674552196795504),
+ KQU(16010497855816895177), KQU( 9194378874788615551),
+ KQU( 3382957529567613384), KQU( 5154647600754974077),
+ KQU( 9801822865328396141), KQU( 9023662173919288143),
+ KQU(17623115353825147868), KQU( 8238115767443015816),
+ KQU(15811444159859002560), KQU( 9085612528904059661),
+ KQU( 6888601089398614254), KQU( 258252992894160189),
+ KQU( 6704363880792428622), KQU( 6114966032147235763),
+ KQU(11075393882690261875), KQU( 8797664238933620407),
+ KQU( 5901892006476726920), KQU( 5309780159285518958),
+ KQU(14940808387240817367), KQU(14642032021449656698),
+ KQU( 9808256672068504139), KQU( 3670135111380607658),
+ KQU(11211211097845960152), KQU( 1474304506716695808),
+ KQU(15843166204506876239), KQU( 7661051252471780561),
+ KQU(10170905502249418476), KQU( 7801416045582028589),
+ KQU( 2763981484737053050), KQU( 9491377905499253054),
+ KQU(16201395896336915095), KQU( 9256513756442782198),
+ KQU( 5411283157972456034), KQU( 5059433122288321676),
+ KQU( 4327408006721123357), KQU( 9278544078834433377),
+ KQU( 7601527110882281612), KQU(11848295896975505251),
+ KQU(12096998801094735560), KQU(14773480339823506413),
+ KQU(15586227433895802149), KQU(12786541257830242872),
+ KQU( 6904692985140503067), KQU( 5309011515263103959),
+ KQU(12105257191179371066), KQU(14654380212442225037),
+ KQU( 2556774974190695009), KQU( 4461297399927600261),
+ KQU(14888225660915118646), KQU(14915459341148291824),
+ KQU( 2738802166252327631), KQU( 6047155789239131512),
+ KQU(12920545353217010338), KQU(10697617257007840205),
+ KQU( 2751585253158203504), KQU(13252729159780047496),
+ KQU(14700326134672815469), KQU(14082527904374600529),
+ KQU(16852962273496542070), KQU(17446675504235853907),
+ KQU(15019600398527572311), KQU(12312781346344081551),
+ KQU(14524667935039810450), KQU( 5634005663377195738),
+ KQU(11375574739525000569), KQU( 2423665396433260040),
+ KQU( 5222836914796015410), KQU( 4397666386492647387),
+ KQU( 4619294441691707638), KQU( 665088602354770716),
+ KQU(13246495665281593610), KQU( 6564144270549729409),
+ KQU(10223216188145661688), KQU( 3961556907299230585),
+ KQU(11543262515492439914), KQU(16118031437285993790),
+ KQU( 7143417964520166465), KQU(13295053515909486772),
+ KQU( 40434666004899675), KQU(17127804194038347164),
+ KQU( 8599165966560586269), KQU( 8214016749011284903),
+ KQU(13725130352140465239), KQU( 5467254474431726291),
+ KQU( 7748584297438219877), KQU(16933551114829772472),
+ KQU( 2169618439506799400), KQU( 2169787627665113463),
+ KQU(17314493571267943764), KQU(18053575102911354912),
+ KQU(11928303275378476973), KQU(11593850925061715550),
+ KQU(17782269923473589362), KQU( 3280235307704747039),
+ KQU( 6145343578598685149), KQU(17080117031114086090),
+ KQU(18066839902983594755), KQU( 6517508430331020706),
+ KQU( 8092908893950411541), KQU(12558378233386153732),
+ KQU( 4476532167973132976), KQU(16081642430367025016),
+ KQU( 4233154094369139361), KQU( 8693630486693161027),
+ KQU(11244959343027742285), KQU(12273503967768513508),
+ KQU(14108978636385284876), KQU( 7242414665378826984),
+ KQU( 6561316938846562432), KQU( 8601038474994665795),
+ KQU(17532942353612365904), KQU(17940076637020912186),
+ KQU( 7340260368823171304), KQU( 7061807613916067905),
+ KQU(10561734935039519326), KQU(17990796503724650862),
+ KQU( 6208732943911827159), KQU( 359077562804090617),
+ KQU(14177751537784403113), KQU(10659599444915362902),
+ KQU(15081727220615085833), KQU(13417573895659757486),
+ KQU(15513842342017811524), KQU(11814141516204288231),
+ KQU( 1827312513875101814), KQU( 2804611699894603103),
+ KQU(17116500469975602763), KQU(12270191815211952087),
+ KQU(12256358467786024988), KQU(18435021722453971267),
+ KQU( 671330264390865618), KQU( 476504300460286050),
+ KQU(16465470901027093441), KQU( 4047724406247136402),
+ KQU( 1322305451411883346), KQU( 1388308688834322280),
+ KQU( 7303989085269758176), KQU( 9323792664765233642),
+ KQU( 4542762575316368936), KQU(17342696132794337618),
+ KQU( 4588025054768498379), KQU(13415475057390330804),
+ KQU(17880279491733405570), KQU(10610553400618620353),
+ KQU( 3180842072658960139), KQU(13002966655454270120),
+ KQU( 1665301181064982826), KQU( 7083673946791258979),
+ KQU( 190522247122496820), KQU(17388280237250677740),
+ KQU( 8430770379923642945), KQU(12987180971921668584),
+ KQU( 2311086108365390642), KQU( 2870984383579822345),
+ KQU(14014682609164653318), KQU(14467187293062251484),
+ KQU( 192186361147413298), KQU(15171951713531796524),
+ KQU( 9900305495015948728), KQU(17958004775615466344),
+ KQU(14346380954498606514), KQU(18040047357617407096),
+ KQU( 5035237584833424532), KQU(15089555460613972287),
+ KQU( 4131411873749729831), KQU( 1329013581168250330),
+ KQU(10095353333051193949), KQU(10749518561022462716),
+ KQU( 9050611429810755847), KQU(15022028840236655649),
+ KQU( 8775554279239748298), KQU(13105754025489230502),
+ KQU(15471300118574167585), KQU( 89864764002355628),
+ KQU( 8776416323420466637), KQU( 5280258630612040891),
+ KQU( 2719174488591862912), KQU( 7599309137399661994),
+ KQU(15012887256778039979), KQU(14062981725630928925),
+ KQU(12038536286991689603), KQU( 7089756544681775245),
+ KQU(10376661532744718039), KQU( 1265198725901533130),
+ KQU(13807996727081142408), KQU( 2935019626765036403),
+ KQU( 7651672460680700141), KQU( 3644093016200370795),
+ KQU( 2840982578090080674), KQU(17956262740157449201),
+ KQU(18267979450492880548), KQU(11799503659796848070),
+ KQU( 9942537025669672388), KQU(11886606816406990297),
+ KQU( 5488594946437447576), KQU( 7226714353282744302),
+ KQU( 3784851653123877043), KQU( 878018453244803041),
+ KQU(12110022586268616085), KQU( 734072179404675123),
+ KQU(11869573627998248542), KQU( 469150421297783998),
+ KQU( 260151124912803804), KQU(11639179410120968649),
+ KQU( 9318165193840846253), KQU(12795671722734758075),
+ KQU(15318410297267253933), KQU( 691524703570062620),
+ KQU( 5837129010576994601), KQU(15045963859726941052),
+ KQU( 5850056944932238169), KQU(12017434144750943807),
+ KQU( 7447139064928956574), KQU( 3101711812658245019),
+ KQU(16052940704474982954), KQU(18195745945986994042),
+ KQU( 8932252132785575659), KQU(13390817488106794834),
+ KQU(11582771836502517453), KQU( 4964411326683611686),
+ KQU( 2195093981702694011), KQU(14145229538389675669),
+ KQU(16459605532062271798), KQU( 866316924816482864),
+ KQU( 4593041209937286377), KQU( 8415491391910972138),
+ KQU( 4171236715600528969), KQU(16637569303336782889),
+ KQU( 2002011073439212680), KQU(17695124661097601411),
+ KQU( 4627687053598611702), KQU( 7895831936020190403),
+ KQU( 8455951300917267802), KQU( 2923861649108534854),
+ KQU( 8344557563927786255), KQU( 6408671940373352556),
+ KQU(12210227354536675772), KQU(14294804157294222295),
+ KQU(10103022425071085127), KQU(10092959489504123771),
+ KQU( 6554774405376736268), KQU(12629917718410641774),
+ KQU( 6260933257596067126), KQU( 2460827021439369673),
+ KQU( 2541962996717103668), KQU( 597377203127351475),
+ KQU( 5316984203117315309), KQU( 4811211393563241961),
+ KQU(13119698597255811641), KQU( 8048691512862388981),
+ KQU(10216818971194073842), KQU( 4612229970165291764),
+ KQU(10000980798419974770), KQU( 6877640812402540687),
+ KQU( 1488727563290436992), KQU( 2227774069895697318),
+ KQU(11237754507523316593), KQU(13478948605382290972),
+ KQU( 1963583846976858124), KQU( 5512309205269276457),
+ KQU( 3972770164717652347), KQU( 3841751276198975037),
+ KQU(10283343042181903117), KQU( 8564001259792872199),
+ KQU(16472187244722489221), KQU( 8953493499268945921),
+ KQU( 3518747340357279580), KQU( 4003157546223963073),
+ KQU( 3270305958289814590), KQU( 3966704458129482496),
+ KQU( 8122141865926661939), KQU(14627734748099506653),
+ KQU(13064426990862560568), KQU( 2414079187889870829),
+ KQU( 5378461209354225306), KQU(10841985740128255566),
+ KQU( 538582442885401738), KQU( 7535089183482905946),
+ KQU(16117559957598879095), KQU( 8477890721414539741),
+ KQU( 1459127491209533386), KQU(17035126360733620462),
+ KQU( 8517668552872379126), KQU(10292151468337355014),
+ KQU(17081267732745344157), KQU(13751455337946087178),
+ KQU(14026945459523832966), KQU( 6653278775061723516),
+ KQU(10619085543856390441), KQU( 2196343631481122885),
+ KQU(10045966074702826136), KQU(10082317330452718282),
+ KQU( 5920859259504831242), KQU( 9951879073426540617),
+ KQU( 7074696649151414158), KQU(15808193543879464318),
+ KQU( 7385247772746953374), KQU( 3192003544283864292),
+ KQU(18153684490917593847), KQU(12423498260668568905),
+ KQU(10957758099756378169), KQU(11488762179911016040),
+ KQU( 2099931186465333782), KQU(11180979581250294432),
+ KQU( 8098916250668367933), KQU( 3529200436790763465),
+ KQU(12988418908674681745), KQU( 6147567275954808580),
+ KQU( 3207503344604030989), KQU(10761592604898615360),
+ KQU( 229854861031893504), KQU( 8809853962667144291),
+ KQU(13957364469005693860), KQU( 7634287665224495886),
+ KQU(12353487366976556874), KQU( 1134423796317152034),
+ KQU( 2088992471334107068), KQU( 7393372127190799698),
+ KQU( 1845367839871058391), KQU( 207922563987322884),
+ KQU(11960870813159944976), KQU(12182120053317317363),
+ KQU(17307358132571709283), KQU(13871081155552824936),
+ KQU(18304446751741566262), KQU( 7178705220184302849),
+ KQU(10929605677758824425), KQU(16446976977835806844),
+ KQU(13723874412159769044), KQU( 6942854352100915216),
+ KQU( 1726308474365729390), KQU( 2150078766445323155),
+ KQU(15345558947919656626), KQU(12145453828874527201),
+ KQU( 2054448620739726849), KQU( 2740102003352628137),
+ KQU(11294462163577610655), KQU( 756164283387413743),
+ KQU(17841144758438810880), KQU(10802406021185415861),
+ KQU( 8716455530476737846), KQU( 6321788834517649606),
+ KQU(14681322910577468426), KQU(17330043563884336387),
+ KQU(12701802180050071614), KQU(14695105111079727151),
+ KQU( 5112098511654172830), KQU( 4957505496794139973),
+ KQU( 8270979451952045982), KQU(12307685939199120969),
+ KQU(12425799408953443032), KQU( 8376410143634796588),
+ KQU(16621778679680060464), KQU( 3580497854566660073),
+ KQU( 1122515747803382416), KQU( 857664980960597599),
+ KQU( 6343640119895925918), KQU(12878473260854462891),
+ KQU(10036813920765722626), KQU(14451335468363173812),
+ KQU( 5476809692401102807), KQU(16442255173514366342),
+ KQU(13060203194757167104), KQU(14354124071243177715),
+ KQU(15961249405696125227), KQU(13703893649690872584),
+ KQU( 363907326340340064), KQU( 6247455540491754842),
+ KQU(12242249332757832361), KQU( 156065475679796717),
+ KQU( 9351116235749732355), KQU( 4590350628677701405),
+ KQU( 1671195940982350389), KQU(13501398458898451905),
+ KQU( 6526341991225002255), KQU( 1689782913778157592),
+ KQU( 7439222350869010334), KQU(13975150263226478308),
+ KQU(11411961169932682710), KQU(17204271834833847277),
+ KQU( 541534742544435367), KQU( 6591191931218949684),
+ KQU( 2645454775478232486), KQU( 4322857481256485321),
+ KQU( 8477416487553065110), KQU(12902505428548435048),
+ KQU( 971445777981341415), KQU(14995104682744976712),
+ KQU( 4243341648807158063), KQU( 8695061252721927661),
+ KQU( 5028202003270177222), KQU( 2289257340915567840),
+ KQU(13870416345121866007), KQU(13994481698072092233),
+ KQU( 6912785400753196481), KQU( 2278309315841980139),
+ KQU( 4329765449648304839), KQU( 5963108095785485298),
+ KQU( 4880024847478722478), KQU(16015608779890240947),
+ KQU( 1866679034261393544), KQU( 914821179919731519),
+ KQU( 9643404035648760131), KQU( 2418114953615593915),
+ KQU( 944756836073702374), KQU(15186388048737296834),
+ KQU( 7723355336128442206), KQU( 7500747479679599691),
+ KQU(18013961306453293634), KQU( 2315274808095756456),
+ KQU(13655308255424029566), KQU(17203800273561677098),
+ KQU( 1382158694422087756), KQU( 5090390250309588976),
+ KQU( 517170818384213989), KQU( 1612709252627729621),
+ KQU( 1330118955572449606), KQU( 300922478056709885),
+ KQU(18115693291289091987), KQU(13491407109725238321),
+ KQU(15293714633593827320), KQU( 5151539373053314504),
+ KQU( 5951523243743139207), KQU(14459112015249527975),
+ KQU( 5456113959000700739), KQU( 3877918438464873016),
+ KQU(12534071654260163555), KQU(15871678376893555041),
+ KQU(11005484805712025549), KQU(16353066973143374252),
+ KQU( 4358331472063256685), KQU( 8268349332210859288),
+ KQU(12485161590939658075), KQU(13955993592854471343),
+ KQU( 5911446886848367039), KQU(14925834086813706974),
+ KQU( 6590362597857994805), KQU( 1280544923533661875),
+ KQU( 1637756018947988164), KQU( 4734090064512686329),
+ KQU(16693705263131485912), KQU( 6834882340494360958),
+ KQU( 8120732176159658505), KQU( 2244371958905329346),
+ KQU(10447499707729734021), KQU( 7318742361446942194),
+ KQU( 8032857516355555296), KQU(14023605983059313116),
+ KQU( 1032336061815461376), KQU( 9840995337876562612),
+ KQU( 9869256223029203587), KQU(12227975697177267636),
+ KQU(12728115115844186033), KQU( 7752058479783205470),
+ KQU( 729733219713393087), KQU(12954017801239007622)
};
static const uint64_t init_by_array_64_expected[] = {
- QU( 2100341266307895239LLU), QU( 8344256300489757943LLU),
- QU(15687933285484243894LLU), QU( 8268620370277076319LLU),
- QU(12371852309826545459LLU), QU( 8800491541730110238LLU),
- QU(18113268950100835773LLU), QU( 2886823658884438119LLU),
- QU( 3293667307248180724LLU), QU( 9307928143300172731LLU),
- QU( 7688082017574293629LLU), QU( 900986224735166665LLU),
- QU( 9977972710722265039LLU), QU( 6008205004994830552LLU),
- QU( 546909104521689292LLU), QU( 7428471521869107594LLU),
- QU(14777563419314721179LLU), QU(16116143076567350053LLU),
- QU( 5322685342003142329LLU), QU( 4200427048445863473LLU),
- QU( 4693092150132559146LLU), QU(13671425863759338582LLU),
- QU( 6747117460737639916LLU), QU( 4732666080236551150LLU),
- QU( 5912839950611941263LLU), QU( 3903717554504704909LLU),
- QU( 2615667650256786818LLU), QU(10844129913887006352LLU),
- QU(13786467861810997820LLU), QU(14267853002994021570LLU),
- QU(13767807302847237439LLU), QU(16407963253707224617LLU),
- QU( 4802498363698583497LLU), QU( 2523802839317209764LLU),
- QU( 3822579397797475589LLU), QU( 8950320572212130610LLU),
- QU( 3745623504978342534LLU), QU(16092609066068482806LLU),
- QU( 9817016950274642398LLU), QU(10591660660323829098LLU),
- QU(11751606650792815920LLU), QU( 5122873818577122211LLU),
- QU(17209553764913936624LLU), QU( 6249057709284380343LLU),
- QU(15088791264695071830LLU), QU(15344673071709851930LLU),
- QU( 4345751415293646084LLU), QU( 2542865750703067928LLU),
- QU(13520525127852368784LLU), QU(18294188662880997241LLU),
- QU( 3871781938044881523LLU), QU( 2873487268122812184LLU),
- QU(15099676759482679005LLU), QU(15442599127239350490LLU),
- QU( 6311893274367710888LLU), QU( 3286118760484672933LLU),
- QU( 4146067961333542189LLU), QU(13303942567897208770LLU),
- QU( 8196013722255630418LLU), QU( 4437815439340979989LLU),
- QU(15433791533450605135LLU), QU( 4254828956815687049LLU),
- QU( 1310903207708286015LLU), QU(10529182764462398549LLU),
- QU(14900231311660638810LLU), QU( 9727017277104609793LLU),
- QU( 1821308310948199033LLU), QU(11628861435066772084LLU),
- QU( 9469019138491546924LLU), QU( 3145812670532604988LLU),
- QU( 9938468915045491919LLU), QU( 1562447430672662142LLU),
- QU(13963995266697989134LLU), QU( 3356884357625028695LLU),
- QU( 4499850304584309747LLU), QU( 8456825817023658122LLU),
- QU(10859039922814285279LLU), QU( 8099512337972526555LLU),
- QU( 348006375109672149LLU), QU(11919893998241688603LLU),
- QU( 1104199577402948826LLU), QU(16689191854356060289LLU),
- QU(10992552041730168078LLU), QU( 7243733172705465836LLU),
- QU( 5668075606180319560LLU), QU(18182847037333286970LLU),
- QU( 4290215357664631322LLU), QU( 4061414220791828613LLU),
- QU(13006291061652989604LLU), QU( 7140491178917128798LLU),
- QU(12703446217663283481LLU), QU( 5500220597564558267LLU),
- QU(10330551509971296358LLU), QU(15958554768648714492LLU),
- QU( 5174555954515360045LLU), QU( 1731318837687577735LLU),
- QU( 3557700801048354857LLU), QU(13764012341928616198LLU),
- QU(13115166194379119043LLU), QU( 7989321021560255519LLU),
- QU( 2103584280905877040LLU), QU( 9230788662155228488LLU),
- QU(16396629323325547654LLU), QU( 657926409811318051LLU),
- QU(15046700264391400727LLU), QU( 5120132858771880830LLU),
- QU( 7934160097989028561LLU), QU( 6963121488531976245LLU),
- QU(17412329602621742089LLU), QU(15144843053931774092LLU),
- QU(17204176651763054532LLU), QU(13166595387554065870LLU),
- QU( 8590377810513960213LLU), QU( 5834365135373991938LLU),
- QU( 7640913007182226243LLU), QU( 3479394703859418425LLU),
- QU(16402784452644521040LLU), QU( 4993979809687083980LLU),
- QU(13254522168097688865LLU), QU(15643659095244365219LLU),
- QU( 5881437660538424982LLU), QU(11174892200618987379LLU),
- QU( 254409966159711077LLU), QU(17158413043140549909LLU),
- QU( 3638048789290376272LLU), QU( 1376816930299489190LLU),
- QU( 4622462095217761923LLU), QU(15086407973010263515LLU),
- QU(13253971772784692238LLU), QU( 5270549043541649236LLU),
- QU(11182714186805411604LLU), QU(12283846437495577140LLU),
- QU( 5297647149908953219LLU), QU(10047451738316836654LLU),
- QU( 4938228100367874746LLU), QU(12328523025304077923LLU),
- QU( 3601049438595312361LLU), QU( 9313624118352733770LLU),
- QU(13322966086117661798LLU), QU(16660005705644029394LLU),
- QU(11337677526988872373LLU), QU(13869299102574417795LLU),
- QU(15642043183045645437LLU), QU( 3021755569085880019LLU),
- QU( 4979741767761188161LLU), QU(13679979092079279587LLU),
- QU( 3344685842861071743LLU), QU(13947960059899588104LLU),
- QU( 305806934293368007LLU), QU( 5749173929201650029LLU),
- QU(11123724852118844098LLU), QU(15128987688788879802LLU),
- QU(15251651211024665009LLU), QU( 7689925933816577776LLU),
- QU(16732804392695859449LLU), QU(17087345401014078468LLU),
- QU(14315108589159048871LLU), QU( 4820700266619778917LLU),
- QU(16709637539357958441LLU), QU( 4936227875177351374LLU),
- QU( 2137907697912987247LLU), QU(11628565601408395420LLU),
- QU( 2333250549241556786LLU), QU( 5711200379577778637LLU),
- QU( 5170680131529031729LLU), QU(12620392043061335164LLU),
- QU( 95363390101096078LLU), QU( 5487981914081709462LLU),
- QU( 1763109823981838620LLU), QU( 3395861271473224396LLU),
- QU( 1300496844282213595LLU), QU( 6894316212820232902LLU),
- QU(10673859651135576674LLU), QU( 5911839658857903252LLU),
- QU(17407110743387299102LLU), QU( 8257427154623140385LLU),
- QU(11389003026741800267LLU), QU( 4070043211095013717LLU),
- QU(11663806997145259025LLU), QU(15265598950648798210LLU),
- QU( 630585789434030934LLU), QU( 3524446529213587334LLU),
- QU( 7186424168495184211LLU), QU(10806585451386379021LLU),
- QU(11120017753500499273LLU), QU( 1586837651387701301LLU),
- QU(17530454400954415544LLU), QU( 9991670045077880430LLU),
- QU( 7550997268990730180LLU), QU( 8640249196597379304LLU),
- QU( 3522203892786893823LLU), QU(10401116549878854788LLU),
- QU(13690285544733124852LLU), QU( 8295785675455774586LLU),
- QU(15535716172155117603LLU), QU( 3112108583723722511LLU),
- QU(17633179955339271113LLU), QU(18154208056063759375LLU),
- QU( 1866409236285815666LLU), QU(13326075895396412882LLU),
- QU( 8756261842948020025LLU), QU( 6281852999868439131LLU),
- QU(15087653361275292858LLU), QU(10333923911152949397LLU),
- QU( 5265567645757408500LLU), QU(12728041843210352184LLU),
- QU( 6347959327507828759LLU), QU( 154112802625564758LLU),
- QU(18235228308679780218LLU), QU( 3253805274673352418LLU),
- QU( 4849171610689031197LLU), QU(17948529398340432518LLU),
- QU(13803510475637409167LLU), QU(13506570190409883095LLU),
- QU(15870801273282960805LLU), QU( 8451286481299170773LLU),
- QU( 9562190620034457541LLU), QU( 8518905387449138364LLU),
- QU(12681306401363385655LLU), QU( 3788073690559762558LLU),
- QU( 5256820289573487769LLU), QU( 2752021372314875467LLU),
- QU( 6354035166862520716LLU), QU( 4328956378309739069LLU),
- QU( 449087441228269600LLU), QU( 5533508742653090868LLU),
- QU( 1260389420404746988LLU), QU(18175394473289055097LLU),
- QU( 1535467109660399420LLU), QU( 8818894282874061442LLU),
- QU(12140873243824811213LLU), QU(15031386653823014946LLU),
- QU( 1286028221456149232LLU), QU( 6329608889367858784LLU),
- QU( 9419654354945132725LLU), QU( 6094576547061672379LLU),
- QU(17706217251847450255LLU), QU( 1733495073065878126LLU),
- QU(16918923754607552663LLU), QU( 8881949849954945044LLU),
- QU(12938977706896313891LLU), QU(14043628638299793407LLU),
- QU(18393874581723718233LLU), QU( 6886318534846892044LLU),
- QU(14577870878038334081LLU), QU(13541558383439414119LLU),
- QU(13570472158807588273LLU), QU(18300760537910283361LLU),
- QU( 818368572800609205LLU), QU( 1417000585112573219LLU),
- QU(12337533143867683655LLU), QU(12433180994702314480LLU),
- QU( 778190005829189083LLU), QU(13667356216206524711LLU),
- QU( 9866149895295225230LLU), QU(11043240490417111999LLU),
- QU( 1123933826541378598LLU), QU( 6469631933605123610LLU),
- QU(14508554074431980040LLU), QU(13918931242962026714LLU),
- QU( 2870785929342348285LLU), QU(14786362626740736974LLU),
- QU(13176680060902695786LLU), QU( 9591778613541679456LLU),
- QU( 9097662885117436706LLU), QU( 749262234240924947LLU),
- QU( 1944844067793307093LLU), QU( 4339214904577487742LLU),
- QU( 8009584152961946551LLU), QU(16073159501225501777LLU),
- QU( 3335870590499306217LLU), QU(17088312653151202847LLU),
- QU( 3108893142681931848LLU), QU(16636841767202792021LLU),
- QU(10423316431118400637LLU), QU( 8008357368674443506LLU),
- QU(11340015231914677875LLU), QU(17687896501594936090LLU),
- QU(15173627921763199958LLU), QU( 542569482243721959LLU),
- QU(15071714982769812975LLU), QU( 4466624872151386956LLU),
- QU( 1901780715602332461LLU), QU( 9822227742154351098LLU),
- QU( 1479332892928648780LLU), QU( 6981611948382474400LLU),
- QU( 7620824924456077376LLU), QU(14095973329429406782LLU),
- QU( 7902744005696185404LLU), QU(15830577219375036920LLU),
- QU(10287076667317764416LLU), QU(12334872764071724025LLU),
- QU( 4419302088133544331LLU), QU(14455842851266090520LLU),
- QU(12488077416504654222LLU), QU( 7953892017701886766LLU),
- QU( 6331484925529519007LLU), QU( 4902145853785030022LLU),
- QU(17010159216096443073LLU), QU(11945354668653886087LLU),
- QU(15112022728645230829LLU), QU(17363484484522986742LLU),
- QU( 4423497825896692887LLU), QU( 8155489510809067471LLU),
- QU( 258966605622576285LLU), QU( 5462958075742020534LLU),
- QU( 6763710214913276228LLU), QU( 2368935183451109054LLU),
- QU(14209506165246453811LLU), QU( 2646257040978514881LLU),
- QU( 3776001911922207672LLU), QU( 1419304601390147631LLU),
- QU(14987366598022458284LLU), QU( 3977770701065815721LLU),
- QU( 730820417451838898LLU), QU( 3982991703612885327LLU),
- QU( 2803544519671388477LLU), QU(17067667221114424649LLU),
- QU( 2922555119737867166LLU), QU( 1989477584121460932LLU),
- QU(15020387605892337354LLU), QU( 9293277796427533547LLU),
- QU(10722181424063557247LLU), QU(16704542332047511651LLU),
- QU( 5008286236142089514LLU), QU(16174732308747382540LLU),
- QU(17597019485798338402LLU), QU(13081745199110622093LLU),
- QU( 8850305883842258115LLU), QU(12723629125624589005LLU),
- QU( 8140566453402805978LLU), QU(15356684607680935061LLU),
- QU(14222190387342648650LLU), QU(11134610460665975178LLU),
- QU( 1259799058620984266LLU), QU(13281656268025610041LLU),
- QU( 298262561068153992LLU), QU(12277871700239212922LLU),
- QU(13911297774719779438LLU), QU(16556727962761474934LLU),
- QU(17903010316654728010LLU), QU( 9682617699648434744LLU),
- QU(14757681836838592850LLU), QU( 1327242446558524473LLU),
- QU(11126645098780572792LLU), QU( 1883602329313221774LLU),
- QU( 2543897783922776873LLU), QU(15029168513767772842LLU),
- QU(12710270651039129878LLU), QU(16118202956069604504LLU),
- QU(15010759372168680524LLU), QU( 2296827082251923948LLU),
- QU(10793729742623518101LLU), QU(13829764151845413046LLU),
- QU(17769301223184451213LLU), QU( 3118268169210783372LLU),
- QU(17626204544105123127LLU), QU( 7416718488974352644LLU),
- QU(10450751996212925994LLU), QU( 9352529519128770586LLU),
- QU( 259347569641110140LLU), QU( 8048588892269692697LLU),
- QU( 1774414152306494058LLU), QU(10669548347214355622LLU),
- QU(13061992253816795081LLU), QU(18432677803063861659LLU),
- QU( 8879191055593984333LLU), QU(12433753195199268041LLU),
- QU(14919392415439730602LLU), QU( 6612848378595332963LLU),
- QU( 6320986812036143628LLU), QU(10465592420226092859LLU),
- QU( 4196009278962570808LLU), QU( 3747816564473572224LLU),
- QU(17941203486133732898LLU), QU( 2350310037040505198LLU),
- QU( 5811779859134370113LLU), QU(10492109599506195126LLU),
- QU( 7699650690179541274LLU), QU( 1954338494306022961LLU),
- QU(14095816969027231152LLU), QU( 5841346919964852061LLU),
- QU(14945969510148214735LLU), QU( 3680200305887550992LLU),
- QU( 6218047466131695792LLU), QU( 8242165745175775096LLU),
- QU(11021371934053307357LLU), QU( 1265099502753169797LLU),
- QU( 4644347436111321718LLU), QU( 3609296916782832859LLU),
- QU( 8109807992218521571LLU), QU(18387884215648662020LLU),
- QU(14656324896296392902LLU), QU(17386819091238216751LLU),
- QU(17788300878582317152LLU), QU( 7919446259742399591LLU),
- QU( 4466613134576358004LLU), QU(12928181023667938509LLU),
- QU(13147446154454932030LLU), QU(16552129038252734620LLU),
- QU( 8395299403738822450LLU), QU(11313817655275361164LLU),
- QU( 434258809499511718LLU), QU( 2074882104954788676LLU),
- QU( 7929892178759395518LLU), QU( 9006461629105745388LLU),
- QU( 5176475650000323086LLU), QU(11128357033468341069LLU),
- QU(12026158851559118955LLU), QU(14699716249471156500LLU),
- QU( 448982497120206757LLU), QU( 4156475356685519900LLU),
- QU( 6063816103417215727LLU), QU(10073289387954971479LLU),
- QU( 8174466846138590962LLU), QU( 2675777452363449006LLU),
- QU( 9090685420572474281LLU), QU( 6659652652765562060LLU),
- QU(12923120304018106621LLU), QU(11117480560334526775LLU),
- QU( 937910473424587511LLU), QU( 1838692113502346645LLU),
- QU(11133914074648726180LLU), QU( 7922600945143884053LLU),
- QU(13435287702700959550LLU), QU( 5287964921251123332LLU),
- QU(11354875374575318947LLU), QU(17955724760748238133LLU),
- QU(13728617396297106512LLU), QU( 4107449660118101255LLU),
- QU( 1210269794886589623LLU), QU(11408687205733456282LLU),
- QU( 4538354710392677887LLU), QU(13566803319341319267LLU),
- QU(17870798107734050771LLU), QU( 3354318982568089135LLU),
- QU( 9034450839405133651LLU), QU(13087431795753424314LLU),
- QU( 950333102820688239LLU), QU( 1968360654535604116LLU),
- QU(16840551645563314995LLU), QU( 8867501803892924995LLU),
- QU(11395388644490626845LLU), QU( 1529815836300732204LLU),
- QU(13330848522996608842LLU), QU( 1813432878817504265LLU),
- QU( 2336867432693429560LLU), QU(15192805445973385902LLU),
- QU( 2528593071076407877LLU), QU( 128459777936689248LLU),
- QU( 9976345382867214866LLU), QU( 6208885766767996043LLU),
- QU(14982349522273141706LLU), QU( 3099654362410737822LLU),
- QU(13776700761947297661LLU), QU( 8806185470684925550LLU),
- QU( 8151717890410585321LLU), QU( 640860591588072925LLU),
- QU(14592096303937307465LLU), QU( 9056472419613564846LLU),
- QU(14861544647742266352LLU), QU(12703771500398470216LLU),
- QU( 3142372800384138465LLU), QU( 6201105606917248196LLU),
- QU(18337516409359270184LLU), QU(15042268695665115339LLU),
- QU(15188246541383283846LLU), QU(12800028693090114519LLU),
- QU( 5992859621101493472LLU), QU(18278043971816803521LLU),
- QU( 9002773075219424560LLU), QU( 7325707116943598353LLU),
- QU( 7930571931248040822LLU), QU( 5645275869617023448LLU),
- QU( 7266107455295958487LLU), QU( 4363664528273524411LLU),
- QU(14313875763787479809LLU), QU(17059695613553486802LLU),
- QU( 9247761425889940932LLU), QU(13704726459237593128LLU),
- QU( 2701312427328909832LLU), QU(17235532008287243115LLU),
- QU(14093147761491729538LLU), QU( 6247352273768386516LLU),
- QU( 8268710048153268415LLU), QU( 7985295214477182083LLU),
- QU(15624495190888896807LLU), QU( 3772753430045262788LLU),
- QU( 9133991620474991698LLU), QU( 5665791943316256028LLU),
- QU( 7551996832462193473LLU), QU(13163729206798953877LLU),
- QU( 9263532074153846374LLU), QU( 1015460703698618353LLU),
- QU(17929874696989519390LLU), QU(18257884721466153847LLU),
- QU(16271867543011222991LLU), QU( 3905971519021791941LLU),
- QU(16814488397137052085LLU), QU( 1321197685504621613LLU),
- QU( 2870359191894002181LLU), QU(14317282970323395450LLU),
- QU(13663920845511074366LLU), QU( 2052463995796539594LLU),
- QU(14126345686431444337LLU), QU( 1727572121947022534LLU),
- QU(17793552254485594241LLU), QU( 6738857418849205750LLU),
- QU( 1282987123157442952LLU), QU(16655480021581159251LLU),
- QU( 6784587032080183866LLU), QU(14726758805359965162LLU),
- QU( 7577995933961987349LLU), QU(12539609320311114036LLU),
- QU(10789773033385439494LLU), QU( 8517001497411158227LLU),
- QU(10075543932136339710LLU), QU(14838152340938811081LLU),
- QU( 9560840631794044194LLU), QU(17445736541454117475LLU),
- QU(10633026464336393186LLU), QU(15705729708242246293LLU),
- QU( 1117517596891411098LLU), QU( 4305657943415886942LLU),
- QU( 4948856840533979263LLU), QU(16071681989041789593LLU),
- QU(13723031429272486527LLU), QU( 7639567622306509462LLU),
- QU(12670424537483090390LLU), QU( 9715223453097197134LLU),
- QU( 5457173389992686394LLU), QU( 289857129276135145LLU),
- QU(17048610270521972512LLU), QU( 692768013309835485LLU),
- QU(14823232360546632057LLU), QU(18218002361317895936LLU),
- QU( 3281724260212650204LLU), QU(16453957266549513795LLU),
- QU( 8592711109774511881LLU), QU( 929825123473369579LLU),
- QU(15966784769764367791LLU), QU( 9627344291450607588LLU),
- QU(10849555504977813287LLU), QU( 9234566913936339275LLU),
- QU( 6413807690366911210LLU), QU(10862389016184219267LLU),
- QU(13842504799335374048LLU), QU( 1531994113376881174LLU),
- QU( 2081314867544364459LLU), QU(16430628791616959932LLU),
- QU( 8314714038654394368LLU), QU( 9155473892098431813LLU),
- QU(12577843786670475704LLU), QU( 4399161106452401017LLU),
- QU( 1668083091682623186LLU), QU( 1741383777203714216LLU),
- QU( 2162597285417794374LLU), QU(15841980159165218736LLU),
- QU( 1971354603551467079LLU), QU( 1206714764913205968LLU),
- QU( 4790860439591272330LLU), QU(14699375615594055799LLU),
- QU( 8374423871657449988LLU), QU(10950685736472937738LLU),
- QU( 697344331343267176LLU), QU(10084998763118059810LLU),
- QU(12897369539795983124LLU), QU(12351260292144383605LLU),
- QU( 1268810970176811234LLU), QU( 7406287800414582768LLU),
- QU( 516169557043807831LLU), QU( 5077568278710520380LLU),
- QU( 3828791738309039304LLU), QU( 7721974069946943610LLU),
- QU( 3534670260981096460LLU), QU( 4865792189600584891LLU),
- QU(16892578493734337298LLU), QU( 9161499464278042590LLU),
- QU(11976149624067055931LLU), QU(13219479887277343990LLU),
- QU(14161556738111500680LLU), QU(14670715255011223056LLU),
- QU( 4671205678403576558LLU), QU(12633022931454259781LLU),
- QU(14821376219869187646LLU), QU( 751181776484317028LLU),
- QU( 2192211308839047070LLU), QU(11787306362361245189LLU),
- QU(10672375120744095707LLU), QU( 4601972328345244467LLU),
- QU(15457217788831125879LLU), QU( 8464345256775460809LLU),
- QU(10191938789487159478LLU), QU( 6184348739615197613LLU),
- QU(11425436778806882100LLU), QU( 2739227089124319793LLU),
- QU( 461464518456000551LLU), QU( 4689850170029177442LLU),
- QU( 6120307814374078625LLU), QU(11153579230681708671LLU),
- QU( 7891721473905347926LLU), QU(10281646937824872400LLU),
- QU( 3026099648191332248LLU), QU( 8666750296953273818LLU),
- QU(14978499698844363232LLU), QU(13303395102890132065LLU),
- QU( 8182358205292864080LLU), QU(10560547713972971291LLU),
- QU(11981635489418959093LLU), QU( 3134621354935288409LLU),
- QU(11580681977404383968LLU), QU(14205530317404088650LLU),
- QU( 5997789011854923157LLU), QU(13659151593432238041LLU),
- QU(11664332114338865086LLU), QU( 7490351383220929386LLU),
- QU( 7189290499881530378LLU), QU(15039262734271020220LLU),
- QU( 2057217285976980055LLU), QU( 555570804905355739LLU),
- QU(11235311968348555110LLU), QU(13824557146269603217LLU),
- QU(16906788840653099693LLU), QU( 7222878245455661677LLU),
- QU( 5245139444332423756LLU), QU( 4723748462805674292LLU),
- QU(12216509815698568612LLU), QU(17402362976648951187LLU),
- QU(17389614836810366768LLU), QU( 4880936484146667711LLU),
- QU( 9085007839292639880LLU), QU(13837353458498535449LLU),
- QU(11914419854360366677LLU), QU(16595890135313864103LLU),
- QU( 6313969847197627222LLU), QU(18296909792163910431LLU),
- QU(10041780113382084042LLU), QU( 2499478551172884794LLU),
- QU(11057894246241189489LLU), QU( 9742243032389068555LLU),
- QU(12838934582673196228LLU), QU(13437023235248490367LLU),
- QU(13372420669446163240LLU), QU( 6752564244716909224LLU),
- QU( 7157333073400313737LLU), QU(12230281516370654308LLU),
- QU( 1182884552219419117LLU), QU( 2955125381312499218LLU),
- QU(10308827097079443249LLU), QU( 1337648572986534958LLU),
- QU(16378788590020343939LLU), QU( 108619126514420935LLU),
- QU( 3990981009621629188LLU), QU( 5460953070230946410LLU),
- QU( 9703328329366531883LLU), QU(13166631489188077236LLU),
- QU( 1104768831213675170LLU), QU( 3447930458553877908LLU),
- QU( 8067172487769945676LLU), QU( 5445802098190775347LLU),
- QU( 3244840981648973873LLU), QU(17314668322981950060LLU),
- QU( 5006812527827763807LLU), QU(18158695070225526260LLU),
- QU( 2824536478852417853LLU), QU(13974775809127519886LLU),
- QU( 9814362769074067392LLU), QU(17276205156374862128LLU),
- QU(11361680725379306967LLU), QU( 3422581970382012542LLU),
- QU(11003189603753241266LLU), QU(11194292945277862261LLU),
- QU( 6839623313908521348LLU), QU(11935326462707324634LLU),
- QU( 1611456788685878444LLU), QU(13112620989475558907LLU),
- QU( 517659108904450427LLU), QU(13558114318574407624LLU),
- QU(15699089742731633077LLU), QU( 4988979278862685458LLU),
- QU( 8111373583056521297LLU), QU( 3891258746615399627LLU),
- QU( 8137298251469718086LLU), QU(12748663295624701649LLU),
- QU( 4389835683495292062LLU), QU( 5775217872128831729LLU),
- QU( 9462091896405534927LLU), QU( 8498124108820263989LLU),
- QU( 8059131278842839525LLU), QU(10503167994254090892LLU),
- QU(11613153541070396656LLU), QU(18069248738504647790LLU),
- QU( 570657419109768508LLU), QU( 3950574167771159665LLU),
- QU( 5514655599604313077LLU), QU( 2908460854428484165LLU),
- QU(10777722615935663114LLU), QU(12007363304839279486LLU),
- QU( 9800646187569484767LLU), QU( 8795423564889864287LLU),
- QU(14257396680131028419LLU), QU( 6405465117315096498LLU),
- QU( 7939411072208774878LLU), QU(17577572378528990006LLU),
- QU(14785873806715994850LLU), QU(16770572680854747390LLU),
- QU(18127549474419396481LLU), QU(11637013449455757750LLU),
- QU(14371851933996761086LLU), QU( 3601181063650110280LLU),
- QU( 4126442845019316144LLU), QU(10198287239244320669LLU),
- QU(18000169628555379659LLU), QU(18392482400739978269LLU),
- QU( 6219919037686919957LLU), QU( 3610085377719446052LLU),
- QU( 2513925039981776336LLU), QU(16679413537926716955LLU),
- QU(12903302131714909434LLU), QU( 5581145789762985009LLU),
- QU(12325955044293303233LLU), QU(17216111180742141204LLU),
- QU( 6321919595276545740LLU), QU( 3507521147216174501LLU),
- QU( 9659194593319481840LLU), QU(11473976005975358326LLU),
- QU(14742730101435987026LLU), QU( 492845897709954780LLU),
- QU(16976371186162599676LLU), QU(17712703422837648655LLU),
- QU( 9881254778587061697LLU), QU( 8413223156302299551LLU),
- QU( 1563841828254089168LLU), QU( 9996032758786671975LLU),
- QU( 138877700583772667LLU), QU(13003043368574995989LLU),
- QU( 4390573668650456587LLU), QU( 8610287390568126755LLU),
- QU(15126904974266642199LLU), QU( 6703637238986057662LLU),
- QU( 2873075592956810157LLU), QU( 6035080933946049418LLU),
- QU(13382846581202353014LLU), QU( 7303971031814642463LLU),
- QU(18418024405307444267LLU), QU( 5847096731675404647LLU),
- QU( 4035880699639842500LLU), QU(11525348625112218478LLU),
- QU( 3041162365459574102LLU), QU( 2604734487727986558LLU),
- QU(15526341771636983145LLU), QU(14556052310697370254LLU),
- QU(12997787077930808155LLU), QU( 9601806501755554499LLU),
- QU(11349677952521423389LLU), QU(14956777807644899350LLU),
- QU(16559736957742852721LLU), QU(12360828274778140726LLU),
- QU( 6685373272009662513LLU), QU(16932258748055324130LLU),
- QU(15918051131954158508LLU), QU( 1692312913140790144LLU),
- QU( 546653826801637367LLU), QU( 5341587076045986652LLU),
- QU(14975057236342585662LLU), QU(12374976357340622412LLU),
- QU(10328833995181940552LLU), QU(12831807101710443149LLU),
- QU(10548514914382545716LLU), QU( 2217806727199715993LLU),
- QU(12627067369242845138LLU), QU( 4598965364035438158LLU),
- QU( 150923352751318171LLU), QU(14274109544442257283LLU),
- QU( 4696661475093863031LLU), QU( 1505764114384654516LLU),
- QU(10699185831891495147LLU), QU( 2392353847713620519LLU),
- QU( 3652870166711788383LLU), QU( 8640653276221911108LLU),
- QU( 3894077592275889704LLU), QU( 4918592872135964845LLU),
- QU(16379121273281400789LLU), QU(12058465483591683656LLU),
- QU(11250106829302924945LLU), QU( 1147537556296983005LLU),
- QU( 6376342756004613268LLU), QU(14967128191709280506LLU),
- QU(18007449949790627628LLU), QU( 9497178279316537841LLU),
- QU( 7920174844809394893LLU), QU(10037752595255719907LLU),
- QU(15875342784985217697LLU), QU(15311615921712850696LLU),
- QU( 9552902652110992950LLU), QU(14054979450099721140LLU),
- QU( 5998709773566417349LLU), QU(18027910339276320187LLU),
- QU( 8223099053868585554LLU), QU( 7842270354824999767LLU),
- QU( 4896315688770080292LLU), QU(12969320296569787895LLU),
- QU( 2674321489185759961LLU), QU( 4053615936864718439LLU),
- QU(11349775270588617578LLU), QU( 4743019256284553975LLU),
- QU( 5602100217469723769LLU), QU(14398995691411527813LLU),
- QU( 7412170493796825470LLU), QU( 836262406131744846LLU),
- QU( 8231086633845153022LLU), QU( 5161377920438552287LLU),
- QU( 8828731196169924949LLU), QU(16211142246465502680LLU),
- QU( 3307990879253687818LLU), QU( 5193405406899782022LLU),
- QU( 8510842117467566693LLU), QU( 6070955181022405365LLU),
- QU(14482950231361409799LLU), QU(12585159371331138077LLU),
- QU( 3511537678933588148LLU), QU( 2041849474531116417LLU),
- QU(10944936685095345792LLU), QU(18303116923079107729LLU),
- QU( 2720566371239725320LLU), QU( 4958672473562397622LLU),
- QU( 3032326668253243412LLU), QU(13689418691726908338LLU),
- QU( 1895205511728843996LLU), QU( 8146303515271990527LLU),
- QU(16507343500056113480LLU), QU( 473996939105902919LLU),
- QU( 9897686885246881481LLU), QU(14606433762712790575LLU),
- QU( 6732796251605566368LLU), QU( 1399778120855368916LLU),
- QU( 935023885182833777LLU), QU(16066282816186753477LLU),
- QU( 7291270991820612055LLU), QU(17530230393129853844LLU),
- QU(10223493623477451366LLU), QU(15841725630495676683LLU),
- QU(17379567246435515824LLU), QU( 8588251429375561971LLU),
- QU(18339511210887206423LLU), QU(17349587430725976100LLU),
- QU(12244876521394838088LLU), QU( 6382187714147161259LLU),
- QU(12335807181848950831LLU), QU(16948885622305460665LLU),
- QU(13755097796371520506LLU), QU(14806740373324947801LLU),
- QU( 4828699633859287703LLU), QU( 8209879281452301604LLU),
- QU(12435716669553736437LLU), QU(13970976859588452131LLU),
- QU( 6233960842566773148LLU), QU(12507096267900505759LLU),
- QU( 1198713114381279421LLU), QU(14989862731124149015LLU),
- QU(15932189508707978949LLU), QU( 2526406641432708722LLU),
- QU( 29187427817271982LLU), QU( 1499802773054556353LLU),
- QU(10816638187021897173LLU), QU( 5436139270839738132LLU),
- QU( 6659882287036010082LLU), QU( 2154048955317173697LLU),
- QU(10887317019333757642LLU), QU(16281091802634424955LLU),
- QU(10754549879915384901LLU), QU(10760611745769249815LLU),
- QU( 2161505946972504002LLU), QU( 5243132808986265107LLU),
- QU(10129852179873415416LLU), QU( 710339480008649081LLU),
- QU( 7802129453068808528LLU), QU(17967213567178907213LLU),
- QU(15730859124668605599LLU), QU(13058356168962376502LLU),
- QU( 3701224985413645909LLU), QU(14464065869149109264LLU),
- QU( 9959272418844311646LLU), QU(10157426099515958752LLU),
- QU(14013736814538268528LLU), QU(17797456992065653951LLU),
- QU(17418878140257344806LLU), QU(15457429073540561521LLU),
- QU( 2184426881360949378LLU), QU( 2062193041154712416LLU),
- QU( 8553463347406931661LLU), QU( 4913057625202871854LLU),
- QU( 2668943682126618425LLU), QU(17064444737891172288LLU),
- QU( 4997115903913298637LLU), QU(12019402608892327416LLU),
- QU(17603584559765897352LLU), QU(11367529582073647975LLU),
- QU( 8211476043518436050LLU), QU( 8676849804070323674LLU),
- QU(18431829230394475730LLU), QU(10490177861361247904LLU),
- QU( 9508720602025651349LLU), QU( 7409627448555722700LLU),
- QU( 5804047018862729008LLU), QU(11943858176893142594LLU),
- QU(11908095418933847092LLU), QU( 5415449345715887652LLU),
- QU( 1554022699166156407LLU), QU( 9073322106406017161LLU),
- QU( 7080630967969047082LLU), QU(18049736940860732943LLU),
- QU(12748714242594196794LLU), QU( 1226992415735156741LLU),
- QU(17900981019609531193LLU), QU(11720739744008710999LLU),
- QU( 3006400683394775434LLU), QU(11347974011751996028LLU),
- QU( 3316999628257954608LLU), QU( 8384484563557639101LLU),
- QU(18117794685961729767LLU), QU( 1900145025596618194LLU),
- QU(17459527840632892676LLU), QU( 5634784101865710994LLU),
- QU( 7918619300292897158LLU), QU( 3146577625026301350LLU),
- QU( 9955212856499068767LLU), QU( 1873995843681746975LLU),
- QU( 1561487759967972194LLU), QU( 8322718804375878474LLU),
- QU(11300284215327028366LLU), QU( 4667391032508998982LLU),
- QU( 9820104494306625580LLU), QU(17922397968599970610LLU),
- QU( 1784690461886786712LLU), QU(14940365084341346821LLU),
- QU( 5348719575594186181LLU), QU(10720419084507855261LLU),
- QU(14210394354145143274LLU), QU( 2426468692164000131LLU),
- QU(16271062114607059202LLU), QU(14851904092357070247LLU),
- QU( 6524493015693121897LLU), QU( 9825473835127138531LLU),
- QU(14222500616268569578LLU), QU(15521484052007487468LLU),
- QU(14462579404124614699LLU), QU(11012375590820665520LLU),
- QU(11625327350536084927LLU), QU(14452017765243785417LLU),
- QU( 9989342263518766305LLU), QU( 3640105471101803790LLU),
- QU( 4749866455897513242LLU), QU(13963064946736312044LLU),
- QU(10007416591973223791LLU), QU(18314132234717431115LLU),
- QU( 3286596588617483450LLU), QU( 7726163455370818765LLU),
- QU( 7575454721115379328LLU), QU( 5308331576437663422LLU),
- QU(18288821894903530934LLU), QU( 8028405805410554106LLU),
- QU(15744019832103296628LLU), QU( 149765559630932100LLU),
- QU( 6137705557200071977LLU), QU(14513416315434803615LLU),
- QU(11665702820128984473LLU), QU( 218926670505601386LLU),
- QU( 6868675028717769519LLU), QU(15282016569441512302LLU),
- QU( 5707000497782960236LLU), QU( 6671120586555079567LLU),
- QU( 2194098052618985448LLU), QU(16849577895477330978LLU),
- QU(12957148471017466283LLU), QU( 1997805535404859393LLU),
- QU( 1180721060263860490LLU), QU(13206391310193756958LLU),
- QU(12980208674461861797LLU), QU( 3825967775058875366LLU),
- QU(17543433670782042631LLU), QU( 1518339070120322730LLU),
- QU(16344584340890991669LLU), QU( 2611327165318529819LLU),
- QU(11265022723283422529LLU), QU( 4001552800373196817LLU),
- QU(14509595890079346161LLU), QU( 3528717165416234562LLU),
- QU(18153222571501914072LLU), QU( 9387182977209744425LLU),
- QU(10064342315985580021LLU), QU(11373678413215253977LLU),
- QU( 2308457853228798099LLU), QU( 9729042942839545302LLU),
- QU( 7833785471140127746LLU), QU( 6351049900319844436LLU),
- QU(14454610627133496067LLU), QU(12533175683634819111LLU),
- QU(15570163926716513029LLU), QU(13356980519185762498LLU)
+ KQU( 2100341266307895239), KQU( 8344256300489757943),
+ KQU(15687933285484243894), KQU( 8268620370277076319),
+ KQU(12371852309826545459), KQU( 8800491541730110238),
+ KQU(18113268950100835773), KQU( 2886823658884438119),
+ KQU( 3293667307248180724), KQU( 9307928143300172731),
+ KQU( 7688082017574293629), KQU( 900986224735166665),
+ KQU( 9977972710722265039), KQU( 6008205004994830552),
+ KQU( 546909104521689292), KQU( 7428471521869107594),
+ KQU(14777563419314721179), KQU(16116143076567350053),
+ KQU( 5322685342003142329), KQU( 4200427048445863473),
+ KQU( 4693092150132559146), KQU(13671425863759338582),
+ KQU( 6747117460737639916), KQU( 4732666080236551150),
+ KQU( 5912839950611941263), KQU( 3903717554504704909),
+ KQU( 2615667650256786818), KQU(10844129913887006352),
+ KQU(13786467861810997820), KQU(14267853002994021570),
+ KQU(13767807302847237439), KQU(16407963253707224617),
+ KQU( 4802498363698583497), KQU( 2523802839317209764),
+ KQU( 3822579397797475589), KQU( 8950320572212130610),
+ KQU( 3745623504978342534), KQU(16092609066068482806),
+ KQU( 9817016950274642398), KQU(10591660660323829098),
+ KQU(11751606650792815920), KQU( 5122873818577122211),
+ KQU(17209553764913936624), KQU( 6249057709284380343),
+ KQU(15088791264695071830), KQU(15344673071709851930),
+ KQU( 4345751415293646084), KQU( 2542865750703067928),
+ KQU(13520525127852368784), KQU(18294188662880997241),
+ KQU( 3871781938044881523), KQU( 2873487268122812184),
+ KQU(15099676759482679005), KQU(15442599127239350490),
+ KQU( 6311893274367710888), KQU( 3286118760484672933),
+ KQU( 4146067961333542189), KQU(13303942567897208770),
+ KQU( 8196013722255630418), KQU( 4437815439340979989),
+ KQU(15433791533450605135), KQU( 4254828956815687049),
+ KQU( 1310903207708286015), KQU(10529182764462398549),
+ KQU(14900231311660638810), KQU( 9727017277104609793),
+ KQU( 1821308310948199033), KQU(11628861435066772084),
+ KQU( 9469019138491546924), KQU( 3145812670532604988),
+ KQU( 9938468915045491919), KQU( 1562447430672662142),
+ KQU(13963995266697989134), KQU( 3356884357625028695),
+ KQU( 4499850304584309747), KQU( 8456825817023658122),
+ KQU(10859039922814285279), KQU( 8099512337972526555),
+ KQU( 348006375109672149), KQU(11919893998241688603),
+ KQU( 1104199577402948826), KQU(16689191854356060289),
+ KQU(10992552041730168078), KQU( 7243733172705465836),
+ KQU( 5668075606180319560), KQU(18182847037333286970),
+ KQU( 4290215357664631322), KQU( 4061414220791828613),
+ KQU(13006291061652989604), KQU( 7140491178917128798),
+ KQU(12703446217663283481), KQU( 5500220597564558267),
+ KQU(10330551509971296358), KQU(15958554768648714492),
+ KQU( 5174555954515360045), KQU( 1731318837687577735),
+ KQU( 3557700801048354857), KQU(13764012341928616198),
+ KQU(13115166194379119043), KQU( 7989321021560255519),
+ KQU( 2103584280905877040), KQU( 9230788662155228488),
+ KQU(16396629323325547654), KQU( 657926409811318051),
+ KQU(15046700264391400727), KQU( 5120132858771880830),
+ KQU( 7934160097989028561), KQU( 6963121488531976245),
+ KQU(17412329602621742089), KQU(15144843053931774092),
+ KQU(17204176651763054532), KQU(13166595387554065870),
+ KQU( 8590377810513960213), KQU( 5834365135373991938),
+ KQU( 7640913007182226243), KQU( 3479394703859418425),
+ KQU(16402784452644521040), KQU( 4993979809687083980),
+ KQU(13254522168097688865), KQU(15643659095244365219),
+ KQU( 5881437660538424982), KQU(11174892200618987379),
+ KQU( 254409966159711077), KQU(17158413043140549909),
+ KQU( 3638048789290376272), KQU( 1376816930299489190),
+ KQU( 4622462095217761923), KQU(15086407973010263515),
+ KQU(13253971772784692238), KQU( 5270549043541649236),
+ KQU(11182714186805411604), KQU(12283846437495577140),
+ KQU( 5297647149908953219), KQU(10047451738316836654),
+ KQU( 4938228100367874746), KQU(12328523025304077923),
+ KQU( 3601049438595312361), KQU( 9313624118352733770),
+ KQU(13322966086117661798), KQU(16660005705644029394),
+ KQU(11337677526988872373), KQU(13869299102574417795),
+ KQU(15642043183045645437), KQU( 3021755569085880019),
+ KQU( 4979741767761188161), KQU(13679979092079279587),
+ KQU( 3344685842861071743), KQU(13947960059899588104),
+ KQU( 305806934293368007), KQU( 5749173929201650029),
+ KQU(11123724852118844098), KQU(15128987688788879802),
+ KQU(15251651211024665009), KQU( 7689925933816577776),
+ KQU(16732804392695859449), KQU(17087345401014078468),
+ KQU(14315108589159048871), KQU( 4820700266619778917),
+ KQU(16709637539357958441), KQU( 4936227875177351374),
+ KQU( 2137907697912987247), KQU(11628565601408395420),
+ KQU( 2333250549241556786), KQU( 5711200379577778637),
+ KQU( 5170680131529031729), KQU(12620392043061335164),
+ KQU( 95363390101096078), KQU( 5487981914081709462),
+ KQU( 1763109823981838620), KQU( 3395861271473224396),
+ KQU( 1300496844282213595), KQU( 6894316212820232902),
+ KQU(10673859651135576674), KQU( 5911839658857903252),
+ KQU(17407110743387299102), KQU( 8257427154623140385),
+ KQU(11389003026741800267), KQU( 4070043211095013717),
+ KQU(11663806997145259025), KQU(15265598950648798210),
+ KQU( 630585789434030934), KQU( 3524446529213587334),
+ KQU( 7186424168495184211), KQU(10806585451386379021),
+ KQU(11120017753500499273), KQU( 1586837651387701301),
+ KQU(17530454400954415544), KQU( 9991670045077880430),
+ KQU( 7550997268990730180), KQU( 8640249196597379304),
+ KQU( 3522203892786893823), KQU(10401116549878854788),
+ KQU(13690285544733124852), KQU( 8295785675455774586),
+ KQU(15535716172155117603), KQU( 3112108583723722511),
+ KQU(17633179955339271113), KQU(18154208056063759375),
+ KQU( 1866409236285815666), KQU(13326075895396412882),
+ KQU( 8756261842948020025), KQU( 6281852999868439131),
+ KQU(15087653361275292858), KQU(10333923911152949397),
+ KQU( 5265567645757408500), KQU(12728041843210352184),
+ KQU( 6347959327507828759), KQU( 154112802625564758),
+ KQU(18235228308679780218), KQU( 3253805274673352418),
+ KQU( 4849171610689031197), KQU(17948529398340432518),
+ KQU(13803510475637409167), KQU(13506570190409883095),
+ KQU(15870801273282960805), KQU( 8451286481299170773),
+ KQU( 9562190620034457541), KQU( 8518905387449138364),
+ KQU(12681306401363385655), KQU( 3788073690559762558),
+ KQU( 5256820289573487769), KQU( 2752021372314875467),
+ KQU( 6354035166862520716), KQU( 4328956378309739069),
+ KQU( 449087441228269600), KQU( 5533508742653090868),
+ KQU( 1260389420404746988), KQU(18175394473289055097),
+ KQU( 1535467109660399420), KQU( 8818894282874061442),
+ KQU(12140873243824811213), KQU(15031386653823014946),
+ KQU( 1286028221456149232), KQU( 6329608889367858784),
+ KQU( 9419654354945132725), KQU( 6094576547061672379),
+ KQU(17706217251847450255), KQU( 1733495073065878126),
+ KQU(16918923754607552663), KQU( 8881949849954945044),
+ KQU(12938977706896313891), KQU(14043628638299793407),
+ KQU(18393874581723718233), KQU( 6886318534846892044),
+ KQU(14577870878038334081), KQU(13541558383439414119),
+ KQU(13570472158807588273), KQU(18300760537910283361),
+ KQU( 818368572800609205), KQU( 1417000585112573219),
+ KQU(12337533143867683655), KQU(12433180994702314480),
+ KQU( 778190005829189083), KQU(13667356216206524711),
+ KQU( 9866149895295225230), KQU(11043240490417111999),
+ KQU( 1123933826541378598), KQU( 6469631933605123610),
+ KQU(14508554074431980040), KQU(13918931242962026714),
+ KQU( 2870785929342348285), KQU(14786362626740736974),
+ KQU(13176680060902695786), KQU( 9591778613541679456),
+ KQU( 9097662885117436706), KQU( 749262234240924947),
+ KQU( 1944844067793307093), KQU( 4339214904577487742),
+ KQU( 8009584152961946551), KQU(16073159501225501777),
+ KQU( 3335870590499306217), KQU(17088312653151202847),
+ KQU( 3108893142681931848), KQU(16636841767202792021),
+ KQU(10423316431118400637), KQU( 8008357368674443506),
+ KQU(11340015231914677875), KQU(17687896501594936090),
+ KQU(15173627921763199958), KQU( 542569482243721959),
+ KQU(15071714982769812975), KQU( 4466624872151386956),
+ KQU( 1901780715602332461), KQU( 9822227742154351098),
+ KQU( 1479332892928648780), KQU( 6981611948382474400),
+ KQU( 7620824924456077376), KQU(14095973329429406782),
+ KQU( 7902744005696185404), KQU(15830577219375036920),
+ KQU(10287076667317764416), KQU(12334872764071724025),
+ KQU( 4419302088133544331), KQU(14455842851266090520),
+ KQU(12488077416504654222), KQU( 7953892017701886766),
+ KQU( 6331484925529519007), KQU( 4902145853785030022),
+ KQU(17010159216096443073), KQU(11945354668653886087),
+ KQU(15112022728645230829), KQU(17363484484522986742),
+ KQU( 4423497825896692887), KQU( 8155489510809067471),
+ KQU( 258966605622576285), KQU( 5462958075742020534),
+ KQU( 6763710214913276228), KQU( 2368935183451109054),
+ KQU(14209506165246453811), KQU( 2646257040978514881),
+ KQU( 3776001911922207672), KQU( 1419304601390147631),
+ KQU(14987366598022458284), KQU( 3977770701065815721),
+ KQU( 730820417451838898), KQU( 3982991703612885327),
+ KQU( 2803544519671388477), KQU(17067667221114424649),
+ KQU( 2922555119737867166), KQU( 1989477584121460932),
+ KQU(15020387605892337354), KQU( 9293277796427533547),
+ KQU(10722181424063557247), KQU(16704542332047511651),
+ KQU( 5008286236142089514), KQU(16174732308747382540),
+ KQU(17597019485798338402), KQU(13081745199110622093),
+ KQU( 8850305883842258115), KQU(12723629125624589005),
+ KQU( 8140566453402805978), KQU(15356684607680935061),
+ KQU(14222190387342648650), KQU(11134610460665975178),
+ KQU( 1259799058620984266), KQU(13281656268025610041),
+ KQU( 298262561068153992), KQU(12277871700239212922),
+ KQU(13911297774719779438), KQU(16556727962761474934),
+ KQU(17903010316654728010), KQU( 9682617699648434744),
+ KQU(14757681836838592850), KQU( 1327242446558524473),
+ KQU(11126645098780572792), KQU( 1883602329313221774),
+ KQU( 2543897783922776873), KQU(15029168513767772842),
+ KQU(12710270651039129878), KQU(16118202956069604504),
+ KQU(15010759372168680524), KQU( 2296827082251923948),
+ KQU(10793729742623518101), KQU(13829764151845413046),
+ KQU(17769301223184451213), KQU( 3118268169210783372),
+ KQU(17626204544105123127), KQU( 7416718488974352644),
+ KQU(10450751996212925994), KQU( 9352529519128770586),
+ KQU( 259347569641110140), KQU( 8048588892269692697),
+ KQU( 1774414152306494058), KQU(10669548347214355622),
+ KQU(13061992253816795081), KQU(18432677803063861659),
+ KQU( 8879191055593984333), KQU(12433753195199268041),
+ KQU(14919392415439730602), KQU( 6612848378595332963),
+ KQU( 6320986812036143628), KQU(10465592420226092859),
+ KQU( 4196009278962570808), KQU( 3747816564473572224),
+ KQU(17941203486133732898), KQU( 2350310037040505198),
+ KQU( 5811779859134370113), KQU(10492109599506195126),
+ KQU( 7699650690179541274), KQU( 1954338494306022961),
+ KQU(14095816969027231152), KQU( 5841346919964852061),
+ KQU(14945969510148214735), KQU( 3680200305887550992),
+ KQU( 6218047466131695792), KQU( 8242165745175775096),
+ KQU(11021371934053307357), KQU( 1265099502753169797),
+ KQU( 4644347436111321718), KQU( 3609296916782832859),
+ KQU( 8109807992218521571), KQU(18387884215648662020),
+ KQU(14656324896296392902), KQU(17386819091238216751),
+ KQU(17788300878582317152), KQU( 7919446259742399591),
+ KQU( 4466613134576358004), KQU(12928181023667938509),
+ KQU(13147446154454932030), KQU(16552129038252734620),
+ KQU( 8395299403738822450), KQU(11313817655275361164),
+ KQU( 434258809499511718), KQU( 2074882104954788676),
+ KQU( 7929892178759395518), KQU( 9006461629105745388),
+ KQU( 5176475650000323086), KQU(11128357033468341069),
+ KQU(12026158851559118955), KQU(14699716249471156500),
+ KQU( 448982497120206757), KQU( 4156475356685519900),
+ KQU( 6063816103417215727), KQU(10073289387954971479),
+ KQU( 8174466846138590962), KQU( 2675777452363449006),
+ KQU( 9090685420572474281), KQU( 6659652652765562060),
+ KQU(12923120304018106621), KQU(11117480560334526775),
+ KQU( 937910473424587511), KQU( 1838692113502346645),
+ KQU(11133914074648726180), KQU( 7922600945143884053),
+ KQU(13435287702700959550), KQU( 5287964921251123332),
+ KQU(11354875374575318947), KQU(17955724760748238133),
+ KQU(13728617396297106512), KQU( 4107449660118101255),
+ KQU( 1210269794886589623), KQU(11408687205733456282),
+ KQU( 4538354710392677887), KQU(13566803319341319267),
+ KQU(17870798107734050771), KQU( 3354318982568089135),
+ KQU( 9034450839405133651), KQU(13087431795753424314),
+ KQU( 950333102820688239), KQU( 1968360654535604116),
+ KQU(16840551645563314995), KQU( 8867501803892924995),
+ KQU(11395388644490626845), KQU( 1529815836300732204),
+ KQU(13330848522996608842), KQU( 1813432878817504265),
+ KQU( 2336867432693429560), KQU(15192805445973385902),
+ KQU( 2528593071076407877), KQU( 128459777936689248),
+ KQU( 9976345382867214866), KQU( 6208885766767996043),
+ KQU(14982349522273141706), KQU( 3099654362410737822),
+ KQU(13776700761947297661), KQU( 8806185470684925550),
+ KQU( 8151717890410585321), KQU( 640860591588072925),
+ KQU(14592096303937307465), KQU( 9056472419613564846),
+ KQU(14861544647742266352), KQU(12703771500398470216),
+ KQU( 3142372800384138465), KQU( 6201105606917248196),
+ KQU(18337516409359270184), KQU(15042268695665115339),
+ KQU(15188246541383283846), KQU(12800028693090114519),
+ KQU( 5992859621101493472), KQU(18278043971816803521),
+ KQU( 9002773075219424560), KQU( 7325707116943598353),
+ KQU( 7930571931248040822), KQU( 5645275869617023448),
+ KQU( 7266107455295958487), KQU( 4363664528273524411),
+ KQU(14313875763787479809), KQU(17059695613553486802),
+ KQU( 9247761425889940932), KQU(13704726459237593128),
+ KQU( 2701312427328909832), KQU(17235532008287243115),
+ KQU(14093147761491729538), KQU( 6247352273768386516),
+ KQU( 8268710048153268415), KQU( 7985295214477182083),
+ KQU(15624495190888896807), KQU( 3772753430045262788),
+ KQU( 9133991620474991698), KQU( 5665791943316256028),
+ KQU( 7551996832462193473), KQU(13163729206798953877),
+ KQU( 9263532074153846374), KQU( 1015460703698618353),
+ KQU(17929874696989519390), KQU(18257884721466153847),
+ KQU(16271867543011222991), KQU( 3905971519021791941),
+ KQU(16814488397137052085), KQU( 1321197685504621613),
+ KQU( 2870359191894002181), KQU(14317282970323395450),
+ KQU(13663920845511074366), KQU( 2052463995796539594),
+ KQU(14126345686431444337), KQU( 1727572121947022534),
+ KQU(17793552254485594241), KQU( 6738857418849205750),
+ KQU( 1282987123157442952), KQU(16655480021581159251),
+ KQU( 6784587032080183866), KQU(14726758805359965162),
+ KQU( 7577995933961987349), KQU(12539609320311114036),
+ KQU(10789773033385439494), KQU( 8517001497411158227),
+ KQU(10075543932136339710), KQU(14838152340938811081),
+ KQU( 9560840631794044194), KQU(17445736541454117475),
+ KQU(10633026464336393186), KQU(15705729708242246293),
+ KQU( 1117517596891411098), KQU( 4305657943415886942),
+ KQU( 4948856840533979263), KQU(16071681989041789593),
+ KQU(13723031429272486527), KQU( 7639567622306509462),
+ KQU(12670424537483090390), KQU( 9715223453097197134),
+ KQU( 5457173389992686394), KQU( 289857129276135145),
+ KQU(17048610270521972512), KQU( 692768013309835485),
+ KQU(14823232360546632057), KQU(18218002361317895936),
+ KQU( 3281724260212650204), KQU(16453957266549513795),
+ KQU( 8592711109774511881), KQU( 929825123473369579),
+ KQU(15966784769764367791), KQU( 9627344291450607588),
+ KQU(10849555504977813287), KQU( 9234566913936339275),
+ KQU( 6413807690366911210), KQU(10862389016184219267),
+ KQU(13842504799335374048), KQU( 1531994113376881174),
+ KQU( 2081314867544364459), KQU(16430628791616959932),
+ KQU( 8314714038654394368), KQU( 9155473892098431813),
+ KQU(12577843786670475704), KQU( 4399161106452401017),
+ KQU( 1668083091682623186), KQU( 1741383777203714216),
+ KQU( 2162597285417794374), KQU(15841980159165218736),
+ KQU( 1971354603551467079), KQU( 1206714764913205968),
+ KQU( 4790860439591272330), KQU(14699375615594055799),
+ KQU( 8374423871657449988), KQU(10950685736472937738),
+ KQU( 697344331343267176), KQU(10084998763118059810),
+ KQU(12897369539795983124), KQU(12351260292144383605),
+ KQU( 1268810970176811234), KQU( 7406287800414582768),
+ KQU( 516169557043807831), KQU( 5077568278710520380),
+ KQU( 3828791738309039304), KQU( 7721974069946943610),
+ KQU( 3534670260981096460), KQU( 4865792189600584891),
+ KQU(16892578493734337298), KQU( 9161499464278042590),
+ KQU(11976149624067055931), KQU(13219479887277343990),
+ KQU(14161556738111500680), KQU(14670715255011223056),
+ KQU( 4671205678403576558), KQU(12633022931454259781),
+ KQU(14821376219869187646), KQU( 751181776484317028),
+ KQU( 2192211308839047070), KQU(11787306362361245189),
+ KQU(10672375120744095707), KQU( 4601972328345244467),
+ KQU(15457217788831125879), KQU( 8464345256775460809),
+ KQU(10191938789487159478), KQU( 6184348739615197613),
+ KQU(11425436778806882100), KQU( 2739227089124319793),
+ KQU( 461464518456000551), KQU( 4689850170029177442),
+ KQU( 6120307814374078625), KQU(11153579230681708671),
+ KQU( 7891721473905347926), KQU(10281646937824872400),
+ KQU( 3026099648191332248), KQU( 8666750296953273818),
+ KQU(14978499698844363232), KQU(13303395102890132065),
+ KQU( 8182358205292864080), KQU(10560547713972971291),
+ KQU(11981635489418959093), KQU( 3134621354935288409),
+ KQU(11580681977404383968), KQU(14205530317404088650),
+ KQU( 5997789011854923157), KQU(13659151593432238041),
+ KQU(11664332114338865086), KQU( 7490351383220929386),
+ KQU( 7189290499881530378), KQU(15039262734271020220),
+ KQU( 2057217285976980055), KQU( 555570804905355739),
+ KQU(11235311968348555110), KQU(13824557146269603217),
+ KQU(16906788840653099693), KQU( 7222878245455661677),
+ KQU( 5245139444332423756), KQU( 4723748462805674292),
+ KQU(12216509815698568612), KQU(17402362976648951187),
+ KQU(17389614836810366768), KQU( 4880936484146667711),
+ KQU( 9085007839292639880), KQU(13837353458498535449),
+ KQU(11914419854360366677), KQU(16595890135313864103),
+ KQU( 6313969847197627222), KQU(18296909792163910431),
+ KQU(10041780113382084042), KQU( 2499478551172884794),
+ KQU(11057894246241189489), KQU( 9742243032389068555),
+ KQU(12838934582673196228), KQU(13437023235248490367),
+ KQU(13372420669446163240), KQU( 6752564244716909224),
+ KQU( 7157333073400313737), KQU(12230281516370654308),
+ KQU( 1182884552219419117), KQU( 2955125381312499218),
+ KQU(10308827097079443249), KQU( 1337648572986534958),
+ KQU(16378788590020343939), KQU( 108619126514420935),
+ KQU( 3990981009621629188), KQU( 5460953070230946410),
+ KQU( 9703328329366531883), KQU(13166631489188077236),
+ KQU( 1104768831213675170), KQU( 3447930458553877908),
+ KQU( 8067172487769945676), KQU( 5445802098190775347),
+ KQU( 3244840981648973873), KQU(17314668322981950060),
+ KQU( 5006812527827763807), KQU(18158695070225526260),
+ KQU( 2824536478852417853), KQU(13974775809127519886),
+ KQU( 9814362769074067392), KQU(17276205156374862128),
+ KQU(11361680725379306967), KQU( 3422581970382012542),
+ KQU(11003189603753241266), KQU(11194292945277862261),
+ KQU( 6839623313908521348), KQU(11935326462707324634),
+ KQU( 1611456788685878444), KQU(13112620989475558907),
+ KQU( 517659108904450427), KQU(13558114318574407624),
+ KQU(15699089742731633077), KQU( 4988979278862685458),
+ KQU( 8111373583056521297), KQU( 3891258746615399627),
+ KQU( 8137298251469718086), KQU(12748663295624701649),
+ KQU( 4389835683495292062), KQU( 5775217872128831729),
+ KQU( 9462091896405534927), KQU( 8498124108820263989),
+ KQU( 8059131278842839525), KQU(10503167994254090892),
+ KQU(11613153541070396656), KQU(18069248738504647790),
+ KQU( 570657419109768508), KQU( 3950574167771159665),
+ KQU( 5514655599604313077), KQU( 2908460854428484165),
+ KQU(10777722615935663114), KQU(12007363304839279486),
+ KQU( 9800646187569484767), KQU( 8795423564889864287),
+ KQU(14257396680131028419), KQU( 6405465117315096498),
+ KQU( 7939411072208774878), KQU(17577572378528990006),
+ KQU(14785873806715994850), KQU(16770572680854747390),
+ KQU(18127549474419396481), KQU(11637013449455757750),
+ KQU(14371851933996761086), KQU( 3601181063650110280),
+ KQU( 4126442845019316144), KQU(10198287239244320669),
+ KQU(18000169628555379659), KQU(18392482400739978269),
+ KQU( 6219919037686919957), KQU( 3610085377719446052),
+ KQU( 2513925039981776336), KQU(16679413537926716955),
+ KQU(12903302131714909434), KQU( 5581145789762985009),
+ KQU(12325955044293303233), KQU(17216111180742141204),
+ KQU( 6321919595276545740), KQU( 3507521147216174501),
+ KQU( 9659194593319481840), KQU(11473976005975358326),
+ KQU(14742730101435987026), KQU( 492845897709954780),
+ KQU(16976371186162599676), KQU(17712703422837648655),
+ KQU( 9881254778587061697), KQU( 8413223156302299551),
+ KQU( 1563841828254089168), KQU( 9996032758786671975),
+ KQU( 138877700583772667), KQU(13003043368574995989),
+ KQU( 4390573668650456587), KQU( 8610287390568126755),
+ KQU(15126904974266642199), KQU( 6703637238986057662),
+ KQU( 2873075592956810157), KQU( 6035080933946049418),
+ KQU(13382846581202353014), KQU( 7303971031814642463),
+ KQU(18418024405307444267), KQU( 5847096731675404647),
+ KQU( 4035880699639842500), KQU(11525348625112218478),
+ KQU( 3041162365459574102), KQU( 2604734487727986558),
+ KQU(15526341771636983145), KQU(14556052310697370254),
+ KQU(12997787077930808155), KQU( 9601806501755554499),
+ KQU(11349677952521423389), KQU(14956777807644899350),
+ KQU(16559736957742852721), KQU(12360828274778140726),
+ KQU( 6685373272009662513), KQU(16932258748055324130),
+ KQU(15918051131954158508), KQU( 1692312913140790144),
+ KQU( 546653826801637367), KQU( 5341587076045986652),
+ KQU(14975057236342585662), KQU(12374976357340622412),
+ KQU(10328833995181940552), KQU(12831807101710443149),
+ KQU(10548514914382545716), KQU( 2217806727199715993),
+ KQU(12627067369242845138), KQU( 4598965364035438158),
+ KQU( 150923352751318171), KQU(14274109544442257283),
+ KQU( 4696661475093863031), KQU( 1505764114384654516),
+ KQU(10699185831891495147), KQU( 2392353847713620519),
+ KQU( 3652870166711788383), KQU( 8640653276221911108),
+ KQU( 3894077592275889704), KQU( 4918592872135964845),
+ KQU(16379121273281400789), KQU(12058465483591683656),
+ KQU(11250106829302924945), KQU( 1147537556296983005),
+ KQU( 6376342756004613268), KQU(14967128191709280506),
+ KQU(18007449949790627628), KQU( 9497178279316537841),
+ KQU( 7920174844809394893), KQU(10037752595255719907),
+ KQU(15875342784985217697), KQU(15311615921712850696),
+ KQU( 9552902652110992950), KQU(14054979450099721140),
+ KQU( 5998709773566417349), KQU(18027910339276320187),
+ KQU( 8223099053868585554), KQU( 7842270354824999767),
+ KQU( 4896315688770080292), KQU(12969320296569787895),
+ KQU( 2674321489185759961), KQU( 4053615936864718439),
+ KQU(11349775270588617578), KQU( 4743019256284553975),
+ KQU( 5602100217469723769), KQU(14398995691411527813),
+ KQU( 7412170493796825470), KQU( 836262406131744846),
+ KQU( 8231086633845153022), KQU( 5161377920438552287),
+ KQU( 8828731196169924949), KQU(16211142246465502680),
+ KQU( 3307990879253687818), KQU( 5193405406899782022),
+ KQU( 8510842117467566693), KQU( 6070955181022405365),
+ KQU(14482950231361409799), KQU(12585159371331138077),
+ KQU( 3511537678933588148), KQU( 2041849474531116417),
+ KQU(10944936685095345792), KQU(18303116923079107729),
+ KQU( 2720566371239725320), KQU( 4958672473562397622),
+ KQU( 3032326668253243412), KQU(13689418691726908338),
+ KQU( 1895205511728843996), KQU( 8146303515271990527),
+ KQU(16507343500056113480), KQU( 473996939105902919),
+ KQU( 9897686885246881481), KQU(14606433762712790575),
+ KQU( 6732796251605566368), KQU( 1399778120855368916),
+ KQU( 935023885182833777), KQU(16066282816186753477),
+ KQU( 7291270991820612055), KQU(17530230393129853844),
+ KQU(10223493623477451366), KQU(15841725630495676683),
+ KQU(17379567246435515824), KQU( 8588251429375561971),
+ KQU(18339511210887206423), KQU(17349587430725976100),
+ KQU(12244876521394838088), KQU( 6382187714147161259),
+ KQU(12335807181848950831), KQU(16948885622305460665),
+ KQU(13755097796371520506), KQU(14806740373324947801),
+ KQU( 4828699633859287703), KQU( 8209879281452301604),
+ KQU(12435716669553736437), KQU(13970976859588452131),
+ KQU( 6233960842566773148), KQU(12507096267900505759),
+ KQU( 1198713114381279421), KQU(14989862731124149015),
+ KQU(15932189508707978949), KQU( 2526406641432708722),
+ KQU( 29187427817271982), KQU( 1499802773054556353),
+ KQU(10816638187021897173), KQU( 5436139270839738132),
+ KQU( 6659882287036010082), KQU( 2154048955317173697),
+ KQU(10887317019333757642), KQU(16281091802634424955),
+ KQU(10754549879915384901), KQU(10760611745769249815),
+ KQU( 2161505946972504002), KQU( 5243132808986265107),
+ KQU(10129852179873415416), KQU( 710339480008649081),
+ KQU( 7802129453068808528), KQU(17967213567178907213),
+ KQU(15730859124668605599), KQU(13058356168962376502),
+ KQU( 3701224985413645909), KQU(14464065869149109264),
+ KQU( 9959272418844311646), KQU(10157426099515958752),
+ KQU(14013736814538268528), KQU(17797456992065653951),
+ KQU(17418878140257344806), KQU(15457429073540561521),
+ KQU( 2184426881360949378), KQU( 2062193041154712416),
+ KQU( 8553463347406931661), KQU( 4913057625202871854),
+ KQU( 2668943682126618425), KQU(17064444737891172288),
+ KQU( 4997115903913298637), KQU(12019402608892327416),
+ KQU(17603584559765897352), KQU(11367529582073647975),
+ KQU( 8211476043518436050), KQU( 8676849804070323674),
+ KQU(18431829230394475730), KQU(10490177861361247904),
+ KQU( 9508720602025651349), KQU( 7409627448555722700),
+ KQU( 5804047018862729008), KQU(11943858176893142594),
+ KQU(11908095418933847092), KQU( 5415449345715887652),
+ KQU( 1554022699166156407), KQU( 9073322106406017161),
+ KQU( 7080630967969047082), KQU(18049736940860732943),
+ KQU(12748714242594196794), KQU( 1226992415735156741),
+ KQU(17900981019609531193), KQU(11720739744008710999),
+ KQU( 3006400683394775434), KQU(11347974011751996028),
+ KQU( 3316999628257954608), KQU( 8384484563557639101),
+ KQU(18117794685961729767), KQU( 1900145025596618194),
+ KQU(17459527840632892676), KQU( 5634784101865710994),
+ KQU( 7918619300292897158), KQU( 3146577625026301350),
+ KQU( 9955212856499068767), KQU( 1873995843681746975),
+ KQU( 1561487759967972194), KQU( 8322718804375878474),
+ KQU(11300284215327028366), KQU( 4667391032508998982),
+ KQU( 9820104494306625580), KQU(17922397968599970610),
+ KQU( 1784690461886786712), KQU(14940365084341346821),
+ KQU( 5348719575594186181), KQU(10720419084507855261),
+ KQU(14210394354145143274), KQU( 2426468692164000131),
+ KQU(16271062114607059202), KQU(14851904092357070247),
+ KQU( 6524493015693121897), KQU( 9825473835127138531),
+ KQU(14222500616268569578), KQU(15521484052007487468),
+ KQU(14462579404124614699), KQU(11012375590820665520),
+ KQU(11625327350536084927), KQU(14452017765243785417),
+ KQU( 9989342263518766305), KQU( 3640105471101803790),
+ KQU( 4749866455897513242), KQU(13963064946736312044),
+ KQU(10007416591973223791), KQU(18314132234717431115),
+ KQU( 3286596588617483450), KQU( 7726163455370818765),
+ KQU( 7575454721115379328), KQU( 5308331576437663422),
+ KQU(18288821894903530934), KQU( 8028405805410554106),
+ KQU(15744019832103296628), KQU( 149765559630932100),
+ KQU( 6137705557200071977), KQU(14513416315434803615),
+ KQU(11665702820128984473), KQU( 218926670505601386),
+ KQU( 6868675028717769519), KQU(15282016569441512302),
+ KQU( 5707000497782960236), KQU( 6671120586555079567),
+ KQU( 2194098052618985448), KQU(16849577895477330978),
+ KQU(12957148471017466283), KQU( 1997805535404859393),
+ KQU( 1180721060263860490), KQU(13206391310193756958),
+ KQU(12980208674461861797), KQU( 3825967775058875366),
+ KQU(17543433670782042631), KQU( 1518339070120322730),
+ KQU(16344584340890991669), KQU( 2611327165318529819),
+ KQU(11265022723283422529), KQU( 4001552800373196817),
+ KQU(14509595890079346161), KQU( 3528717165416234562),
+ KQU(18153222571501914072), KQU( 9387182977209744425),
+ KQU(10064342315985580021), KQU(11373678413215253977),
+ KQU( 2308457853228798099), KQU( 9729042942839545302),
+ KQU( 7833785471140127746), KQU( 6351049900319844436),
+ KQU(14454610627133496067), KQU(12533175683634819111),
+ KQU(15570163926716513029), KQU(13356980519185762498)
};
-TEST_BEGIN(test_gen_rand_32)
-{
+TEST_BEGIN(test_gen_rand_32) {
uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
int i;
@@ -1484,8 +1483,7 @@ TEST_BEGIN(test_gen_rand_32)
}
TEST_END
-TEST_BEGIN(test_by_array_32)
-{
+TEST_BEGIN(test_by_array_32) {
uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16));
int i;
@@ -1520,8 +1518,7 @@ TEST_BEGIN(test_by_array_32)
}
TEST_END
-TEST_BEGIN(test_gen_rand_64)
-{
+TEST_BEGIN(test_gen_rand_64) {
uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
int i;
@@ -1543,21 +1540,20 @@ TEST_BEGIN(test_gen_rand_64)
}
r = gen_rand64(ctx);
assert_u64_eq(r, array64[i],
- "Mismatch at array64[%d]=%"PRIx64", gen=%"PRIx64, i,
+ "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i,
array64[i], r);
}
for (i = 0; i < COUNT_2; i++) {
r = gen_rand64(ctx);
assert_u64_eq(r, array64_2[i],
- "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64"", i,
+ "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i,
array64_2[i], r);
}
fini_gen_rand(ctx);
}
TEST_END
-TEST_BEGIN(test_by_array_64)
-{
+TEST_BEGIN(test_by_array_64) {
uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16));
int i;
@@ -1580,13 +1576,13 @@ TEST_BEGIN(test_by_array_64)
}
r = gen_rand64(ctx);
assert_u64_eq(r, array64[i],
- "Mismatch at array64[%d]=%"PRIx64" gen=%"PRIx64, i,
+ "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i,
array64[i], r);
}
for (i = 0; i < COUNT_2; i++) {
r = gen_rand64(ctx);
assert_u64_eq(r, array64_2[i],
- "Mismatch at array64_2[%d]=%"PRIx64" gen=%"PRIx64, i,
+ "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i,
array64_2[i], r);
}
fini_gen_rand(ctx);
@@ -1594,12 +1590,10 @@ TEST_BEGIN(test_by_array_64)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_gen_rand_32,
test_by_array_32,
test_gen_rand_64,
- test_by_array_64));
+ test_by_array_64);
}
diff --git a/deps/jemalloc/test/unit/a0.c b/deps/jemalloc/test/unit/a0.c
new file mode 100644
index 000000000..a27ab3f42
--- /dev/null
+++ b/deps/jemalloc/test/unit/a0.c
@@ -0,0 +1,16 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_a0) {
+ void *p;
+
+ p = a0malloc(1);
+ assert_ptr_not_null(p, "Unexpected a0malloc() error");
+ a0dalloc(p);
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_malloc_init(
+ test_a0);
+}
diff --git a/deps/jemalloc/test/unit/arena_reset.c b/deps/jemalloc/test/unit/arena_reset.c
new file mode 100644
index 000000000..f5fb24d1e
--- /dev/null
+++ b/deps/jemalloc/test/unit/arena_reset.c
@@ -0,0 +1,344 @@
+#ifndef ARENA_RESET_PROF_C_
+#include "test/jemalloc_test.h"
+#endif
+
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/rtree.h"
+
+#include "test/extent_hooks.h"
+
+static unsigned
+get_nsizes_impl(const char *cmd) {
+ unsigned ret;
+ size_t z;
+
+ z = sizeof(unsigned);
+ assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0,
+ "Unexpected mallctl(\"%s\", ...) failure", cmd);
+
+ return ret;
+}
+
+static unsigned
+get_nsmall(void) {
+ return get_nsizes_impl("arenas.nbins");
+}
+
+static unsigned
+get_nlarge(void) {
+ return get_nsizes_impl("arenas.nlextents");
+}
+
+static size_t
+get_size_impl(const char *cmd, size_t ind) {
+ size_t ret;
+ size_t z;
+ size_t mib[4];
+ size_t miblen = 4;
+
+ z = sizeof(size_t);
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = ind;
+ z = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind);
+
+ return ret;
+}
+
+static size_t
+get_small_size(size_t ind) {
+ return get_size_impl("arenas.bin.0.size", ind);
+}
+
+static size_t
+get_large_size(size_t ind) {
+ return get_size_impl("arenas.lextent.0.size", ind);
+}
+
+/* Like ivsalloc(), but safe to call on discarded allocations. */
+static size_t
+vsalloc(tsdn_t *tsdn, const void *ptr) {
+ rtree_ctx_t rtree_ctx_fallback;
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+
+ extent_t *extent;
+ szind_t szind;
+ if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx,
+ (uintptr_t)ptr, false, &extent, &szind)) {
+ return 0;
+ }
+
+ if (extent == NULL) {
+ return 0;
+ }
+ if (extent_state_get(extent) != extent_state_active) {
+ return 0;
+ }
+
+ if (szind == NSIZES) {
+ return 0;
+ }
+
+ return sz_index2size(szind);
+}
+
+static unsigned
+do_arena_create(extent_hooks_t *h) {
+ unsigned arena_ind;
+ size_t sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
+ (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0,
+ "Unexpected mallctl() failure");
+ return arena_ind;
+}
+
+static void
+do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) {
+#define NLARGE 32
+ unsigned nsmall, nlarge, i;
+ size_t sz;
+ int flags;
+ tsdn_t *tsdn;
+
+ flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+ nsmall = get_nsmall();
+ nlarge = get_nlarge() > NLARGE ? NLARGE : get_nlarge();
+ *nptrs = nsmall + nlarge;
+ *ptrs = (void **)malloc(*nptrs * sizeof(void *));
+ assert_ptr_not_null(*ptrs, "Unexpected malloc() failure");
+
+ /* Allocate objects with a wide range of sizes. */
+ for (i = 0; i < nsmall; i++) {
+ sz = get_small_size(i);
+ (*ptrs)[i] = mallocx(sz, flags);
+ assert_ptr_not_null((*ptrs)[i],
+ "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+ }
+ for (i = 0; i < nlarge; i++) {
+ sz = get_large_size(i);
+ (*ptrs)[nsmall + i] = mallocx(sz, flags);
+ assert_ptr_not_null((*ptrs)[nsmall + i],
+ "Unexpected mallocx(%zu, %#x) failure", sz, flags);
+ }
+
+ tsdn = tsdn_fetch();
+
+ /* Verify allocations. */
+ for (i = 0; i < *nptrs; i++) {
+ assert_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0,
+ "Allocation should have queryable size");
+ }
+}
+
+static void
+do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) {
+ tsdn_t *tsdn;
+ unsigned i;
+
+ tsdn = tsdn_fetch();
+
+ if (have_background_thread) {
+ malloc_mutex_lock(tsdn,
+ &background_thread_info[arena_ind % ncpus].mtx);
+ }
+ /* Verify allocations no longer exist. */
+ for (i = 0; i < nptrs; i++) {
+ assert_zu_eq(vsalloc(tsdn, ptrs[i]), 0,
+ "Allocation should no longer exist");
+ }
+ if (have_background_thread) {
+ malloc_mutex_unlock(tsdn,
+ &background_thread_info[arena_ind % ncpus].mtx);
+ }
+
+ free(ptrs);
+}
+
+static void
+do_arena_reset_destroy(const char *name, unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen;
+
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib(name, mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+static void
+do_arena_reset(unsigned arena_ind) {
+ do_arena_reset_destroy("arena.0.reset", arena_ind);
+}
+
+static void
+do_arena_destroy(unsigned arena_ind) {
+ do_arena_reset_destroy("arena.0.destroy", arena_ind);
+}
+
+TEST_BEGIN(test_arena_reset) {
+ unsigned arena_ind;
+ void **ptrs;
+ unsigned nptrs;
+
+ arena_ind = do_arena_create(NULL);
+ do_arena_reset_pre(arena_ind, &ptrs, &nptrs);
+ do_arena_reset(arena_ind);
+ do_arena_reset_post(ptrs, nptrs, arena_ind);
+}
+TEST_END
+
+static bool
+arena_i_initialized(unsigned arena_ind, bool refresh) {
+ bool initialized;
+ size_t mib[3];
+ size_t miblen, sz;
+
+ if (refresh) {
+ uint64_t epoch = 1;
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
+ sizeof(epoch)), 0, "Unexpected mallctl() failure");
+ }
+
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ sz = sizeof(initialized);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL,
+ 0), 0, "Unexpected mallctlbymib() failure");
+
+ return initialized;
+}
+
+TEST_BEGIN(test_arena_destroy_initial) {
+ assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false),
+ "Destroyed arena stats should not be initialized");
+}
+TEST_END
+
+TEST_BEGIN(test_arena_destroy_hooks_default) {
+ unsigned arena_ind, arena_ind_another, arena_ind_prev;
+ void **ptrs;
+ unsigned nptrs;
+
+ arena_ind = do_arena_create(NULL);
+ do_arena_reset_pre(arena_ind, &ptrs, &nptrs);
+
+ assert_false(arena_i_initialized(arena_ind, false),
+ "Arena stats should not be initialized");
+ assert_true(arena_i_initialized(arena_ind, true),
+ "Arena stats should be initialized");
+
+ /*
+ * Create another arena before destroying one, to better verify arena
+ * index reuse.
+ */
+ arena_ind_another = do_arena_create(NULL);
+
+ do_arena_destroy(arena_ind);
+
+ assert_false(arena_i_initialized(arena_ind, true),
+ "Arena stats should not be initialized");
+ assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false),
+ "Destroyed arena stats should be initialized");
+
+ do_arena_reset_post(ptrs, nptrs, arena_ind);
+
+ arena_ind_prev = arena_ind;
+ arena_ind = do_arena_create(NULL);
+ do_arena_reset_pre(arena_ind, &ptrs, &nptrs);
+ assert_u_eq(arena_ind, arena_ind_prev,
+ "Arena index should have been recycled");
+ do_arena_destroy(arena_ind);
+ do_arena_reset_post(ptrs, nptrs, arena_ind);
+
+ do_arena_destroy(arena_ind_another);
+}
+TEST_END
+
+/*
+ * Actually unmap extents, regardless of opt_retain, so that attempts to access
+ * a destroyed arena's memory will segfault.
+ */
+static bool
+extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size,
+ bool committed, unsigned arena_ind) {
+ TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, "
+ "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ?
+ "true" : "false", arena_ind);
+ assert_ptr_eq(extent_hooks, &hooks,
+ "extent_hooks should be same as pointer used to set hooks");
+ assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap,
+ "Wrong hook function");
+ called_dalloc = true;
+ if (!try_dalloc) {
+ return true;
+ }
+ pages_unmap(addr, size);
+ did_dalloc = true;
+ return false;
+}
+
+static extent_hooks_t hooks_orig;
+
+static extent_hooks_t hooks_unmap = {
+ extent_alloc_hook,
+ extent_dalloc_unmap, /* dalloc */
+ extent_destroy_hook,
+ extent_commit_hook,
+ extent_decommit_hook,
+ extent_purge_lazy_hook,
+ extent_purge_forced_hook,
+ extent_split_hook,
+ extent_merge_hook
+};
+
+TEST_BEGIN(test_arena_destroy_hooks_unmap) {
+ unsigned arena_ind;
+ void **ptrs;
+ unsigned nptrs;
+
+ extent_hooks_prep();
+ try_decommit = false;
+ memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
+ memcpy(&hooks, &hooks_unmap, sizeof(extent_hooks_t));
+
+ did_alloc = false;
+ arena_ind = do_arena_create(&hooks);
+ do_arena_reset_pre(arena_ind, &ptrs, &nptrs);
+
+ assert_true(did_alloc, "Expected alloc");
+
+ assert_false(arena_i_initialized(arena_ind, false),
+ "Arena stats should not be initialized");
+ assert_true(arena_i_initialized(arena_ind, true),
+ "Arena stats should be initialized");
+
+ did_dalloc = false;
+ do_arena_destroy(arena_ind);
+ assert_true(did_dalloc, "Expected dalloc");
+
+ assert_false(arena_i_initialized(arena_ind, true),
+ "Arena stats should not be initialized");
+ assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false),
+ "Destroyed arena stats should be initialized");
+
+ do_arena_reset_post(ptrs, nptrs, arena_ind);
+
+ memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t));
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_arena_reset,
+ test_arena_destroy_initial,
+ test_arena_destroy_hooks_default,
+ test_arena_destroy_hooks_unmap);
+}
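
For reference, the arena lifecycle that test_arena_reset exercises is reachable from application code through the same public mallctl interface; a minimal sketch (assuming an unprefixed jemalloc 5 build and <jemalloc/jemalloc.h>, with error handling reduced to asserts):

    #include <assert.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        unsigned arena_ind;
        size_t sz = sizeof(arena_ind);

        /* Create a manually managed arena. */
        assert(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL,
            0) == 0);

        /* Allocate from it, bypassing the thread cache. */
        int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
        void *p = mallocx(4096, flags);
        assert(p != NULL);
        dallocx(p, flags);

        /* Destroy it through the MIB interface, as the test does. */
        size_t mib[3];
        size_t miblen = sizeof(mib) / sizeof(size_t);
        assert(mallctlnametomib("arena.0.destroy", mib, &miblen) == 0);
        mib[1] = (size_t)arena_ind;
        assert(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0) == 0);
        return 0;
    }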
diff --git a/deps/jemalloc/test/unit/arena_reset_prof.c b/deps/jemalloc/test/unit/arena_reset_prof.c
new file mode 100644
index 000000000..38d801240
--- /dev/null
+++ b/deps/jemalloc/test/unit/arena_reset_prof.c
@@ -0,0 +1,4 @@
+#include "test/jemalloc_test.h"
+#define ARENA_RESET_PROF_C_
+
+#include "arena_reset.c"
diff --git a/deps/jemalloc/test/unit/arena_reset_prof.sh b/deps/jemalloc/test/unit/arena_reset_prof.sh
new file mode 100644
index 000000000..041dc1c35
--- /dev/null
+++ b/deps/jemalloc/test/unit/arena_reset_prof.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="prof:true,lg_prof_sample:0"
diff --git a/deps/jemalloc/test/unit/atomic.c b/deps/jemalloc/test/unit/atomic.c
new file mode 100644
index 000000000..572d8d23f
--- /dev/null
+++ b/deps/jemalloc/test/unit/atomic.c
@@ -0,0 +1,229 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * We *almost* have consistent short names (e.g. "u32" for uint32_t, "b" for
+ * bool, etc.). The one exception is that the short name for void * is "p" in
+ * some places and "ptr" in others. In the long run it would be nice to unify
+ * these, but in the short run we'll use this shim.
+ */
+#define assert_p_eq assert_ptr_eq
+
+/*
+ * t: the non-atomic type, like "uint32_t".
+ * ta: the short name for the type, like "u32".
+ * val[1,2,3]: Values of the given type. The CAS tests use val2 for expected,
+ * and val3 for desired.
+ */
+
+#define DO_TESTS(t, ta, val1, val2, val3) do { \
+ t val; \
+ t expected; \
+ bool success; \
+ /* This (along with the load below) also tests ATOMIC_LOAD. */ \
+ atomic_##ta##_t atom = ATOMIC_INIT(val1); \
+ \
+ /* ATOMIC_INIT and load. */ \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, "Load or init failed"); \
+ \
+ /* Store. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val2, val, "Store failed"); \
+ \
+ /* Exchange. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val2, val, "Exchange store invalid value"); \
+ \
+ /* \
+ * Weak CAS. Spurious failures are allowed, so we loop a few \
+ * times. \
+ */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ success = false; \
+ for (int i = 0; i < 10 && !success; i++) { \
+ expected = val2; \
+ success = atomic_compare_exchange_weak_##ta(&atom, \
+ &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, expected, \
+ "CAS should update expected"); \
+ } \
+ assert_b_eq(val1 == val2, success, \
+ "Weak CAS did the wrong state update"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ if (success) { \
+ assert_##ta##_eq(val3, val, \
+ "Successful CAS should update atomic"); \
+ } else { \
+ assert_##ta##_eq(val1, val, \
+ "Unsuccessful CAS should not update atomic"); \
+ } \
+ \
+ /* Strong CAS. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ expected = val2; \
+ success = atomic_compare_exchange_strong_##ta(&atom, &expected, \
+ val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \
+ assert_b_eq(val1 == val2, success, \
+ "Strong CAS did the wrong state update"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ if (success) { \
+ assert_##ta##_eq(val3, val, \
+ "Successful CAS should update atomic"); \
+ } else { \
+ assert_##ta##_eq(val1, val, \
+ "Unsuccessful CAS should not update atomic"); \
+ } \
+ \
+} while (0)
+
+#define DO_INTEGER_TESTS(t, ta, val1, val2) do { \
+ atomic_##ta##_t atom; \
+ t val; \
+ \
+ /* Fetch-add. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, \
+ "Fetch-add should return previous value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1 + val2, val, \
+ "Fetch-add should update atomic"); \
+ \
+ /* Fetch-sub. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, \
+ "Fetch-sub should return previous value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1 - val2, val, \
+ "Fetch-sub should update atomic"); \
+ \
+ /* Fetch-and. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, \
+ "Fetch-and should return previous value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1 & val2, val, \
+ "Fetch-and should update atomic"); \
+ \
+ /* Fetch-or. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, \
+ "Fetch-or should return previous value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1 | val2, val, \
+ "Fetch-or should update atomic"); \
+ \
+ /* Fetch-xor. */ \
+ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \
+ val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1, val, \
+ "Fetch-xor should return previous value"); \
+ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \
+ assert_##ta##_eq(val1 ^ val2, val, \
+ "Fetch-xor should update atomic"); \
+} while (0)
+
+#define TEST_STRUCT(t, ta) \
+typedef struct { \
+ t val1; \
+ t val2; \
+ t val3; \
+} ta##_test_t;
+
+#define TEST_CASES(t) { \
+ {(t)-1, (t)-1, (t)-2}, \
+ {(t)-1, (t) 0, (t)-2}, \
+ {(t)-1, (t) 1, (t)-2}, \
+ \
+ {(t) 0, (t)-1, (t)-2}, \
+ {(t) 0, (t) 0, (t)-2}, \
+ {(t) 0, (t) 1, (t)-2}, \
+ \
+ {(t) 1, (t)-1, (t)-2}, \
+ {(t) 1, (t) 0, (t)-2}, \
+ {(t) 1, (t) 1, (t)-2}, \
+ \
+ {(t)0, (t)-(1 << 22), (t)-2}, \
+ {(t)0, (t)(1 << 22), (t)-2}, \
+ {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \
+ {(t)(1 << 22), (t)(1 << 22), (t)-2} \
+}
+
+#define TEST_BODY(t, ta) do { \
+ const ta##_test_t tests[] = TEST_CASES(t); \
+ for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \
+ ta##_test_t test = tests[i]; \
+ DO_TESTS(t, ta, test.val1, test.val2, test.val3); \
+ } \
+} while (0)
+
+#define INTEGER_TEST_BODY(t, ta) do { \
+ const ta##_test_t tests[] = TEST_CASES(t); \
+ for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \
+ ta##_test_t test = tests[i]; \
+ DO_TESTS(t, ta, test.val1, test.val2, test.val3); \
+ DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \
+ } \
+} while (0)
+
+TEST_STRUCT(uint64_t, u64);
+TEST_BEGIN(test_atomic_u64) {
+#if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+ test_skip("64-bit atomic operations not supported");
+#else
+ INTEGER_TEST_BODY(uint64_t, u64);
+#endif
+}
+TEST_END
+
+TEST_STRUCT(uint32_t, u32);
+TEST_BEGIN(test_atomic_u32) {
+ INTEGER_TEST_BODY(uint32_t, u32);
+}
+TEST_END
+
+TEST_STRUCT(void *, p);
+TEST_BEGIN(test_atomic_p) {
+ TEST_BODY(void *, p);
+}
+TEST_END
+
+TEST_STRUCT(size_t, zu);
+TEST_BEGIN(test_atomic_zu) {
+ INTEGER_TEST_BODY(size_t, zu);
+}
+TEST_END
+
+TEST_STRUCT(ssize_t, zd);
+TEST_BEGIN(test_atomic_zd) {
+ INTEGER_TEST_BODY(ssize_t, zd);
+}
+TEST_END
+
+TEST_STRUCT(unsigned, u);
+TEST_BEGIN(test_atomic_u) {
+ INTEGER_TEST_BODY(unsigned, u);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_atomic_u64,
+ test_atomic_u32,
+ test_atomic_p,
+ test_atomic_zu,
+ test_atomic_zd,
+ test_atomic_u);
+}
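
The DO_TESTS macro above hides the call shapes behind token pasting; written out by hand for the u32 case (internal jemalloc API, names exactly as instantiated by this test), one pass looks roughly like:

    atomic_u32_t atom = ATOMIC_INIT(1);
    uint32_t val = atomic_load_u32(&atom, ATOMIC_RELAXED);   /* val == 1 */

    atomic_store_u32(&atom, 2, ATOMIC_RELAXED);
    val = atomic_exchange_u32(&atom, 3, ATOMIC_RELAXED);     /* returns 2 */

    uint32_t expected = 3;
    bool ok = atomic_compare_exchange_strong_u32(&atom, &expected, 4,
        ATOMIC_RELAXED, ATOMIC_RELAXED);          /* ok == true; atom == 4 */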
diff --git a/deps/jemalloc/test/unit/background_thread.c b/deps/jemalloc/test/unit/background_thread.c
new file mode 100644
index 000000000..f7bd37c42
--- /dev/null
+++ b/deps/jemalloc/test/unit/background_thread.c
@@ -0,0 +1,119 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/util.h"
+
+static void
+test_switch_background_thread_ctl(bool new_val) {
+ bool e0, e1;
+ size_t sz = sizeof(bool);
+
+ e1 = new_val;
+ assert_d_eq(mallctl("background_thread", (void *)&e0, &sz,
+ &e1, sz), 0, "Unexpected mallctl() failure");
+ assert_b_eq(e0, !e1,
+ "background_thread should be %d before.\n", !e1);
+ if (e1) {
+ assert_zu_gt(n_background_threads, 0,
+ "Number of background threads should be non zero.\n");
+ } else {
+ assert_zu_eq(n_background_threads, 0,
+ "Number of background threads should be zero.\n");
+ }
+}
+
+static void
+test_repeat_background_thread_ctl(bool before) {
+ bool e0, e1;
+ size_t sz = sizeof(bool);
+
+ e1 = before;
+ assert_d_eq(mallctl("background_thread", (void *)&e0, &sz,
+ &e1, sz), 0, "Unexpected mallctl() failure");
+ assert_b_eq(e0, before,
+ "background_thread should be %d.\n", before);
+ if (e1) {
+ assert_zu_gt(n_background_threads, 0,
+ "Number of background threads should be non zero.\n");
+ } else {
+ assert_zu_eq(n_background_threads, 0,
+ "Number of background threads should be zero.\n");
+ }
+}
+
+TEST_BEGIN(test_background_thread_ctl) {
+ test_skip_if(!have_background_thread);
+
+ bool e0, e1;
+ size_t sz = sizeof(bool);
+
+ assert_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("background_thread", (void *)&e1, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+ assert_b_eq(e0, e1,
+ "Default and opt.background_thread does not match.\n");
+ if (e0) {
+ test_switch_background_thread_ctl(false);
+ }
+ assert_zu_eq(n_background_threads, 0,
+ "Number of background threads should be 0.\n");
+
+ for (unsigned i = 0; i < 4; i++) {
+ test_switch_background_thread_ctl(true);
+ test_repeat_background_thread_ctl(true);
+ test_repeat_background_thread_ctl(true);
+
+ test_switch_background_thread_ctl(false);
+ test_repeat_background_thread_ctl(false);
+ test_repeat_background_thread_ctl(false);
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_background_thread_running) {
+ test_skip_if(!have_background_thread);
+ test_skip_if(!config_stats);
+
+#if defined(JEMALLOC_BACKGROUND_THREAD)
+ tsd_t *tsd = tsd_fetch();
+ background_thread_info_t *info = &background_thread_info[0];
+
+ test_repeat_background_thread_ctl(false);
+ test_switch_background_thread_ctl(true);
+ assert_b_eq(info->state, background_thread_started,
+ "Background_thread did not start.\n");
+
+ nstime_t start, now;
+ nstime_init(&start, 0);
+ nstime_update(&start);
+
+ bool ran = false;
+ while (true) {
+ malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+ if (info->tot_n_runs > 0) {
+ ran = true;
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+ if (ran) {
+ break;
+ }
+
+ nstime_init(&now, 0);
+ nstime_update(&now);
+ nstime_subtract(&now, &start);
+ assert_u64_lt(nstime_sec(&now), 1000,
+ "Background threads did not run for 1000 seconds.");
+ sleep(1);
+ }
+ test_switch_background_thread_ctl(false);
+#endif
+}
+TEST_END
+
+int
+main(void) {
+ /* Background thread creation tests reentrancy naturally. */
+ return test_no_reentrancy(
+ test_background_thread_ctl,
+ test_background_thread_running);
+}
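
Outside the test harness, the same knob is reachable through the public mallctl API; a hedged sketch (the ENOENT case mirrors how test/unit/decay.c below detects builds without background-thread support):

    bool enable = true, was;
    size_t sz = sizeof(bool);
    int ret = mallctl("background_thread", (void *)&was, &sz,
        (void *)&enable, sz);
    if (ret == ENOENT) {
        /* jemalloc was built without background-thread support. */
    }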
diff --git a/deps/jemalloc/test/unit/background_thread_enable.c b/deps/jemalloc/test/unit/background_thread_enable.c
new file mode 100644
index 000000000..ff95e672c
--- /dev/null
+++ b/deps/jemalloc/test/unit/background_thread_enable.c
@@ -0,0 +1,83 @@
+#include "test/jemalloc_test.h"
+
+const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20";
+
+TEST_BEGIN(test_deferred) {
+ test_skip_if(!have_background_thread);
+
+ unsigned id;
+ size_t sz_u = sizeof(unsigned);
+
+ /*
+ * 10 here is somewhat arbitrary, except insofar as we want to ensure
+ * that the number of background threads is smaller than the number of
+ * arenas. I'll ragequit long before we have to spin up 10 threads per
+ * cpu to handle background purging, so this is a conservative
+ * approximation.
+ */
+ for (unsigned i = 0; i < 10 * ncpus; i++) {
+ assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0,
+ "Failed to create arena");
+ }
+
+ bool enable = true;
+ size_t sz_b = sizeof(bool);
+ assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0,
+ "Failed to enable background threads");
+ enable = false;
+ assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0,
+ "Failed to disable background threads");
+}
+TEST_END
+
+TEST_BEGIN(test_max_background_threads) {
+ test_skip_if(!have_background_thread);
+
+ size_t maxt;
+ size_t opt_maxt;
+ size_t sz_m = sizeof(maxt);
+ assert_d_eq(mallctl("opt.max_background_threads",
+ &opt_maxt, &sz_m, NULL, 0), 0,
+ "Failed to get opt.max_background_threads");
+ assert_d_eq(mallctl("max_background_threads", &maxt, &sz_m, NULL, 0), 0,
+ "Failed to get max background threads");
+ assert_zu_eq(20, maxt, "should be ncpus");
+ assert_zu_eq(opt_maxt, maxt,
+ "max_background_threads and "
+ "opt.max_background_threads should match");
+ assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m),
+ 0, "Failed to set max background threads");
+
+ unsigned id;
+ size_t sz_u = sizeof(unsigned);
+
+ for (unsigned i = 0; i < 10 * ncpus; i++) {
+ assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0,
+ "Failed to create arena");
+ }
+
+ bool enable = true;
+ size_t sz_b = sizeof(bool);
+ assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0,
+ "Failed to enable background threads");
+ assert_zu_eq(n_background_threads, maxt,
+ "Number of background threads should be 3.\n");
+ maxt = 10;
+ assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m),
+ 0, "Failed to set max background threads");
+ assert_zu_eq(n_background_threads, maxt,
+ "Number of background threads should be 10.\n");
+ maxt = 3;
+ assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m),
+ 0, "Failed to set max background threads");
+ assert_zu_eq(n_background_threads, maxt,
+ "Number of background threads should be 3.\n");
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_deferred,
+ test_max_background_threads);
+}
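
The ordering this test depends on also applies to applications: the cap is set through max_background_threads, and n_background_threads then tracks it while the feature is enabled. A sketch of the public-API equivalent (error handling reduced to asserts):

    size_t maxt = 4;
    bool enable = true;

    /* Cap the pool first, then enable it. */
    assert(mallctl("max_background_threads", NULL, NULL, (void *)&maxt,
        sizeof(maxt)) == 0);
    assert(mallctl("background_thread", NULL, NULL, (void *)&enable,
        sizeof(enable)) == 0);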
diff --git a/deps/jemalloc/test/unit/base.c b/deps/jemalloc/test/unit/base.c
new file mode 100644
index 000000000..6b792cf21
--- /dev/null
+++ b/deps/jemalloc/test/unit/base.c
@@ -0,0 +1,234 @@
+#include "test/jemalloc_test.h"
+
+#include "test/extent_hooks.h"
+
+static extent_hooks_t hooks_null = {
+ extent_alloc_hook,
+ NULL, /* dalloc */
+ NULL, /* destroy */
+ NULL, /* commit */
+ NULL, /* decommit */
+ NULL, /* purge_lazy */
+ NULL, /* purge_forced */
+ NULL, /* split */
+ NULL /* merge */
+};
+
+static extent_hooks_t hooks_not_null = {
+ extent_alloc_hook,
+ extent_dalloc_hook,
+ extent_destroy_hook,
+ NULL, /* commit */
+ extent_decommit_hook,
+ extent_purge_lazy_hook,
+ extent_purge_forced_hook,
+ NULL, /* split */
+ NULL /* merge */
+};
+
+TEST_BEGIN(test_base_hooks_default) {
+ base_t *base;
+ size_t allocated0, allocated1, resident, mapped, n_thp;
+
+ tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+ base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
+
+ if (config_stats) {
+ base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
+ &n_thp);
+ assert_zu_ge(allocated0, sizeof(base_t),
+ "Base header should count as allocated");
+ if (opt_metadata_thp == metadata_thp_always) {
+ assert_zu_gt(n_thp, 0,
+ "Base should have 1 THP at least.");
+ }
+ }
+
+ assert_ptr_not_null(base_alloc(tsdn, base, 42, 1),
+ "Unexpected base_alloc() failure");
+
+ if (config_stats) {
+ base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
+ &n_thp);
+ assert_zu_ge(allocated1 - allocated0, 42,
+ "At least 42 bytes were allocated by base_alloc()");
+ }
+
+ base_delete(tsdn, base);
+}
+TEST_END
+
+TEST_BEGIN(test_base_hooks_null) {
+ extent_hooks_t hooks_orig;
+ base_t *base;
+ size_t allocated0, allocated1, resident, mapped, n_thp;
+
+ extent_hooks_prep();
+ try_dalloc = false;
+ try_destroy = true;
+ try_decommit = false;
+ try_purge_lazy = false;
+ try_purge_forced = false;
+ memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
+ memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t));
+
+ tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+ base = base_new(tsdn, 0, &hooks);
+ assert_ptr_not_null(base, "Unexpected base_new() failure");
+
+ if (config_stats) {
+ base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
+ &n_thp);
+ assert_zu_ge(allocated0, sizeof(base_t),
+ "Base header should count as allocated");
+ if (opt_metadata_thp == metadata_thp_always) {
+ assert_zu_gt(n_thp, 0,
+ "Base should have 1 THP at least.");
+ }
+ }
+
+ assert_ptr_not_null(base_alloc(tsdn, base, 42, 1),
+ "Unexpected base_alloc() failure");
+
+ if (config_stats) {
+ base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
+ &n_thp);
+ assert_zu_ge(allocated1 - allocated0, 42,
+ "At least 42 bytes were allocated by base_alloc()");
+ }
+
+ base_delete(tsdn, base);
+
+ memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t));
+}
+TEST_END
+
+TEST_BEGIN(test_base_hooks_not_null) {
+ extent_hooks_t hooks_orig;
+ base_t *base;
+ void *p, *q, *r, *r_exp;
+
+ extent_hooks_prep();
+ try_dalloc = false;
+ try_destroy = true;
+ try_decommit = false;
+ try_purge_lazy = false;
+ try_purge_forced = false;
+ memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t));
+ memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t));
+
+ tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+ did_alloc = false;
+ base = base_new(tsdn, 0, &hooks);
+ assert_ptr_not_null(base, "Unexpected base_new() failure");
+ assert_true(did_alloc, "Expected alloc");
+
+ /*
+ * Check for tight packing at specified alignment under simple
+ * conditions.
+ */
+ {
+ const size_t alignments[] = {
+ 1,
+ QUANTUM,
+ QUANTUM << 1,
+ CACHELINE,
+ CACHELINE << 1,
+ };
+ unsigned i;
+
+ for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) {
+ size_t alignment = alignments[i];
+ size_t align_ceil = ALIGNMENT_CEILING(alignment,
+ QUANTUM);
+ p = base_alloc(tsdn, base, 1, alignment);
+ assert_ptr_not_null(p,
+ "Unexpected base_alloc() failure");
+ assert_ptr_eq(p,
+ (void *)(ALIGNMENT_CEILING((uintptr_t)p,
+ alignment)), "Expected quantum alignment");
+ q = base_alloc(tsdn, base, alignment, alignment);
+ assert_ptr_not_null(q,
+ "Unexpected base_alloc() failure");
+ assert_ptr_eq((void *)((uintptr_t)p + align_ceil), q,
+ "Minimal allocation should take up %zu bytes",
+ align_ceil);
+ r = base_alloc(tsdn, base, 1, alignment);
+ assert_ptr_not_null(r,
+ "Unexpected base_alloc() failure");
+ assert_ptr_eq((void *)((uintptr_t)q + align_ceil), r,
+ "Minimal allocation should take up %zu bytes",
+ align_ceil);
+ }
+ }
+
+ /*
+ * Allocate an object that cannot fit in the first block, then verify
+ * that the first block's remaining space is considered for subsequent
+ * allocation.
+ */
+ assert_zu_ge(extent_bsize_get(&base->blocks->extent), QUANTUM,
+ "Remainder insufficient for test");
+ /* Use up all but one quantum of block. */
+ while (extent_bsize_get(&base->blocks->extent) > QUANTUM) {
+ p = base_alloc(tsdn, base, QUANTUM, QUANTUM);
+ assert_ptr_not_null(p, "Unexpected base_alloc() failure");
+ }
+ r_exp = extent_addr_get(&base->blocks->extent);
+ assert_zu_eq(base->extent_sn_next, 1, "One extant block expected");
+ q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM);
+ assert_ptr_not_null(q, "Unexpected base_alloc() failure");
+ assert_ptr_ne(q, r_exp, "Expected allocation from new block");
+ assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected");
+ r = base_alloc(tsdn, base, QUANTUM, QUANTUM);
+ assert_ptr_not_null(r, "Unexpected base_alloc() failure");
+ assert_ptr_eq(r, r_exp, "Expected allocation from first block");
+ assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected");
+
+ /*
+ * Check for proper alignment support when normal blocks are too small.
+ */
+ {
+ const size_t alignments[] = {
+ HUGEPAGE,
+ HUGEPAGE << 1
+ };
+ unsigned i;
+
+ for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) {
+ size_t alignment = alignments[i];
+ p = base_alloc(tsdn, base, QUANTUM, alignment);
+ assert_ptr_not_null(p,
+ "Unexpected base_alloc() failure");
+ assert_ptr_eq(p,
+ (void *)(ALIGNMENT_CEILING((uintptr_t)p,
+ alignment)), "Expected %zu-byte alignment",
+ alignment);
+ }
+ }
+
+ called_dalloc = called_destroy = called_decommit = called_purge_lazy =
+ called_purge_forced = false;
+ base_delete(tsdn, base);
+ assert_true(called_dalloc, "Expected dalloc call");
+ assert_true(!called_destroy, "Unexpected destroy call");
+ assert_true(called_decommit, "Expected decommit call");
+ assert_true(called_purge_lazy, "Expected purge_lazy call");
+ assert_true(called_purge_forced, "Expected purge_forced call");
+
+ try_dalloc = true;
+ try_destroy = true;
+ try_decommit = true;
+ try_purge_lazy = true;
+ try_purge_forced = true;
+ memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t));
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_base_hooks_default,
+ test_base_hooks_null,
+ test_base_hooks_not_null);
+}
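
hooks_null and hooks_not_null above illustrate the nine-slot extent_hooks_t layout used throughout jemalloc 5. A sketch of a caller-defined table in the same shape (my_extent_alloc is a hypothetical name; its signature follows the extent alloc hook as used by this test suite):

    static void *
    my_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr,
        size_t size, size_t alignment, bool *zero, bool *commit,
        unsigned arena_ind);

    static extent_hooks_t my_hooks = {
        my_extent_alloc,  /* alloc */
        NULL,             /* dalloc */
        NULL,             /* destroy */
        NULL,             /* commit */
        NULL,             /* decommit */
        NULL,             /* purge_lazy */
        NULL,             /* purge_forced */
        NULL,             /* split */
        NULL              /* merge */
    };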
diff --git a/deps/jemalloc/test/unit/bit_util.c b/deps/jemalloc/test/unit/bit_util.c
new file mode 100644
index 000000000..42a97013d
--- /dev/null
+++ b/deps/jemalloc/test/unit/bit_util.c
@@ -0,0 +1,57 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/bit_util.h"
+
+#define TEST_POW2_CEIL(t, suf, pri) do { \
+ unsigned i, pow2; \
+ t x; \
+ \
+ assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \
+ \
+ for (i = 0; i < sizeof(t) * 8; i++) { \
+ assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \
+ << i, "Unexpected result"); \
+ } \
+ \
+ for (i = 2; i < sizeof(t) * 8; i++) { \
+ assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \
+ ((t)1) << i, "Unexpected result"); \
+ } \
+ \
+ for (i = 0; i < sizeof(t) * 8 - 1; i++) { \
+ assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \
+ ((t)1) << (i+1), "Unexpected result"); \
+ } \
+ \
+ for (pow2 = 1; pow2 < 25; pow2++) { \
+ for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \
+ x++) { \
+ assert_##suf##_eq(pow2_ceil_##suf(x), \
+ ((t)1) << pow2, \
+ "Unexpected result, x=%"pri, x); \
+ } \
+ } \
+} while (0)
+
+TEST_BEGIN(test_pow2_ceil_u64) {
+ TEST_POW2_CEIL(uint64_t, u64, FMTu64);
+}
+TEST_END
+
+TEST_BEGIN(test_pow2_ceil_u32) {
+ TEST_POW2_CEIL(uint32_t, u32, FMTu32);
+}
+TEST_END
+
+TEST_BEGIN(test_pow2_ceil_zu) {
+ TEST_POW2_CEIL(size_t, zu, "zu");
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_pow2_ceil_u64,
+ test_pow2_ceil_u32,
+ test_pow2_ceil_zu);
+}
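
Concretely, the three loops in TEST_POW2_CEIL pin down the rounding behavior at the power-of-two boundaries; spelled out for u32 (internal API, same assert macros as above):

    assert_u32_eq(pow2_ceil_u32(0), 0, "zero is preserved");
    assert_u32_eq(pow2_ceil_u32(8), 8, "exact powers map to themselves");
    assert_u32_eq(pow2_ceil_u32(7), 8, "(1 << i) - 1 rounds up to 1 << i");
    assert_u32_eq(pow2_ceil_u32(9), 16, "(1 << i) + 1 rounds up to 1 << (i+1)");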
diff --git a/deps/jemalloc/test/unit/bitmap.c b/deps/jemalloc/test/unit/bitmap.c
index 8086b8885..cafb2039e 100644
--- a/deps/jemalloc/test/unit/bitmap.c
+++ b/deps/jemalloc/test/unit/bitmap.c
@@ -1,165 +1,431 @@
#include "test/jemalloc_test.h"
-#if (LG_BITMAP_MAXBITS > 12)
-# define MAXBITS 4500
+#define NBITS_TAB \
+ NB( 1) \
+ NB( 2) \
+ NB( 3) \
+ NB( 4) \
+ NB( 5) \
+ NB( 6) \
+ NB( 7) \
+ NB( 8) \
+ NB( 9) \
+ NB(10) \
+ NB(11) \
+ NB(12) \
+ NB(13) \
+ NB(14) \
+ NB(15) \
+ NB(16) \
+ NB(17) \
+ NB(18) \
+ NB(19) \
+ NB(20) \
+ NB(21) \
+ NB(22) \
+ NB(23) \
+ NB(24) \
+ NB(25) \
+ NB(26) \
+ NB(27) \
+ NB(28) \
+ NB(29) \
+ NB(30) \
+ NB(31) \
+ NB(32) \
+ \
+ NB(33) \
+ NB(34) \
+ NB(35) \
+ NB(36) \
+ NB(37) \
+ NB(38) \
+ NB(39) \
+ NB(40) \
+ NB(41) \
+ NB(42) \
+ NB(43) \
+ NB(44) \
+ NB(45) \
+ NB(46) \
+ NB(47) \
+ NB(48) \
+ NB(49) \
+ NB(50) \
+ NB(51) \
+ NB(52) \
+ NB(53) \
+ NB(54) \
+ NB(55) \
+ NB(56) \
+ NB(57) \
+ NB(58) \
+ NB(59) \
+ NB(60) \
+ NB(61) \
+ NB(62) \
+ NB(63) \
+ NB(64) \
+ NB(65) \
+ \
+ NB(126) \
+ NB(127) \
+ NB(128) \
+ NB(129) \
+ NB(130) \
+ \
+ NB(254) \
+ NB(255) \
+ NB(256) \
+ NB(257) \
+ NB(258) \
+ \
+ NB(510) \
+ NB(511) \
+ NB(512) \
+ NB(513) \
+ NB(514) \
+ \
+ NB(1024) \
+ NB(2048) \
+ NB(4096) \
+ NB(8192) \
+ NB(16384) \
+
+static void
+test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) {
+ bitmap_info_t binfo_dyn;
+ bitmap_info_init(&binfo_dyn, nbits);
+
+ assert_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn),
+ "Unexpected difference between static and dynamic initialization, "
+ "nbits=%zu", nbits);
+ assert_zu_eq(binfo->nbits, binfo_dyn.nbits,
+ "Unexpected difference between static and dynamic initialization, "
+ "nbits=%zu", nbits);
+#ifdef BITMAP_USE_TREE
+ assert_u_eq(binfo->nlevels, binfo_dyn.nlevels,
+ "Unexpected difference between static and dynamic initialization, "
+ "nbits=%zu", nbits);
+ {
+ unsigned i;
+
+ for (i = 0; i < binfo->nlevels; i++) {
+ assert_zu_eq(binfo->levels[i].group_offset,
+ binfo_dyn.levels[i].group_offset,
+ "Unexpected difference between static and dynamic "
+ "initialization, nbits=%zu, level=%u", nbits, i);
+ }
+ }
#else
-# define MAXBITS (1U << LG_BITMAP_MAXBITS)
+ assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups,
+ "Unexpected difference between static and dynamic initialization");
#endif
+}
-TEST_BEGIN(test_bitmap_size)
-{
- size_t i, prev_size;
+TEST_BEGIN(test_bitmap_initializer) {
+#define NB(nbits) { \
+ if (nbits <= BITMAP_MAXBITS) { \
+ bitmap_info_t binfo = \
+ BITMAP_INFO_INITIALIZER(nbits); \
+ test_bitmap_initializer_body(&binfo, nbits); \
+ } \
+ }
+ NBITS_TAB
+#undef NB
+}
+TEST_END
+
+static size_t
+test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits,
+ size_t prev_size) {
+ size_t size = bitmap_size(binfo);
+ assert_zu_ge(size, (nbits >> 3),
+ "Bitmap size is smaller than expected");
+ assert_zu_ge(size, prev_size, "Bitmap size is smaller than expected");
+ return size;
+}
+
+TEST_BEGIN(test_bitmap_size) {
+ size_t nbits, prev_size;
prev_size = 0;
- for (i = 1; i <= MAXBITS; i++) {
- size_t size = bitmap_size(i);
- assert_true(size >= prev_size,
- "Bitmap size is smaller than expected");
- prev_size = size;
+ for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) {
+ bitmap_info_t binfo;
+ bitmap_info_init(&binfo, nbits);
+ prev_size = test_bitmap_size_body(&binfo, nbits, prev_size);
+ }
+#define NB(nbits) { \
+ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \
+ prev_size = test_bitmap_size_body(&binfo, nbits, \
+ prev_size); \
}
+ prev_size = 0;
+ NBITS_TAB
+#undef NB
}
TEST_END
-TEST_BEGIN(test_bitmap_init)
-{
+static void
+test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) {
size_t i;
+ bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
+ assert_ptr_not_null(bitmap, "Unexpected malloc() failure");
- for (i = 1; i <= MAXBITS; i++) {
+ bitmap_init(bitmap, binfo, false);
+ for (i = 0; i < nbits; i++) {
+ assert_false(bitmap_get(bitmap, binfo, i),
+ "Bit should be unset");
+ }
+
+ bitmap_init(bitmap, binfo, true);
+ for (i = 0; i < nbits; i++) {
+ assert_true(bitmap_get(bitmap, binfo, i), "Bit should be set");
+ }
+
+ free(bitmap);
+}
+
+TEST_BEGIN(test_bitmap_init) {
+ size_t nbits;
+
+ for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) {
bitmap_info_t binfo;
- bitmap_info_init(&binfo, i);
- {
- size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
- bitmap_info_ngroups(&binfo));
- bitmap_init(bitmap, &binfo);
-
- for (j = 0; j < i; j++) {
- assert_false(bitmap_get(bitmap, &binfo, j),
- "Bit should be unset");
- }
- free(bitmap);
- }
+ bitmap_info_init(&binfo, nbits);
+ test_bitmap_init_body(&binfo, nbits);
+ }
+#define NB(nbits) { \
+ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \
+ test_bitmap_init_body(&binfo, nbits); \
}
+ NBITS_TAB
+#undef NB
}
TEST_END
-TEST_BEGIN(test_bitmap_set)
-{
+static void
+test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) {
size_t i;
+ bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
+ assert_ptr_not_null(bitmap, "Unexpected malloc() failure");
+ bitmap_init(bitmap, binfo, false);
- for (i = 1; i <= MAXBITS; i++) {
+ for (i = 0; i < nbits; i++) {
+ bitmap_set(bitmap, binfo, i);
+ }
+ assert_true(bitmap_full(bitmap, binfo), "All bits should be set");
+ free(bitmap);
+}
+
+TEST_BEGIN(test_bitmap_set) {
+ size_t nbits;
+
+ for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) {
bitmap_info_t binfo;
- bitmap_info_init(&binfo, i);
- {
- size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
- bitmap_info_ngroups(&binfo));
- bitmap_init(bitmap, &binfo);
-
- for (j = 0; j < i; j++)
- bitmap_set(bitmap, &binfo, j);
- assert_true(bitmap_full(bitmap, &binfo),
- "All bits should be set");
- free(bitmap);
- }
+ bitmap_info_init(&binfo, nbits);
+ test_bitmap_set_body(&binfo, nbits);
}
+#define NB(nbits) { \
+ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \
+ test_bitmap_set_body(&binfo, nbits); \
+ }
+ NBITS_TAB
+#undef NB
}
TEST_END
-TEST_BEGIN(test_bitmap_unset)
-{
+static void
+test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) {
size_t i;
+ bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
+ assert_ptr_not_null(bitmap, "Unexpected malloc() failure");
+ bitmap_init(bitmap, binfo, false);
+
+ for (i = 0; i < nbits; i++) {
+ bitmap_set(bitmap, binfo, i);
+ }
+ assert_true(bitmap_full(bitmap, binfo), "All bits should be set");
+ for (i = 0; i < nbits; i++) {
+ bitmap_unset(bitmap, binfo, i);
+ }
+ for (i = 0; i < nbits; i++) {
+ bitmap_set(bitmap, binfo, i);
+ }
+ assert_true(bitmap_full(bitmap, binfo), "All bits should be set");
+ free(bitmap);
+}
- for (i = 1; i <= MAXBITS; i++) {
+TEST_BEGIN(test_bitmap_unset) {
+ size_t nbits;
+
+ for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) {
bitmap_info_t binfo;
- bitmap_info_init(&binfo, i);
- {
- size_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
- bitmap_info_ngroups(&binfo));
- bitmap_init(bitmap, &binfo);
-
- for (j = 0; j < i; j++)
- bitmap_set(bitmap, &binfo, j);
- assert_true(bitmap_full(bitmap, &binfo),
- "All bits should be set");
- for (j = 0; j < i; j++)
- bitmap_unset(bitmap, &binfo, j);
- for (j = 0; j < i; j++)
- bitmap_set(bitmap, &binfo, j);
- assert_true(bitmap_full(bitmap, &binfo),
- "All bits should be set");
- free(bitmap);
- }
+ bitmap_info_init(&binfo, nbits);
+ test_bitmap_unset_body(&binfo, nbits);
}
+#define NB(nbits) { \
+ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \
+ test_bitmap_unset_body(&binfo, nbits); \
+ }
+ NBITS_TAB
+#undef NB
}
TEST_END
-TEST_BEGIN(test_bitmap_sfu)
-{
- size_t i;
+static void
+test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) {
+ bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo));
+ assert_ptr_not_null(bitmap, "Unexpected malloc() failure");
+ bitmap_init(bitmap, binfo, false);
- for (i = 1; i <= MAXBITS; i++) {
- bitmap_info_t binfo;
- bitmap_info_init(&binfo, i);
- {
- ssize_t j;
- bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
- bitmap_info_ngroups(&binfo));
- bitmap_init(bitmap, &binfo);
-
- /* Iteratively set bits starting at the beginning. */
- for (j = 0; j < i; j++) {
- assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
- "First unset bit should be just after "
- "previous first unset bit");
+ /* Iteratively set bits starting at the beginning. */
+ for (size_t i = 0; i < nbits; i++) {
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
+ "First unset bit should be just after previous first unset "
+ "bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+ "First unset bit should be just after previous first unset "
+ "bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
+ "First unset bit should be just after previous first unset "
+ "bit");
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i,
+ "First unset bit should be just after previous first unset "
+ "bit");
+ }
+ assert_true(bitmap_full(bitmap, binfo), "All bits should be set");
+
+ /*
+ * Iteratively unset bits starting at the end, and verify that
+ * bitmap_sfu() reaches the unset bits.
+ */
+ for (size_t i = nbits - 1; i < nbits; i--) { /* (nbits..0] */
+ bitmap_unset(bitmap, binfo, i);
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
+ "First unset bit should the bit previously unset");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+ "First unset bit should the bit previously unset");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
+ "First unset bit should the bit previously unset");
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i,
+ "First unset bit should the bit previously unset");
+ bitmap_unset(bitmap, binfo, i);
+ }
+ assert_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset");
+
+ /*
+ * Iteratively set bits starting at the beginning, and verify that
+ * bitmap_sfu() looks past them.
+ */
+ for (size_t i = 1; i < nbits; i++) {
+ bitmap_set(bitmap, binfo, i - 1);
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i,
+ "First unset bit should be just after the bit previously "
+ "set");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i,
+ "First unset bit should be just after the bit previously "
+ "set");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
+ "First unset bit should be just after the bit previously "
+ "set");
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i,
+ "First unset bit should be just after the bit previously "
+ "set");
+ bitmap_unset(bitmap, binfo, i);
+ }
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1,
+ "First unset bit should be the last bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1),
+ nbits - 1, "First unset bit should be the last bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1,
+ "First unset bit should be the last bit");
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1,
+ "First unset bit should be the last bit");
+ assert_true(bitmap_full(bitmap, binfo), "All bits should be set");
+
+ /*
+ * Bubble a "usu" pattern through the bitmap and verify that
+ * bitmap_ffu() finds the correct bit for all five min_bit cases.
+ */
+ if (nbits >= 3) {
+ for (size_t i = 0; i < nbits-2; i++) {
+ bitmap_unset(bitmap, binfo, i);
+ bitmap_unset(bitmap, binfo, i+2);
+ if (i > 0) {
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i,
+ "Unexpected first unset bit");
}
- assert_true(bitmap_full(bitmap, &binfo),
- "All bits should be set");
-
- /*
- * Iteratively unset bits starting at the end, and
- * verify that bitmap_sfu() reaches the unset bits.
- */
- for (j = i - 1; j >= 0; j--) {
- bitmap_unset(bitmap, &binfo, j);
- assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
- "First unset bit should the bit previously "
- "unset");
- bitmap_unset(bitmap, &binfo, j);
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
+ "Unexpected first unset bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2,
+ "Unexpected first unset bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2,
+ "Unexpected first unset bit");
+ if (i + 3 < nbits) {
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i+3),
+ nbits, "Unexpected first unset bit");
}
- assert_false(bitmap_get(bitmap, &binfo, 0),
- "Bit should be unset");
-
- /*
- * Iteratively set bits starting at the beginning, and
- * verify that bitmap_sfu() looks past them.
- */
- for (j = 1; j < i; j++) {
- bitmap_set(bitmap, &binfo, j - 1);
- assert_zd_eq(bitmap_sfu(bitmap, &binfo), j,
- "First unset bit should be just after the "
- "bit previously set");
- bitmap_unset(bitmap, &binfo, j);
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i,
+ "Unexpected first unset bit");
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i+2,
+ "Unexpected first unset bit");
+ }
+ }
+
+ /*
+ * Unset the last bit, bubble another unset bit through the bitmap, and
+ * verify that bitmap_ffu() finds the correct bit for all four min_bit
+ * cases.
+ */
+ if (nbits >= 3) {
+ bitmap_unset(bitmap, binfo, nbits-1);
+ for (size_t i = 0; i < nbits-1; i++) {
+ bitmap_unset(bitmap, binfo, i);
+ if (i > 0) {
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i,
+ "Unexpected first unset bit");
}
- assert_zd_eq(bitmap_sfu(bitmap, &binfo), i - 1,
- "First unset bit should be the last bit");
- assert_true(bitmap_full(bitmap, &binfo),
- "All bits should be set");
- free(bitmap);
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i,
+ "Unexpected first unset bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1,
+ "Unexpected first unset bit");
+ assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1),
+ nbits-1, "Unexpected first unset bit");
+
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), i,
+ "Unexpected first unset bit");
}
+ assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1,
+ "Unexpected first unset bit");
+ }
+
+ free(bitmap);
+}
+
+TEST_BEGIN(test_bitmap_xfu) {
+ size_t nbits;
+
+ for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) {
+ bitmap_info_t binfo;
+ bitmap_info_init(&binfo, nbits);
+ test_bitmap_xfu_body(&binfo, nbits);
+ }
+#define NB(nbits) { \
+ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \
+ test_bitmap_xfu_body(&binfo, nbits); \
}
+ NBITS_TAB
+#undef NB
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
+ test_bitmap_initializer,
test_bitmap_size,
test_bitmap_init,
test_bitmap_set,
test_bitmap_unset,
- test_bitmap_sfu));
+ test_bitmap_xfu);
}
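
Summarizing the reworked internal API exercised above (bitmap storage is now sized via bitmap_size() and initialization takes an explicit fill flag), a short sketch:

    bitmap_info_t binfo;
    bitmap_info_init(&binfo, 100);            /* 100 logical bits */

    bitmap_t *bm = (bitmap_t *)malloc(bitmap_size(&binfo));
    bitmap_init(bm, &binfo, false);           /* all bits start unset */

    bitmap_set(bm, &binfo, 0);
    size_t b = bitmap_ffu(bm, &binfo, 0);     /* first unset >= 0: b == 1 */
    b = bitmap_sfu(bm, &binfo);               /* sets bit 1 and returns 1 */

    free(bm);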
diff --git a/deps/jemalloc/test/unit/ckh.c b/deps/jemalloc/test/unit/ckh.c
index b214c279a..707ea5f8c 100644
--- a/deps/jemalloc/test/unit/ckh.c
+++ b/deps/jemalloc/test/unit/ckh.c
@@ -1,21 +1,23 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_new_delete)
-{
+TEST_BEGIN(test_new_delete) {
+ tsd_t *tsd;
ckh_t ckh;
- assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp),
- "Unexpected ckh_new() error");
- ckh_delete(&ckh);
+ tsd = tsd_fetch();
- assert_false(ckh_new(&ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp),
- "Unexpected ckh_new() error");
- ckh_delete(&ckh);
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
+ ckh_string_keycomp), "Unexpected ckh_new() error");
+ ckh_delete(tsd, &ckh);
+
+ assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash,
+ ckh_pointer_keycomp), "Unexpected ckh_new() error");
+ ckh_delete(tsd, &ckh);
}
TEST_END
-TEST_BEGIN(test_count_insert_search_remove)
-{
+TEST_BEGIN(test_count_insert_search_remove) {
+ tsd_t *tsd;
ckh_t ckh;
const char *strs[] = {
"a string",
@@ -26,15 +28,17 @@ TEST_BEGIN(test_count_insert_search_remove)
const char *missing = "A string not in the hash table.";
size_t i;
- assert_false(ckh_new(&ckh, 2, ckh_string_hash, ckh_string_keycomp),
- "Unexpected ckh_new() error");
+ tsd = tsd_fetch();
+
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
+ ckh_string_keycomp), "Unexpected ckh_new() error");
assert_zu_eq(ckh_count(&ckh), 0,
"ckh_count() should return %zu, but it returned %zu", ZU(0),
ckh_count(&ckh));
/* Insert. */
for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) {
- ckh_insert(&ckh, strs[i], strs[i]);
+ ckh_insert(tsd, &ckh, strs[i], strs[i]);
assert_zu_eq(ckh_count(&ckh), i+1,
"ckh_count() should return %zu, but it returned %zu", i+1,
ckh_count(&ckh));
@@ -58,10 +62,10 @@ TEST_BEGIN(test_count_insert_search_remove)
ks = (i & 1) ? strs[i] : (const char *)NULL;
vs = (i & 2) ? strs[i] : (const char *)NULL;
- assert_ptr_eq((void *)ks, (void *)k.s,
- "Key mismatch, i=%zu", i);
- assert_ptr_eq((void *)vs, (void *)v.s,
- "Value mismatch, i=%zu", i);
+ assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
+ i);
+ assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
+ i);
}
assert_true(ckh_search(&ckh, missing, NULL, NULL),
"Unexpected ckh_search() success");
@@ -79,36 +83,38 @@ TEST_BEGIN(test_count_insert_search_remove)
vp = (i & 2) ? &v.p : NULL;
k.p = NULL;
v.p = NULL;
- assert_false(ckh_remove(&ckh, strs[i], kp, vp),
+ assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp),
"Unexpected ckh_remove() error");
ks = (i & 1) ? strs[i] : (const char *)NULL;
vs = (i & 2) ? strs[i] : (const char *)NULL;
- assert_ptr_eq((void *)ks, (void *)k.s,
- "Key mismatch, i=%zu", i);
- assert_ptr_eq((void *)vs, (void *)v.s,
- "Value mismatch, i=%zu", i);
+ assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu",
+ i);
+ assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu",
+ i);
assert_zu_eq(ckh_count(&ckh),
sizeof(strs)/sizeof(const char *) - i - 1,
"ckh_count() should return %zu, but it returned %zu",
- sizeof(strs)/sizeof(const char *) - i - 1,
+ sizeof(strs)/sizeof(const char *) - i - 1,
ckh_count(&ckh));
}
- ckh_delete(&ckh);
+ ckh_delete(tsd, &ckh);
}
TEST_END
-TEST_BEGIN(test_insert_iter_remove)
-{
-#define NITEMS ZU(1000)
+TEST_BEGIN(test_insert_iter_remove) {
+#define NITEMS ZU(1000)
+ tsd_t *tsd;
ckh_t ckh;
void **p[NITEMS];
void *q, *r;
size_t i;
- assert_false(ckh_new(&ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp),
- "Unexpected ckh_new() error");
+ tsd = tsd_fetch();
+
+ assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash,
+ ckh_pointer_keycomp), "Unexpected ckh_new() error");
for (i = 0; i < NITEMS; i++) {
p[i] = mallocx(i+1, 0);
@@ -119,7 +125,7 @@ TEST_BEGIN(test_insert_iter_remove)
size_t j;
for (j = i; j < NITEMS; j++) {
- assert_false(ckh_insert(&ckh, p[j], p[j]),
+ assert_false(ckh_insert(tsd, &ckh, p[j], p[j]),
"Unexpected ckh_insert() failure");
assert_false(ckh_search(&ckh, p[j], &q, &r),
"Unexpected ckh_search() failure");
@@ -134,13 +140,13 @@ TEST_BEGIN(test_insert_iter_remove)
for (j = i + 1; j < NITEMS; j++) {
assert_false(ckh_search(&ckh, p[j], NULL, NULL),
"Unexpected ckh_search() failure");
- assert_false(ckh_remove(&ckh, p[j], &q, &r),
+ assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r),
"Unexpected ckh_remove() failure");
assert_ptr_eq(p[j], q, "Key pointer mismatch");
assert_ptr_eq(p[j], r, "Value pointer mismatch");
assert_true(ckh_search(&ckh, p[j], NULL, NULL),
"Unexpected ckh_search() success");
- assert_true(ckh_remove(&ckh, p[j], &q, &r),
+ assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r),
"Unexpected ckh_remove() success");
}
@@ -150,8 +156,7 @@ TEST_BEGIN(test_insert_iter_remove)
memset(seen, 0, sizeof(seen));
- for (tabind = 0; ckh_iter(&ckh, &tabind, &q, &r) ==
- false;) {
+ for (tabind = 0; !ckh_iter(&ckh, &tabind, &q, &r);) {
size_t k;
assert_ptr_eq(q, r, "Key and val not equal");
@@ -166,41 +171,41 @@ TEST_BEGIN(test_insert_iter_remove)
}
}
- for (j = 0; j < i + 1; j++)
+ for (j = 0; j < i + 1; j++) {
assert_true(seen[j], "Item %zu not seen", j);
- for (; j < NITEMS; j++)
+ }
+ for (; j < NITEMS; j++) {
assert_false(seen[j], "Item %zu seen", j);
+ }
}
}
for (i = 0; i < NITEMS; i++) {
assert_false(ckh_search(&ckh, p[i], NULL, NULL),
"Unexpected ckh_search() failure");
- assert_false(ckh_remove(&ckh, p[i], &q, &r),
+ assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r),
"Unexpected ckh_remove() failure");
assert_ptr_eq(p[i], q, "Key pointer mismatch");
assert_ptr_eq(p[i], r, "Value pointer mismatch");
assert_true(ckh_search(&ckh, p[i], NULL, NULL),
"Unexpected ckh_search() success");
- assert_true(ckh_remove(&ckh, p[i], &q, &r),
+ assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r),
"Unexpected ckh_remove() success");
dallocx(p[i], 0);
}
assert_zu_eq(ckh_count(&ckh), 0,
- "ckh_count() should return %zu, but it returned %zu", ZU(0),
- ckh_count(&ckh));
- ckh_delete(&ckh);
+ "ckh_count() should return %zu, but it returned %zu",
+ ZU(0), ckh_count(&ckh));
+ ckh_delete(tsd, &ckh);
#undef NITEMS
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_new_delete,
test_count_insert_search_remove,
- test_insert_iter_remove));
+ test_insert_iter_remove);
}
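
The tsd_t plumbing above changes every mutating ckh entry point; the post-change calling convention, written out (internal jemalloc API; a false return means success throughout):

    tsd_t *tsd = tsd_fetch();
    ckh_t ckh;

    assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash,
        ckh_string_keycomp), "Unexpected ckh_new() error");
    assert_false(ckh_insert(tsd, &ckh, "key", "value"),
        "Unexpected ckh_insert() failure");

    void *k, *v;
    assert_false(ckh_search(&ckh, "key", &k, &v),
        "Unexpected ckh_search() failure");
    assert_false(ckh_remove(tsd, &ckh, "key", NULL, NULL),
        "Unexpected ckh_remove() failure");

    ckh_delete(tsd, &ckh);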
diff --git a/deps/jemalloc/test/unit/decay.c b/deps/jemalloc/test/unit/decay.c
new file mode 100644
index 000000000..f727bf931
--- /dev/null
+++ b/deps/jemalloc/test/unit/decay.c
@@ -0,0 +1,599 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/ticker.h"
+
+static nstime_monotonic_t *nstime_monotonic_orig;
+static nstime_update_t *nstime_update_orig;
+
+static unsigned nupdates_mock;
+static nstime_t time_mock;
+static bool monotonic_mock;
+
+static bool
+check_background_thread_enabled(void) {
+ bool enabled;
+ size_t sz = sizeof(bool);
+ int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0);
+ if (ret == ENOENT) {
+ return false;
+ }
+ assert_d_eq(ret, 0, "Unexpected mallctl error");
+ return enabled;
+}
+
+static bool
+nstime_monotonic_mock(void) {
+ return monotonic_mock;
+}
+
+static bool
+nstime_update_mock(nstime_t *time) {
+ nupdates_mock++;
+ if (monotonic_mock) {
+ nstime_copy(time, &time_mock);
+ }
+ return !monotonic_mock;
+}
+
+static unsigned
+do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) {
+ unsigned arena_ind;
+ size_t sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+
+ assert_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen),
+ 0, "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL,
+ (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0,
+ "Unexpected mallctlbymib() failure");
+
+ assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen),
+ 0, "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL,
+ (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0,
+ "Unexpected mallctlbymib() failure");
+
+ return arena_ind;
+}
+
+static void
+do_arena_destroy(unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+void
+do_epoch(void) {
+ uint64_t epoch = 1;
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
+}
+
+void
+do_purge(unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+void
+do_decay(unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+static uint64_t
+get_arena_npurge_impl(const char *mibname, unsigned arena_ind) {
+ size_t mib[4];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib(mibname, mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[2] = (size_t)arena_ind;
+ uint64_t npurge = 0;
+ size_t sz = sizeof(npurge);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0),
+ config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure");
+ return npurge;
+}
+
+static uint64_t
+get_arena_dirty_npurge(unsigned arena_ind) {
+ do_epoch();
+ return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind);
+}
+
+static uint64_t
+get_arena_muzzy_npurge(unsigned arena_ind) {
+ do_epoch();
+ return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind);
+}
+
+static uint64_t
+get_arena_npurge(unsigned arena_ind) {
+ do_epoch();
+ return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) +
+ get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind);
+}
+
+static size_t
+get_arena_pdirty(unsigned arena_ind) {
+ do_epoch();
+ size_t mib[4];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[2] = (size_t)arena_ind;
+ size_t pdirty;
+ size_t sz = sizeof(pdirty);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+ return pdirty;
+}
+
+static size_t
+get_arena_pmuzzy(unsigned arena_ind) {
+ do_epoch();
+ size_t mib[4];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[2] = (size_t)arena_ind;
+ size_t pmuzzy;
+ size_t sz = sizeof(pmuzzy);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+ return pmuzzy;
+}
+
+static void *
+do_mallocx(size_t size, int flags) {
+ void *p = mallocx(size, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ return p;
+}
+
+static void
+generate_dirty(unsigned arena_ind, size_t size) {
+ int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+ void *p = do_mallocx(size, flags);
+ dallocx(p, flags);
+}
+
+TEST_BEGIN(test_decay_ticks) {
+ test_skip_if(check_background_thread_enabled());
+
+ ticker_t *decay_ticker;
+ unsigned tick0, tick1, arena_ind;
+ size_t sz, large0;
+ void *p;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure");
+
+ /* Set up a manually managed arena for test. */
+ arena_ind = do_arena_create(0, 0);
+
+ /* Migrate to the new arena, and get the ticker. */
+ unsigned old_arena_ind;
+ size_t sz_arena_ind = sizeof(old_arena_ind);
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind,
+ &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0,
+ "Unexpected mallctl() failure");
+ decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind);
+ assert_ptr_not_null(decay_ticker,
+ "Unexpected failure getting decay ticker");
+
+ /*
+ * Test the standard APIs using a large size class, since we can't
+ * control tcache interactions for small size classes (except by
+ * completely disabling tcache for the entire test program).
+ */
+
+ /* malloc(). */
+ tick0 = ticker_read(decay_ticker);
+ p = malloc(large0);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()");
+ /* free(). */
+ tick0 = ticker_read(decay_ticker);
+ free(p);
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()");
+
+ /* calloc(). */
+ tick0 = ticker_read(decay_ticker);
+ p = calloc(1, large0);
+ assert_ptr_not_null(p, "Unexpected calloc() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()");
+ free(p);
+
+ /* posix_memalign(). */
+ tick0 = ticker_read(decay_ticker);
+ assert_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0,
+ "Unexpected posix_memalign() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during posix_memalign()");
+ free(p);
+
+ /* aligned_alloc(). */
+ tick0 = ticker_read(decay_ticker);
+ p = aligned_alloc(sizeof(size_t), large0);
+ assert_ptr_not_null(p, "Unexpected aligned_alloc() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during aligned_alloc()");
+ free(p);
+
+ /* realloc(). */
+ /* Allocate. */
+ tick0 = ticker_read(decay_ticker);
+ p = realloc(NULL, large0);
+ assert_ptr_not_null(p, "Unexpected realloc() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()");
+ /* Reallocate. */
+ tick0 = ticker_read(decay_ticker);
+ p = realloc(p, large0);
+ assert_ptr_not_null(p, "Unexpected realloc() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()");
+ /* Deallocate. */
+ tick0 = ticker_read(decay_ticker);
+ realloc(p, 0);
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()");
+
+ /*
+ * Test the *allocx() APIs using large and small size classes, with
+ * tcache explicitly disabled.
+ */
+ {
+ unsigned i;
+ size_t allocx_sizes[2];
+ allocx_sizes[0] = large0;
+ allocx_sizes[1] = 1;
+
+ for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) {
+ sz = allocx_sizes[i];
+
+ /* mallocx(). */
+ tick0 = ticker_read(decay_ticker);
+ p = mallocx(sz, MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during mallocx() (sz=%zu)",
+ sz);
+ /* rallocx(). */
+ tick0 = ticker_read(decay_ticker);
+ p = rallocx(p, sz, MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(p, "Unexpected rallocx() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during rallocx() (sz=%zu)",
+ sz);
+ /* xallocx(). */
+ tick0 = ticker_read(decay_ticker);
+ xallocx(p, sz, 0, MALLOCX_TCACHE_NONE);
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during xallocx() (sz=%zu)",
+ sz);
+ /* dallocx(). */
+ tick0 = ticker_read(decay_ticker);
+ dallocx(p, MALLOCX_TCACHE_NONE);
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during dallocx() (sz=%zu)",
+ sz);
+ /* sdallocx(). */
+ p = mallocx(sz, MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ tick0 = ticker_read(decay_ticker);
+ sdallocx(p, sz, MALLOCX_TCACHE_NONE);
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during sdallocx() "
+ "(sz=%zu)", sz);
+ }
+ }
+
+ /*
+ * Test tcache fill/flush interactions for large and small size classes,
+ * using an explicit tcache.
+ */
+ unsigned tcache_ind, i;
+ size_t tcache_sizes[2];
+ tcache_sizes[0] = large0;
+ tcache_sizes[1] = 1;
+
+ size_t tcache_max, sz_tcache_max;
+ sz_tcache_max = sizeof(tcache_max);
+ assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max,
+ &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure");
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz,
+ NULL, 0), 0, "Unexpected mallctl failure");
+
+ for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) {
+ sz = tcache_sizes[i];
+
+ /* tcache fill. */
+ tick0 = ticker_read(decay_ticker);
+ p = mallocx(sz, MALLOCX_TCACHE(tcache_ind));
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ tick1 = ticker_read(decay_ticker);
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during tcache fill "
+ "(sz=%zu)", sz);
+ /* tcache flush. */
+ dallocx(p, MALLOCX_TCACHE(tcache_ind));
+ tick0 = ticker_read(decay_ticker);
+ assert_d_eq(mallctl("tcache.flush", NULL, NULL,
+ (void *)&tcache_ind, sizeof(unsigned)), 0,
+ "Unexpected mallctl failure");
+ tick1 = ticker_read(decay_ticker);
+
+ /* The flush will only tick if the region actually resides in the tcache. */
+ if (sz <= tcache_max) {
+ assert_u32_ne(tick1, tick0,
+ "Expected ticker to tick during tcache "
+ "flush (sz=%zu)", sz);
+ } else {
+ assert_u32_eq(tick1, tick0,
+ "Unexpected ticker tick during tcache "
+ "flush (sz=%zu)", sz);
+ }
+ }
+}
+TEST_END
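
Editor's note: every path exercised above is expected to tick because each allocator entry point advances a per-arena countdown, and decay work runs only when that countdown expires. A minimal sketch of the idea (hypothetical names, not jemalloc's actual ticker_t) might look like:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
    	int32_t tick;	/* Remaining events before the next decay pass. */
    	int32_t nticks;	/* Reload value. */
    } my_ticker_t;

    /* Returns true when the caller should run the (expensive) periodic work. */
    static inline bool
    my_ticker_tick(my_ticker_t *t) {
    	if (--t->tick <= 0) {
    		t->tick = t->nticks;	/* Rearm for the next interval. */
    		return true;
    	}
    	return false;
    }

This keeps the per-allocation cost to a decrement and branch, which is why the test can assert a tick on every malloc()/free() variant.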
+
+static void
+decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt,
+ uint64_t dirty_npurge0, uint64_t muzzy_npurge0, bool terminate_asap) {
+#define NINTERVALS 101
+ nstime_t time, update_interval, decay_ms, deadline;
+
+ nstime_init(&time, 0);
+ nstime_update(&time);
+
+ nstime_init2(&decay_ms, dt, 0);
+ nstime_copy(&deadline, &time);
+ nstime_add(&deadline, &decay_ms);
+
+ nstime_init2(&update_interval, dt, 0);
+ nstime_idivide(&update_interval, NINTERVALS);
+
+ /*
+ * Keep q's slab from being deallocated during the looping below. If a
+ * cached slab were to repeatedly come and go during looping, it could
+ * prevent the decay backlog from ever becoming empty.
+ */
+ void *p = do_mallocx(1, flags);
+ uint64_t dirty_npurge1, muzzy_npurge1;
+ do {
+ for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2;
+ i++) {
+ void *q = do_mallocx(1, flags);
+ dallocx(q, flags);
+ }
+ dirty_npurge1 = get_arena_dirty_npurge(arena_ind);
+ muzzy_npurge1 = get_arena_muzzy_npurge(arena_ind);
+
+ nstime_add(&time_mock, &update_interval);
+ nstime_update(&time);
+ } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 ==
+ dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) ||
+ !terminate_asap));
+ dallocx(p, flags);
+
+ if (config_stats) {
+ assert_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 +
+ muzzy_npurge0, "Expected purging to occur");
+ }
+#undef NINTERVALS
+}
+
+TEST_BEGIN(test_decay_ticker) {
+ test_skip_if(check_background_thread_enabled());
+#define NPS 2048
+ ssize_t ddt = opt_dirty_decay_ms;
+ ssize_t mdt = opt_muzzy_decay_ms;
+ unsigned arena_ind = do_arena_create(ddt, mdt);
+ int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE);
+ void *ps[NPS];
+ size_t large;
+
+ /*
+ * Allocate a bunch of large objects, pause the clock, deallocate every
+ * other object (to fragment virtual memory), restore the clock, then
+ * [md]allocx() in a tight loop while advancing time rapidly to verify
+ * the ticker triggers purging.
+ */
+
+ size_t tcache_max;
+ size_t sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure");
+ large = nallocx(tcache_max + 1, flags);
+
+ do_purge(arena_ind);
+ uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind);
+ uint64_t muzzy_npurge0 = get_arena_muzzy_npurge(arena_ind);
+
+ for (unsigned i = 0; i < NPS; i++) {
+ ps[i] = do_mallocx(large, flags);
+ }
+
+ nupdates_mock = 0;
+ nstime_init(&time_mock, 0);
+ nstime_update(&time_mock);
+ monotonic_mock = true;
+
+ nstime_monotonic_orig = nstime_monotonic;
+ nstime_update_orig = nstime_update;
+ nstime_monotonic = nstime_monotonic_mock;
+ nstime_update = nstime_update_mock;
+
+ for (unsigned i = 0; i < NPS; i += 2) {
+ dallocx(ps[i], flags);
+ unsigned nupdates0 = nupdates_mock;
+ do_decay(arena_ind);
+ assert_u_gt(nupdates_mock, nupdates0,
+ "Expected nstime_update() to be called");
+ }
+
+ decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0,
+ muzzy_npurge0, true);
+ decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0,
+ muzzy_npurge0, false);
+
+ do_arena_destroy(arena_ind);
+
+ nstime_monotonic = nstime_monotonic_orig;
+ nstime_update = nstime_update_orig;
+#undef NPS
+}
+TEST_END
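
Editor's note: the test above swaps nstime_monotonic/nstime_update for mocks so it can advance time deterministically. That function-pointer time-mocking pattern is broadly reusable; a self-contained sketch (hypothetical names) looks like:

    #include <stdbool.h>
    #include <stdint.h>

    typedef bool (clock_read_t)(uint64_t *ns_out);

    static bool
    real_clock_read(uint64_t *ns_out) {
    	*ns_out = 0;	/* Placeholder: read CLOCK_MONOTONIC here. */
    	return true;
    }

    static uint64_t mock_now_ns;

    static bool
    mock_clock_read(uint64_t *ns_out) {
    	*ns_out = mock_now_ns;	/* Fully test-controlled clock. */
    	return true;
    }

    /* Production default; a test installs mock_clock_read, bumps mock_now_ns
     * as fast as it likes, then restores the original pointer on exit. */
    static clock_read_t *clock_read = real_clock_read;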
+
+TEST_BEGIN(test_decay_nonmonotonic) {
+ test_skip_if(check_background_thread_enabled());
+#define NPS (SMOOTHSTEP_NSTEPS + 1)
+ int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE);
+ void *ps[NPS];
+ uint64_t npurge0 = 0;
+ uint64_t npurge1 = 0;
+ size_t sz, large0;
+ unsigned i, nupdates0;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure");
+
+ assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl failure");
+ do_epoch();
+ npurge0 = get_arena_npurge(0);
+
+ nupdates_mock = 0;
+ nstime_init(&time_mock, 0);
+ nstime_update(&time_mock);
+ monotonic_mock = false;
+
+ nstime_monotonic_orig = nstime_monotonic;
+ nstime_update_orig = nstime_update;
+ nstime_monotonic = nstime_monotonic_mock;
+ nstime_update = nstime_update_mock;
+
+ for (i = 0; i < NPS; i++) {
+ ps[i] = mallocx(large0, flags);
+ assert_ptr_not_null(ps[i], "Unexpected mallocx() failure");
+ }
+
+ for (i = 0; i < NPS; i++) {
+ dallocx(ps[i], flags);
+ nupdates0 = nupdates_mock;
+ assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0,
+ "Unexpected arena.0.decay failure");
+ assert_u_gt(nupdates_mock, nupdates0,
+ "Expected nstime_update() to be called");
+ }
+
+ do_epoch();
+ npurge1 = get_arena_npurge(0);
+
+ if (config_stats) {
+ assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred");
+ }
+
+ nstime_monotonic = nstime_monotonic_orig;
+ nstime_update = nstime_update_orig;
+#undef NPS
+}
+TEST_END
+
+TEST_BEGIN(test_decay_now) {
+ test_skip_if(check_background_thread_enabled());
+
+ unsigned arena_ind = do_arena_create(0, 0);
+ assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
+ assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
+ size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2};
+ /* Verify that dirty/muzzy pages never linger after deallocation. */
+ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+ size_t size = sizes[i];
+ generate_dirty(arena_ind, size);
+ assert_zu_eq(get_arena_pdirty(arena_ind), 0,
+ "Unexpected dirty pages");
+ assert_zu_eq(get_arena_pmuzzy(arena_ind), 0,
+ "Unexpected muzzy pages");
+ }
+ do_arena_destroy(arena_ind);
+}
+TEST_END
+
+TEST_BEGIN(test_decay_never) {
+ test_skip_if(check_background_thread_enabled());
+
+ unsigned arena_ind = do_arena_create(-1, -1);
+ int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+ assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages");
+ assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages");
+ size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2};
+ void *ptrs[sizeof(sizes)/sizeof(size_t)];
+ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+ ptrs[i] = do_mallocx(sizes[i], flags);
+ }
+ /* Verify that each deallocation generates additional dirty pages. */
+ size_t pdirty_prev = get_arena_pdirty(arena_ind);
+ size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind);
+ assert_zu_eq(pdirty_prev, 0, "Unexpected dirty pages");
+ assert_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages");
+ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) {
+ dallocx(ptrs[i], flags);
+ size_t pdirty = get_arena_pdirty(arena_ind);
+ size_t pmuzzy = get_arena_pmuzzy(arena_ind);
+ assert_zu_gt(pdirty, pdirty_prev,
+ "Expected dirty pages to increase.");
+ assert_zu_eq(pmuzzy, 0, "Unexpected muzzy pages");
+ pdirty_prev = pdirty;
+ }
+ do_arena_destroy(arena_ind);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_decay_ticks,
+ test_decay_ticker,
+ test_decay_nonmonotonic,
+ test_decay_now,
+ test_decay_never);
+}
diff --git a/deps/jemalloc/test/unit/decay.sh b/deps/jemalloc/test/unit/decay.sh
new file mode 100644
index 000000000..45aeccf42
--- /dev/null
+++ b/deps/jemalloc/test/unit/decay.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0"
diff --git a/deps/jemalloc/test/unit/div.c b/deps/jemalloc/test/unit/div.c
new file mode 100644
index 000000000..b47f10b2b
--- /dev/null
+++ b/deps/jemalloc/test/unit/div.c
@@ -0,0 +1,29 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/div.h"
+
+TEST_BEGIN(test_div_exhaustive) {
+ for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) {
+ div_info_t div_info;
+ div_init(&div_info, divisor);
+ size_t max = 1000 * divisor;
+ if (max < 1000 * 1000) {
+ max = 1000 * 1000;
+ }
+ for (size_t dividend = 0; dividend < max;
+ dividend += divisor) {
+ size_t quotient = div_compute(
+ &div_info, dividend);
+ assert_zu_eq(dividend, quotient * divisor,
+ "With divisor = %zu, dividend = %zu, "
+ "got quotient %zu", divisor, dividend, quotient);
+ }
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_div_exhaustive);
+}
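
Editor's note: div.h replaces runtime division by a fixed divisor with a multiply-and-shift; the sketch below reconstructs that technique from first principles (the constants and names are mine, not jemalloc's). With magic = ceil(2^32 / d) for d >= 2, (n * magic) >> 32 equals n / d exactly whenever n is a multiple of d and n < 2^32, which is precisely the case the exhaustive test exercises.

    #include <assert.h>
    #include <stdint.h>

    typedef struct {
    	uint32_t magic;
    } my_div_info_t;

    static void
    my_div_init(my_div_info_t *info, uint32_t d) {
    	assert(d >= 2);	/* d == 1 would overflow the 32-bit magic. */
    	info->magic = (uint32_t)((((uint64_t)1 << 32) + d - 1) / d);
    }

    /* Exact quotient for n a multiple of d: one 64-bit multiply, one shift. */
    static uint32_t
    my_div_compute(const my_div_info_t *info, uint32_t n) {
    	return (uint32_t)(((uint64_t)n * info->magic) >> 32);
    }

Why it is exact: write magic*d = 2^32 + e with 0 <= e < d; then for n = q*d, n*magic = q*2^32 + q*e, and q*e < (2^32/d)*d = 2^32, so the shift discards exactly the q*e term.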
diff --git a/deps/jemalloc/test/unit/emitter.c b/deps/jemalloc/test/unit/emitter.c
new file mode 100644
index 000000000..535c7cf1d
--- /dev/null
+++ b/deps/jemalloc/test/unit/emitter.c
@@ -0,0 +1,413 @@
+#include "test/jemalloc_test.h"
+#include "jemalloc/internal/emitter.h"
+
+/*
+ * This is so useful for debugging and feature work that we leave the printing
+ * functionality committed, but disabled by default.
+ */
+/* Print the text as it will appear. */
+static bool print_raw = false;
+/* Print the text escaped, so it can be copied back into the test case. */
+static bool print_escaped = false;
+
+typedef struct buf_descriptor_s buf_descriptor_t;
+struct buf_descriptor_s {
+ char *buf;
+ size_t len;
+ bool mid_quote;
+};
+
+/*
+ * Forwards all writes to the passed-in buf_descriptor_v (which should be cast
+ * from a buf_descriptor_t *).
+ */
+static void
+forwarding_cb(void *buf_descriptor_v, const char *str) {
+ buf_descriptor_t *buf_descriptor = (buf_descriptor_t *)buf_descriptor_v;
+
+ if (print_raw) {
+ malloc_printf("%s", str);
+ }
+ if (print_escaped) {
+ const char *it = str;
+ while (*it != '\0') {
+ if (!buf_descriptor->mid_quote) {
+ malloc_printf("\"");
+ buf_descriptor->mid_quote = true;
+ }
+ switch (*it) {
+ case '\\':
+ malloc_printf("\\");
+ break;
+ case '\"':
+ malloc_printf("\\\"");
+ break;
+ case '\t':
+ malloc_printf("\\t");
+ break;
+ case '\n':
+ malloc_printf("\\n\"\n");
+ buf_descriptor->mid_quote = false;
+ break;
+ default:
+ malloc_printf("%c", *it);
+ }
+ it++;
+ }
+ }
+
+ size_t written = malloc_snprintf(buf_descriptor->buf,
+ buf_descriptor->len, "%s", str);
+ assert_zu_eq(written, strlen(str), "Buffer overflow!");
+ buf_descriptor->buf += written;
+ buf_descriptor->len -= written;
+ assert_zu_gt(buf_descriptor->len, 0, "Buffer out of space!");
+}
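
Editor's note: the callback-sink shape used by forwarding_cb() generalizes to any printf-style producer writing into a caller-owned buffer. A minimal standalone sketch (hypothetical names, plain vsnprintf in place of malloc_snprintf):

    #include <stdarg.h>
    #include <stdio.h>

    typedef struct {
    	char *buf;	/* Next free byte. */
    	size_t len;	/* Space remaining, including the NUL. */
    } sink_t;

    static void
    sink_write(sink_t *sink, const char *fmt, ...) {
    	va_list ap;
    	va_start(ap, fmt);
    	int n = vsnprintf(sink->buf, sink->len, fmt, ap);
    	va_end(ap);
    	if (n > 0 && (size_t)n < sink->len) {
    		sink->buf += n;	/* Advance past what was written. */
    		sink->len -= n;
    	}
    }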
+
+static void
+assert_emit_output(void (*emit_fn)(emitter_t *),
+ const char *expected_json_output, const char *expected_table_output) {
+ emitter_t emitter;
+ char buf[MALLOC_PRINTF_BUFSIZE];
+ buf_descriptor_t buf_descriptor;
+
+ buf_descriptor.buf = buf;
+ buf_descriptor.len = MALLOC_PRINTF_BUFSIZE;
+ buf_descriptor.mid_quote = false;
+
+ emitter_init(&emitter, emitter_output_json, &forwarding_cb,
+ &buf_descriptor);
+ (*emit_fn)(&emitter);
+ assert_str_eq(expected_json_output, buf, "json output failure");
+
+ buf_descriptor.buf = buf;
+ buf_descriptor.len = MALLOC_PRINTF_BUFSIZE;
+ buf_descriptor.mid_quote = false;
+
+ emitter_init(&emitter, emitter_output_table, &forwarding_cb,
+ &buf_descriptor);
+ (*emit_fn)(&emitter);
+ assert_str_eq(expected_table_output, buf, "table output failure");
+}
+
+static void
+emit_dict(emitter_t *emitter) {
+ bool b_false = false;
+ bool b_true = true;
+ int i_123 = 123;
+ const char *str = "a string";
+
+ emitter_begin(emitter);
+ emitter_dict_begin(emitter, "foo", "This is the foo table:");
+ emitter_kv(emitter, "abc", "ABC", emitter_type_bool, &b_false);
+ emitter_kv(emitter, "def", "DEF", emitter_type_bool, &b_true);
+ emitter_kv_note(emitter, "ghi", "GHI", emitter_type_int, &i_123,
+ "note_key1", emitter_type_string, &str);
+ emitter_kv_note(emitter, "jkl", "JKL", emitter_type_string, &str,
+ "note_key2", emitter_type_bool, &b_false);
+ emitter_dict_end(emitter);
+ emitter_end(emitter);
+}
+static const char *dict_json =
+"{\n"
+"\t\"foo\": {\n"
+"\t\t\"abc\": false,\n"
+"\t\t\"def\": true,\n"
+"\t\t\"ghi\": 123,\n"
+"\t\t\"jkl\": \"a string\"\n"
+"\t}\n"
+"}\n";
+static const char *dict_table =
+"This is the foo table:\n"
+" ABC: false\n"
+" DEF: true\n"
+" GHI: 123 (note_key1: \"a string\")\n"
+" JKL: \"a string\" (note_key2: false)\n";
+
+TEST_BEGIN(test_dict) {
+ assert_emit_output(&emit_dict, dict_json, dict_table);
+}
+TEST_END
+
+static void
+emit_table_printf(emitter_t *emitter) {
+ emitter_begin(emitter);
+ emitter_table_printf(emitter, "Table note 1\n");
+ emitter_table_printf(emitter, "Table note 2 %s\n",
+ "with format string");
+ emitter_end(emitter);
+}
+
+static const char *table_printf_json =
+"{\n"
+"}\n";
+
+static const char *table_printf_table =
+"Table note 1\n"
+"Table note 2 with format string\n";
+
+TEST_BEGIN(test_table_printf) {
+ assert_emit_output(&emit_table_printf, table_printf_json,
+ table_printf_table);
+}
+TEST_END
+
+static void emit_nested_dict(emitter_t *emitter) {
+ int val = 123;
+ emitter_begin(emitter);
+ emitter_dict_begin(emitter, "json1", "Dict 1");
+ emitter_dict_begin(emitter, "json2", "Dict 2");
+ emitter_kv(emitter, "primitive", "A primitive", emitter_type_int, &val);
+ emitter_dict_end(emitter); /* Close 2 */
+ emitter_dict_begin(emitter, "json3", "Dict 3");
+ emitter_dict_end(emitter); /* Close 3 */
+ emitter_dict_end(emitter); /* Close 1 */
+ emitter_dict_begin(emitter, "json4", "Dict 4");
+ emitter_kv(emitter, "primitive", "Another primitive",
+ emitter_type_int, &val);
+ emitter_dict_end(emitter); /* Close 4 */
+ emitter_end(emitter);
+}
+
+static const char *nested_dict_json =
+"{\n"
+"\t\"json1\": {\n"
+"\t\t\"json2\": {\n"
+"\t\t\t\"primitive\": 123\n"
+"\t\t},\n"
+"\t\t\"json3\": {\n"
+"\t\t}\n"
+"\t},\n"
+"\t\"json4\": {\n"
+"\t\t\"primitive\": 123\n"
+"\t}\n"
+"}\n";
+
+static const char *nested_dict_table =
+"Dict 1\n"
+" Dict 2\n"
+" A primitive: 123\n"
+" Dict 3\n"
+"Dict 4\n"
+" Another primitive: 123\n";
+
+TEST_BEGIN(test_nested_dict) {
+ assert_emit_output(&emit_nested_dict, nested_dict_json,
+ nested_dict_table);
+}
+TEST_END
+
+static void
+emit_types(emitter_t *emitter) {
+ bool b = false;
+ int i = -123;
+ unsigned u = 123;
+ ssize_t zd = -456;
+ size_t zu = 456;
+ const char *str = "string";
+ uint32_t u32 = 789;
+ uint64_t u64 = 10000000000ULL;
+
+ emitter_begin(emitter);
+ emitter_kv(emitter, "k1", "K1", emitter_type_bool, &b);
+ emitter_kv(emitter, "k2", "K2", emitter_type_int, &i);
+ emitter_kv(emitter, "k3", "K3", emitter_type_unsigned, &u);
+ emitter_kv(emitter, "k4", "K4", emitter_type_ssize, &zd);
+ emitter_kv(emitter, "k5", "K5", emitter_type_size, &zu);
+ emitter_kv(emitter, "k6", "K6", emitter_type_string, &str);
+ emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32);
+ emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64);
+ /*
+ * We don't test the title type, since it's only used for tables. It's
+ * tested in the emitter_table_row tests.
+ */
+ emitter_end(emitter);
+}
+
+static const char *types_json =
+"{\n"
+"\t\"k1\": false,\n"
+"\t\"k2\": -123,\n"
+"\t\"k3\": 123,\n"
+"\t\"k4\": -456,\n"
+"\t\"k5\": 456,\n"
+"\t\"k6\": \"string\",\n"
+"\t\"k7\": 789,\n"
+"\t\"k8\": 10000000000\n"
+"}\n";
+
+static const char *types_table =
+"K1: false\n"
+"K2: -123\n"
+"K3: 123\n"
+"K4: -456\n"
+"K5: 456\n"
+"K6: \"string\"\n"
+"K7: 789\n"
+"K8: 10000000000\n";
+
+TEST_BEGIN(test_types) {
+ assert_emit_output(&emit_types, types_json, types_table);
+}
+TEST_END
+
+static void
+emit_modal(emitter_t *emitter) {
+ int val = 123;
+ emitter_begin(emitter);
+ emitter_dict_begin(emitter, "j0", "T0");
+ emitter_json_dict_begin(emitter, "j1");
+ emitter_kv(emitter, "i1", "I1", emitter_type_int, &val);
+ emitter_json_kv(emitter, "i2", emitter_type_int, &val);
+ emitter_table_kv(emitter, "I3", emitter_type_int, &val);
+ emitter_table_dict_begin(emitter, "T1");
+ emitter_kv(emitter, "i4", "I4", emitter_type_int, &val);
+ emitter_json_dict_end(emitter); /* Close j1 */
+ emitter_kv(emitter, "i5", "I5", emitter_type_int, &val);
+ emitter_table_dict_end(emitter); /* Close T1 */
+ emitter_kv(emitter, "i6", "I6", emitter_type_int, &val);
+ emitter_dict_end(emitter); /* Close j0 / T0 */
+ emitter_end(emitter);
+}
+
+static const char *modal_json =
+"{\n"
+"\t\"j0\": {\n"
+"\t\t\"j1\": {\n"
+"\t\t\t\"i1\": 123,\n"
+"\t\t\t\"i2\": 123,\n"
+"\t\t\t\"i4\": 123\n"
+"\t\t},\n"
+"\t\t\"i5\": 123,\n"
+"\t\t\"i6\": 123\n"
+"\t}\n"
+"}\n";
+
+static const char *modal_table =
+"T0\n"
+" I1: 123\n"
+" I3: 123\n"
+" T1\n"
+" I4: 123\n"
+" I5: 123\n"
+" I6: 123\n";
+
+TEST_BEGIN(test_modal) {
+ assert_emit_output(&emit_modal, modal_json, modal_table);
+}
+TEST_END
+
+static void
+emit_json_arr(emitter_t *emitter) {
+ int ival = 123;
+
+ emitter_begin(emitter);
+ emitter_json_dict_begin(emitter, "dict");
+ emitter_json_arr_begin(emitter, "arr");
+ emitter_json_arr_obj_begin(emitter);
+ emitter_json_kv(emitter, "foo", emitter_type_int, &ival);
+ emitter_json_arr_obj_end(emitter); /* Close arr[0] */
+ /* arr[1] and arr[2] are primitives. */
+ emitter_json_arr_value(emitter, emitter_type_int, &ival);
+ emitter_json_arr_value(emitter, emitter_type_int, &ival);
+ emitter_json_arr_obj_begin(emitter);
+ emitter_json_kv(emitter, "bar", emitter_type_int, &ival);
+ emitter_json_kv(emitter, "baz", emitter_type_int, &ival);
+ emitter_json_arr_obj_end(emitter); /* Close arr[3]. */
+ emitter_json_arr_end(emitter); /* Close arr. */
+ emitter_json_dict_end(emitter); /* Close dict. */
+ emitter_end(emitter);
+}
+
+static const char *json_arr_json =
+"{\n"
+"\t\"dict\": {\n"
+"\t\t\"arr\": [\n"
+"\t\t\t{\n"
+"\t\t\t\t\"foo\": 123\n"
+"\t\t\t},\n"
+"\t\t\t123,\n"
+"\t\t\t123,\n"
+"\t\t\t{\n"
+"\t\t\t\t\"bar\": 123,\n"
+"\t\t\t\t\"baz\": 123\n"
+"\t\t\t}\n"
+"\t\t]\n"
+"\t}\n"
+"}\n";
+
+static const char *json_arr_table = "";
+
+TEST_BEGIN(test_json_arr) {
+ assert_emit_output(&emit_json_arr, json_arr_json, json_arr_table);
+}
+TEST_END
+
+static void
+emit_table_row(emitter_t *emitter) {
+ emitter_begin(emitter);
+ emitter_row_t row;
+ emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title};
+ abc.str_val = "ABC title";
+ emitter_col_t def = {emitter_justify_right, 15, emitter_type_title};
+ def.str_val = "DEF title";
+ emitter_col_t ghi = {emitter_justify_right, 5, emitter_type_title};
+ ghi.str_val = "GHI";
+
+ emitter_row_init(&row);
+ emitter_col_init(&abc, &row);
+ emitter_col_init(&def, &row);
+ emitter_col_init(&ghi, &row);
+
+ emitter_table_row(emitter, &row);
+
+ abc.type = emitter_type_int;
+ def.type = emitter_type_bool;
+ ghi.type = emitter_type_int;
+
+ abc.int_val = 123;
+ def.bool_val = true;
+ ghi.int_val = 456;
+ emitter_table_row(emitter, &row);
+
+ abc.int_val = 789;
+ def.bool_val = false;
+ ghi.int_val = 1011;
+ emitter_table_row(emitter, &row);
+
+ abc.type = emitter_type_string;
+ abc.str_val = "a string";
+ def.bool_val = false;
+ ghi.type = emitter_type_title;
+ ghi.str_val = "ghi";
+ emitter_table_row(emitter, &row);
+
+ emitter_end(emitter);
+}
+
+static const char *table_row_json =
+"{\n"
+"}\n";
+
+static const char *table_row_table =
+"ABC title DEF title GHI\n"
+"123 true 456\n"
+"789 false 1011\n"
+"\"a string\" false ghi\n";
+
+TEST_BEGIN(test_table_row) {
+ assert_emit_output(&emit_table_row, table_row_json, table_row_table);
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_dict,
+ test_table_printf,
+ test_nested_dict,
+ test_types,
+ test_modal,
+ test_json_arr,
+ test_table_row);
+}
diff --git a/deps/jemalloc/test/unit/extent_quantize.c b/deps/jemalloc/test/unit/extent_quantize.c
new file mode 100644
index 000000000..0ca7a75d9
--- /dev/null
+++ b/deps/jemalloc/test/unit/extent_quantize.c
@@ -0,0 +1,141 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_small_extent_size) {
+ unsigned nbins, i;
+ size_t sz, extent_size;
+ size_t mib[4];
+ size_t miblen = sizeof(mib) / sizeof(size_t);
+
+ /*
+ * Iterate over all small size classes, get their extent sizes, and
+ * verify that the quantized size is the same as the extent size.
+ */
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0,
+ "Unexpected mallctl failure");
+
+ assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib failure");
+ for (i = 0; i < nbins; i++) {
+ mib[2] = i;
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz,
+ NULL, 0), 0, "Unexpected mallctlbymib failure");
+ assert_zu_eq(extent_size,
+ extent_size_quantize_floor(extent_size),
+ "Small extent quantization should be a no-op "
+ "(extent_size=%zu)", extent_size);
+ assert_zu_eq(extent_size,
+ extent_size_quantize_ceil(extent_size),
+ "Small extent quantization should be a no-op "
+ "(extent_size=%zu)", extent_size);
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_large_extent_size) {
+ bool cache_oblivious;
+ unsigned nlextents, i;
+ size_t sz, extent_size_prev, ceil_prev;
+ size_t mib[4];
+ size_t miblen = sizeof(mib) / sizeof(size_t);
+
+ /*
+ * Iterate over all large size classes, get their extent sizes, and
+ * verify that quantization is a no-op for those precise sizes, and
+ * that the floor/ceiling of nearby sizes land on precise sizes.
+ */
+
+ sz = sizeof(bool);
+ assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious,
+ &sz, NULL, 0), 0, "Unexpected mallctl failure");
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
+ 0), 0, "Unexpected mallctl failure");
+
+ assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib failure");
+ for (i = 0; i < nlextents; i++) {
+ size_t lextent_size, extent_size, floor, ceil;
+
+ mib[2] = i;
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size,
+ &sz, NULL, 0), 0, "Unexpected mallctlbymib failure");
+ extent_size = cache_oblivious ? lextent_size + PAGE :
+ lextent_size;
+ floor = extent_size_quantize_floor(extent_size);
+ ceil = extent_size_quantize_ceil(extent_size);
+
+ assert_zu_eq(extent_size, floor,
+ "Extent quantization should be a no-op for precise size "
+ "(lextent_size=%zu, extent_size=%zu)", lextent_size,
+ extent_size);
+ assert_zu_eq(extent_size, ceil,
+ "Extent quantization should be a no-op for precise size "
+ "(lextent_size=%zu, extent_size=%zu)", lextent_size,
+ extent_size);
+
+ if (i > 0) {
+ assert_zu_eq(extent_size_prev,
+ extent_size_quantize_floor(extent_size - PAGE),
+ "Floor should be a precise size");
+ if (extent_size_prev < ceil_prev) {
+ assert_zu_eq(ceil_prev, extent_size,
+ "Ceiling should be a precise size "
+ "(extent_size_prev=%zu, ceil_prev=%zu, "
+ "extent_size=%zu)", extent_size_prev,
+ ceil_prev, extent_size);
+ }
+ }
+ if (i + 1 < nlextents) {
+ extent_size_prev = floor;
+ ceil_prev = extent_size_quantize_ceil(extent_size +
+ PAGE);
+ }
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_monotonic) {
+#define SZ_MAX ZU(4 * 1024 * 1024)
+ unsigned i;
+ size_t floor_prev, ceil_prev;
+
+ floor_prev = 0;
+ ceil_prev = 0;
+ for (i = 1; i <= SZ_MAX >> LG_PAGE; i++) {
+ size_t extent_size, floor, ceil;
+
+ extent_size = i << LG_PAGE;
+ floor = extent_size_quantize_floor(extent_size);
+ ceil = extent_size_quantize_ceil(extent_size);
+
+ assert_zu_le(floor, extent_size,
+ "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)",
+ floor, extent_size, ceil);
+ assert_zu_ge(ceil, extent_size,
+ "Ceiling should be >= (floor=%zu, extent_size=%zu, "
+ "ceil=%zu)", floor, extent_size, ceil);
+
+ assert_zu_le(floor_prev, floor, "Floor should be monotonic "
+ "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)",
+ floor_prev, floor, extent_size, ceil);
+ assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic "
+ "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)",
+ floor, extent_size, ceil_prev, ceil);
+
+ floor_prev = floor;
+ ceil_prev = ceil;
+ }
+}
+TEST_END
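
Editor's note: test_monotonic checks three invariants of any floor/ceil size quantizer: floor never rounds up, ceiling never rounds down, and both are monotone in the input. The sketch below restates them against a trivial page-granularity quantizer (a stand-in for illustration, not jemalloc's size-class-aware extent_size_quantize_*()); sizes must be visited in nondecreasing order for the monotonicity checks to make sense.

    #include <assert.h>
    #include <stddef.h>

    #define MY_PAGE ((size_t)4096)

    static size_t my_floor(size_t sz) { return sz & ~(MY_PAGE - 1); }
    static size_t my_ceil(size_t sz)  { return (sz + MY_PAGE - 1) & ~(MY_PAGE - 1); }

    static void
    check_quantize_invariants(size_t sz, size_t prev_floor, size_t prev_ceil) {
    	assert(my_floor(sz) <= sz);          /* Floor never rounds up. */
    	assert(my_ceil(sz) >= sz);           /* Ceiling never rounds down. */
    	assert(prev_floor <= my_floor(sz));  /* Both are monotone in sz. */
    	assert(prev_ceil <= my_ceil(sz));
    }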
+
+int
+main(void) {
+ return test(
+ test_small_extent_size,
+ test_large_extent_size,
+ test_monotonic);
+}
diff --git a/deps/jemalloc/test/unit/fork.c b/deps/jemalloc/test/unit/fork.c
new file mode 100644
index 000000000..b1690750a
--- /dev/null
+++ b/deps/jemalloc/test/unit/fork.c
@@ -0,0 +1,141 @@
+#include "test/jemalloc_test.h"
+
+#ifndef _WIN32
+#include <sys/wait.h>
+#endif
+
+#ifndef _WIN32
+static void
+wait_for_child_exit(int pid) {
+ int status;
+ while (true) {
+ if (waitpid(pid, &status, 0) == -1) {
+ test_fail("Unexpected waitpid() failure.");
+ }
+ if (WIFSIGNALED(status)) {
+ test_fail("Unexpected child termination due to "
+ "signal %d", WTERMSIG(status));
+ break;
+ }
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) != 0) {
+ test_fail("Unexpected child exit value %d",
+ WEXITSTATUS(status));
+ }
+ break;
+ }
+ }
+}
+#endif
+
+TEST_BEGIN(test_fork) {
+#ifndef _WIN32
+ void *p;
+ pid_t pid;
+
+ /* Set up a manually managed arena for the test. */
+ unsigned arena_ind;
+ size_t sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+
+ /* Migrate to the new arena. */
+ unsigned old_arena_ind;
+ sz = sizeof(old_arena_ind);
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+ (void *)&arena_ind, sizeof(arena_ind)), 0,
+ "Unexpected mallctl() failure");
+
+ p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+
+ pid = fork();
+
+ free(p);
+
+ p = malloc(64);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+ free(p);
+
+ if (pid == -1) {
+ /* Error. */
+ test_fail("Unexpected fork() failure");
+ } else if (pid == 0) {
+ /* Child. */
+ _exit(0);
+ } else {
+ wait_for_child_exit(pid);
+ }
+#else
+ test_skip("fork(2) is irrelevant to Windows");
+#endif
+}
+TEST_END
+
+#ifndef _WIN32
+static void *
+do_fork_thd(void *arg) {
+ malloc(1);
+ int pid = fork();
+ if (pid == -1) {
+ /* Error. */
+ test_fail("Unexpected fork() failure");
+ } else if (pid == 0) {
+ /* Child. */
+ char *args[] = {"true", NULL};
+ execvp(args[0], args);
+ test_fail("Exec failed");
+ } else {
+ /* Parent */
+ wait_for_child_exit(pid);
+ }
+ return NULL;
+}
+#endif
+
+#ifndef _WIN32
+static void
+do_test_fork_multithreaded(void) {
+ thd_t child;
+ thd_create(&child, do_fork_thd, NULL);
+ do_fork_thd(NULL);
+ thd_join(child, NULL);
+}
+#endif
+
+TEST_BEGIN(test_fork_multithreaded) {
+#ifndef _WIN32
+ /*
+ * We've seen bugs involving hanging on arenas_lock (though the same
+ * class of bugs can happen on any mutex). The bugs are intermittent
+ * though, so we want to run the test multiple times. Since we hold the
+ * arenas lock only early in the process lifetime, we can't just run
+ * this test in a loop (since, after all the arenas are initialized, we
+ * won't acquire arenas_lock any further). We therefore repeat the test
+ * with multiple processes.
+ */
+ for (int i = 0; i < 100; i++) {
+ int pid = fork();
+ if (pid == -1) {
+ /* Error. */
+ test_fail("Unexpected fork() failure,");
+ } else if (pid == 0) {
+ /* Child. */
+ do_test_fork_multithreaded();
+ _exit(0);
+ } else {
+ wait_for_child_exit(pid);
+ }
+ }
+#else
+ test_skip("fork(2) is irrelevant to Windows");
+#endif
+}
+TEST_END
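
Editor's note: the hangs this test guards against are the classic lock-across-fork hazard: fork() while another thread holds a mutex leaves that mutex permanently locked in the child. The standard defense, which allocators register via pthread_atfork(3), is sketched below with a single hypothetical mutex; jemalloc's actual prefork/postfork machinery covers many locks and is more involved.

    #include <pthread.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Acquire before fork so no other thread holds it at the fork point. */
    static void prefork(void)         { pthread_mutex_lock(&big_lock); }
    static void postfork_parent(void) { pthread_mutex_unlock(&big_lock); }
    /* Many implementations re-initialize the lock in the child instead of
     * unlocking it; unlocking is the simplest correct form of the pattern. */
    static void postfork_child(void)  { pthread_mutex_unlock(&big_lock); }

    static void
    install_fork_handlers(void) {
    	pthread_atfork(prefork, postfork_parent, postfork_child);
    }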
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_fork,
+ test_fork_multithreaded);
+}
diff --git a/deps/jemalloc/test/unit/hash.c b/deps/jemalloc/test/unit/hash.c
index abb394ac0..7cc034f8d 100644
--- a/deps/jemalloc/test/unit/hash.c
+++ b/deps/jemalloc/test/unit/hash.c
@@ -28,6 +28,7 @@
*/
#include "test/jemalloc_test.h"
+#include "jemalloc/internal/hash.h"
typedef enum {
hash_variant_x86_32,
@@ -35,43 +36,39 @@ typedef enum {
hash_variant_x64_128
} hash_variant_t;
-static size_t
-hash_variant_bits(hash_variant_t variant)
-{
-
+static int
+hash_variant_bits(hash_variant_t variant) {
switch (variant) {
- case hash_variant_x86_32: return (32);
- case hash_variant_x86_128: return (128);
- case hash_variant_x64_128: return (128);
+ case hash_variant_x86_32: return 32;
+ case hash_variant_x86_128: return 128;
+ case hash_variant_x64_128: return 128;
default: not_reached();
}
}
static const char *
-hash_variant_string(hash_variant_t variant)
-{
-
+hash_variant_string(hash_variant_t variant) {
switch (variant) {
- case hash_variant_x86_32: return ("hash_x86_32");
- case hash_variant_x86_128: return ("hash_x86_128");
- case hash_variant_x64_128: return ("hash_x64_128");
+ case hash_variant_x86_32: return "hash_x86_32";
+ case hash_variant_x86_128: return "hash_x86_128";
+ case hash_variant_x64_128: return "hash_x64_128";
default: not_reached();
}
}
+#define KEY_SIZE 256
static void
-hash_variant_verify(hash_variant_t variant)
-{
- const size_t hashbytes = hash_variant_bits(variant) / 8;
- uint8_t key[256];
- uint8_t hashes[hashbytes * 256];
- uint8_t final[hashbytes];
+hash_variant_verify_key(hash_variant_t variant, uint8_t *key) {
+ const int hashbytes = hash_variant_bits(variant) / 8;
+ const int hashes_size = hashbytes * 256;
+ VARIABLE_ARRAY(uint8_t, hashes, hashes_size);
+ VARIABLE_ARRAY(uint8_t, final, hashbytes);
unsigned i;
uint32_t computed, expected;
- memset(key, 0, sizeof(key));
- memset(hashes, 0, sizeof(hashes));
- memset(final, 0, sizeof(final));
+ memset(key, 0, KEY_SIZE);
+ memset(hashes, 0, hashes_size);
+ memset(final, 0, hashbytes);
/*
* Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the
@@ -102,17 +99,17 @@ hash_variant_verify(hash_variant_t variant)
/* Hash the result array. */
switch (variant) {
case hash_variant_x86_32: {
- uint32_t out = hash_x86_32(hashes, hashbytes*256, 0);
+ uint32_t out = hash_x86_32(hashes, hashes_size, 0);
memcpy(final, &out, sizeof(out));
break;
} case hash_variant_x86_128: {
uint64_t out[2];
- hash_x86_128(hashes, hashbytes*256, 0, out);
+ hash_x86_128(hashes, hashes_size, 0, out);
memcpy(final, out, sizeof(out));
break;
} case hash_variant_x64_128: {
uint64_t out[2];
- hash_x64_128(hashes, hashbytes*256, 0, out);
+ hash_x64_128(hashes, hashes_size, 0, out);
memcpy(final, out, sizeof(out));
break;
} default: not_reached();
@@ -139,33 +136,38 @@ hash_variant_verify(hash_variant_t variant)
hash_variant_string(variant), expected, computed);
}
-TEST_BEGIN(test_hash_x86_32)
-{
+static void
+hash_variant_verify(hash_variant_t variant) {
+#define MAX_ALIGN 16
+ uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)];
+ unsigned i;
+
+ for (i = 0; i < MAX_ALIGN; i++) {
+ hash_variant_verify_key(variant, &key[i]);
+ }
+#undef MAX_ALIGN
+}
+#undef KEY_SIZE
+TEST_BEGIN(test_hash_x86_32) {
hash_variant_verify(hash_variant_x86_32);
}
TEST_END
-TEST_BEGIN(test_hash_x86_128)
-{
-
+TEST_BEGIN(test_hash_x86_128) {
hash_variant_verify(hash_variant_x86_128);
}
TEST_END
-TEST_BEGIN(test_hash_x64_128)
-{
-
+TEST_BEGIN(test_hash_x64_128) {
hash_variant_verify(hash_variant_x64_128);
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_hash_x86_32,
test_hash_x86_128,
- test_hash_x64_128));
+ test_hash_x64_128);
}
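
Editor's note: the new MAX_ALIGN loop feeds the hash every possible starting alignment of the key, which matters because dereferencing a misaligned uint32_t * is undefined behavior on some targets. The portable idiom for readers of byte streams is a memcpy load, sketched here with a hypothetical helper name:

    #include <stdint.h>
    #include <string.h>

    /* Read a 32-bit value from an arbitrarily aligned pointer without UB;
     * compilers lower this memcpy to a single load on architectures that
     * permit unaligned access. */
    static inline uint32_t
    load_u32(const void *p) {
    	uint32_t v;
    	memcpy(&v, p, sizeof(v));
    	return v;
    }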
diff --git a/deps/jemalloc/test/unit/hooks.c b/deps/jemalloc/test/unit/hooks.c
new file mode 100644
index 000000000..b70172e13
--- /dev/null
+++ b/deps/jemalloc/test/unit/hooks.c
@@ -0,0 +1,38 @@
+#include "test/jemalloc_test.h"
+
+static bool hook_called = false;
+
+static void
+hook(void) {
+ hook_called = true;
+}
+
+static int
+func_to_hook(int arg1, int arg2) {
+ return arg1 + arg2;
+}
+
+#define func_to_hook JEMALLOC_HOOK(func_to_hook, hooks_libc_hook)
+
+TEST_BEGIN(unhooked_call) {
+ hooks_libc_hook = NULL;
+ hook_called = false;
+ assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value.");
+ assert_false(hook_called, "Nulling out hook didn't take.");
+}
+TEST_END
+
+TEST_BEGIN(hooked_call) {
+ hooks_libc_hook = &hook;
+ hook_called = false;
+ assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value.");
+ assert_true(hook_called, "Hook should have executed.");
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ unhooked_call,
+ hooked_call);
+}
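
Editor's note: for readers puzzled by the #define above, JEMALLOC_HOOK wraps a call so that an optional function pointer fires first. A plausible shape (an illustrative guess, not the actual jemalloc definition) is:

    /* Invoke the hook (if installed), then evaluate the real callee. The
     * comma operator discards the hook's result, so the wrapped call's
     * return value is preserved unchanged. */
    typedef void (*libc_hook_t)(void);
    static libc_hook_t my_libc_hook = NULL;

    #define MY_HOOK(fn, hook) ((hook) != NULL ? ((hook)(), fn) : fn)

    /* Usage mirrors the test: after
     *     #define func_to_hook MY_HOOK(func_to_hook, my_libc_hook)
     * every call site func_to_hook(1, 2) routes through the hook first. */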
diff --git a/deps/jemalloc/test/unit/junk.c b/deps/jemalloc/test/unit/junk.c
index 85bbf9e2b..243ced41e 100644
--- a/deps/jemalloc/test/unit/junk.c
+++ b/deps/jemalloc/test/unit/junk.c
@@ -1,222 +1,141 @@
#include "test/jemalloc_test.h"
-#ifdef JEMALLOC_FILL
-const char *malloc_conf =
- "abort:false,junk:true,zero:false,redzone:true,quarantine:0";
-#endif
+#include "jemalloc/internal/util.h"
static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig;
-static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig;
-static huge_dalloc_junk_t *huge_dalloc_junk_orig;
-static void *most_recently_junked;
+static large_dalloc_junk_t *large_dalloc_junk_orig;
+static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig;
+static void *watch_for_junking;
+static bool saw_junking;
static void
-arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info)
-{
+watch_junking(void *p) {
+ watch_for_junking = p;
+ saw_junking = false;
+}
+
+static void
+arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) {
size_t i;
arena_dalloc_junk_small_orig(ptr, bin_info);
for (i = 0; i < bin_info->reg_size; i++) {
- assert_c_eq(((char *)ptr)[i], 0x5a,
+ assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
"Missing junk fill for byte %zu/%zu of deallocated region",
i, bin_info->reg_size);
}
- most_recently_junked = ptr;
+ if (ptr == watch_for_junking) {
+ saw_junking = true;
+ }
}
static void
-arena_dalloc_junk_large_intercept(void *ptr, size_t usize)
-{
+large_dalloc_junk_intercept(void *ptr, size_t usize) {
size_t i;
- arena_dalloc_junk_large_orig(ptr, usize);
+ large_dalloc_junk_orig(ptr, usize);
for (i = 0; i < usize; i++) {
- assert_c_eq(((char *)ptr)[i], 0x5a,
+ assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK,
"Missing junk fill for byte %zu/%zu of deallocated region",
i, usize);
}
- most_recently_junked = ptr;
+ if (ptr == watch_for_junking) {
+ saw_junking = true;
+ }
}
static void
-huge_dalloc_junk_intercept(void *ptr, size_t usize)
-{
-
- huge_dalloc_junk_orig(ptr, usize);
- /*
- * The conditions under which junk filling actually occurs are nuanced
- * enough that it doesn't make sense to duplicate the decision logic in
- * test code, so don't actually check that the region is junk-filled.
- */
- most_recently_junked = ptr;
+large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) {
+ large_dalloc_maybe_junk_orig(ptr, usize);
+ if (ptr == watch_for_junking) {
+ saw_junking = true;
+ }
}
static void
-test_junk(size_t sz_min, size_t sz_max)
-{
- char *s;
+test_junk(size_t sz_min, size_t sz_max) {
+ uint8_t *s;
size_t sz_prev, sz, i;
- arena_dalloc_junk_small_orig = arena_dalloc_junk_small;
- arena_dalloc_junk_small = arena_dalloc_junk_small_intercept;
- arena_dalloc_junk_large_orig = arena_dalloc_junk_large;
- arena_dalloc_junk_large = arena_dalloc_junk_large_intercept;
- huge_dalloc_junk_orig = huge_dalloc_junk;
- huge_dalloc_junk = huge_dalloc_junk_intercept;
+ if (opt_junk_free) {
+ arena_dalloc_junk_small_orig = arena_dalloc_junk_small;
+ arena_dalloc_junk_small = arena_dalloc_junk_small_intercept;
+ large_dalloc_junk_orig = large_dalloc_junk;
+ large_dalloc_junk = large_dalloc_junk_intercept;
+ large_dalloc_maybe_junk_orig = large_dalloc_maybe_junk;
+ large_dalloc_maybe_junk = large_dalloc_maybe_junk_intercept;
+ }
sz_prev = 0;
- s = (char *)mallocx(sz_min, 0);
+ s = (uint8_t *)mallocx(sz_min, 0);
assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
for (sz = sallocx(s, 0); sz <= sz_max;
sz_prev = sz, sz = sallocx(s, 0)) {
if (sz_prev > 0) {
- assert_c_eq(s[0], 'a',
+ assert_u_eq(s[0], 'a',
"Previously allocated byte %zu/%zu is corrupted",
ZU(0), sz_prev);
- assert_c_eq(s[sz_prev-1], 'a',
+ assert_u_eq(s[sz_prev-1], 'a',
"Previously allocated byte %zu/%zu is corrupted",
sz_prev-1, sz_prev);
}
for (i = sz_prev; i < sz; i++) {
- assert_c_eq(s[i], 0xa5,
- "Newly allocated byte %zu/%zu isn't junk-filled",
- i, sz);
+ if (opt_junk_alloc) {
+ assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK,
+ "Newly allocated byte %zu/%zu isn't "
+ "junk-filled", i, sz);
+ }
s[i] = 'a';
}
if (xallocx(s, sz+1, 0, 0) == sz) {
- void *junked = (void *)s;
-
- s = (char *)rallocx(s, sz+1, 0);
- assert_ptr_not_null((void *)s,
+ uint8_t *t;
+ watch_junking(s);
+ t = (uint8_t *)rallocx(s, sz+1, 0);
+ assert_ptr_not_null((void *)t,
"Unexpected rallocx() failure");
- if (!config_mremap || sz+1 <= arena_maxclass) {
- assert_ptr_eq(most_recently_junked, junked,
+ assert_zu_ge(sallocx(t, 0), sz+1,
+ "Unexpectedly small rallocx() result");
+ if (!background_thread_enabled()) {
+ assert_ptr_ne(s, t,
+ "Unexpected in-place rallocx()");
+ assert_true(!opt_junk_free || saw_junking,
"Expected region of size %zu to be "
- "junk-filled",
- sz);
+ "junk-filled", sz);
}
+ s = t;
}
}
+ watch_junking(s);
dallocx(s, 0);
- assert_ptr_eq(most_recently_junked, (void *)s,
+ assert_true(!opt_junk_free || saw_junking,
"Expected region of size %zu to be junk-filled", sz);
- arena_dalloc_junk_small = arena_dalloc_junk_small_orig;
- arena_dalloc_junk_large = arena_dalloc_junk_large_orig;
- huge_dalloc_junk = huge_dalloc_junk_orig;
+ if (opt_junk_free) {
+ arena_dalloc_junk_small = arena_dalloc_junk_small_orig;
+ large_dalloc_junk = large_dalloc_junk_orig;
+ large_dalloc_maybe_junk = large_dalloc_maybe_junk_orig;
+ }
}
-TEST_BEGIN(test_junk_small)
-{
-
+TEST_BEGIN(test_junk_small) {
test_skip_if(!config_fill);
test_junk(1, SMALL_MAXCLASS-1);
}
TEST_END
-TEST_BEGIN(test_junk_large)
-{
-
+TEST_BEGIN(test_junk_large) {
test_skip_if(!config_fill);
- test_junk(SMALL_MAXCLASS+1, arena_maxclass);
-}
-TEST_END
-
-TEST_BEGIN(test_junk_huge)
-{
-
- test_skip_if(!config_fill);
- test_junk(arena_maxclass+1, chunksize*2);
-}
-TEST_END
-
-arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig;
-static void *most_recently_trimmed;
-
-static void
-arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize)
-{
-
- arena_ralloc_junk_large_orig(ptr, old_usize, usize);
- assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize");
- assert_zu_eq(usize, arena_maxclass-PAGE, "Unexpected usize");
- most_recently_trimmed = ptr;
-}
-
-TEST_BEGIN(test_junk_large_ralloc_shrink)
-{
- void *p1, *p2;
-
- p1 = mallocx(arena_maxclass, 0);
- assert_ptr_not_null(p1, "Unexpected mallocx() failure");
-
- arena_ralloc_junk_large_orig = arena_ralloc_junk_large;
- arena_ralloc_junk_large = arena_ralloc_junk_large_intercept;
-
- p2 = rallocx(p1, arena_maxclass-PAGE, 0);
- assert_ptr_eq(p1, p2, "Unexpected move during shrink");
-
- arena_ralloc_junk_large = arena_ralloc_junk_large_orig;
-
- assert_ptr_eq(most_recently_trimmed, p1,
- "Expected trimmed portion of region to be junk-filled");
-}
-TEST_END
-
-static bool detected_redzone_corruption;
-
-static void
-arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after,
- size_t offset, uint8_t byte)
-{
-
- detected_redzone_corruption = true;
-}
-
-TEST_BEGIN(test_junk_redzone)
-{
- char *s;
- arena_redzone_corruption_t *arena_redzone_corruption_orig;
-
- test_skip_if(!config_fill);
-
- arena_redzone_corruption_orig = arena_redzone_corruption;
- arena_redzone_corruption = arena_redzone_corruption_replacement;
-
- /* Test underflow. */
- detected_redzone_corruption = false;
- s = (char *)mallocx(1, 0);
- assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
- s[-1] = 0xbb;
- dallocx(s, 0);
- assert_true(detected_redzone_corruption,
- "Did not detect redzone corruption");
-
- /* Test overflow. */
- detected_redzone_corruption = false;
- s = (char *)mallocx(1, 0);
- assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
- s[sallocx(s, 0)] = 0xbb;
- dallocx(s, 0);
- assert_true(detected_redzone_corruption,
- "Did not detect redzone corruption");
-
- arena_redzone_corruption = arena_redzone_corruption_orig;
+ test_junk(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1)));
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_junk_small,
- test_junk_large,
- test_junk_huge,
- test_junk_large_ralloc_shrink,
- test_junk_redzone));
+ test_junk_large);
}
diff --git a/deps/jemalloc/test/unit/junk.sh b/deps/jemalloc/test/unit/junk.sh
new file mode 100644
index 000000000..97cd8ca5e
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="abort:false,zero:false,junk:true"
+fi
diff --git a/deps/jemalloc/test/unit/junk_alloc.c b/deps/jemalloc/test/unit/junk_alloc.c
new file mode 100644
index 000000000..a442a0ca5
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_alloc.c
@@ -0,0 +1 @@
+#include "junk.c"
diff --git a/deps/jemalloc/test/unit/junk_alloc.sh b/deps/jemalloc/test/unit/junk_alloc.sh
new file mode 100644
index 000000000..e1008c2e1
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_alloc.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="abort:false,zero:false,junk:alloc"
+fi
diff --git a/deps/jemalloc/test/unit/junk_free.c b/deps/jemalloc/test/unit/junk_free.c
new file mode 100644
index 000000000..a442a0ca5
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_free.c
@@ -0,0 +1 @@
+#include "junk.c"
diff --git a/deps/jemalloc/test/unit/junk_free.sh b/deps/jemalloc/test/unit/junk_free.sh
new file mode 100644
index 000000000..402196ca6
--- /dev/null
+++ b/deps/jemalloc/test/unit/junk_free.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="abort:false,zero:false,junk:free"
+fi
diff --git a/deps/jemalloc/test/unit/log.c b/deps/jemalloc/test/unit/log.c
new file mode 100644
index 000000000..a52bd737d
--- /dev/null
+++ b/deps/jemalloc/test/unit/log.c
@@ -0,0 +1,193 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/log.h"
+
+static void
+expect_no_logging(const char *names) {
+ log_var_t log_l1 = LOG_VAR_INIT("l1");
+ log_var_t log_l2 = LOG_VAR_INIT("l2");
+ log_var_t log_l2_a = LOG_VAR_INIT("l2.a");
+
+ strcpy(log_var_names, names);
+
+ int count = 0;
+
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1)
+ count++;
+ log_do_end(log_l1)
+
+ log_do_begin(log_l2)
+ count++;
+ log_do_end(log_l2)
+
+ log_do_begin(log_l2_a)
+ count++;
+ log_do_end(log_l2_a)
+ }
+ assert_d_eq(count, 0, "Disabled logging not ignored!");
+}
+
+TEST_BEGIN(test_log_disabled) {
+ test_skip_if(!config_log);
+ atomic_store_b(&log_init_done, true, ATOMIC_RELAXED);
+ expect_no_logging("");
+ expect_no_logging("abc");
+ expect_no_logging("a.b.c");
+ expect_no_logging("l12");
+ expect_no_logging("l123|a456|b789");
+ expect_no_logging("|||");
+}
+TEST_END
+
+TEST_BEGIN(test_log_enabled_direct) {
+ test_skip_if(!config_log);
+ atomic_store_b(&log_init_done, true, ATOMIC_RELAXED);
+ log_var_t log_l1 = LOG_VAR_INIT("l1");
+ log_var_t log_l1_a = LOG_VAR_INIT("l1.a");
+ log_var_t log_l2 = LOG_VAR_INIT("l2");
+
+ int count;
+
+ count = 0;
+ strcpy(log_var_names, "l1");
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1)
+ count++;
+ log_do_end(log_l1)
+ }
+ assert_d_eq(count, 10, "Mis-logged!");
+
+ count = 0;
+ strcpy(log_var_names, "l1.a");
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1_a)
+ count++;
+ log_do_end(log_l1_a)
+ }
+ assert_d_eq(count, 10, "Mis-logged!");
+
+ count = 0;
+ strcpy(log_var_names, "l1.a|abc|l2|def");
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1_a)
+ count++;
+ log_do_end(log_l1_a)
+
+ log_do_begin(log_l2)
+ count++;
+ log_do_end(log_l2)
+ }
+ assert_d_eq(count, 20, "Mis-logged!");
+}
+TEST_END
+
+TEST_BEGIN(test_log_enabled_indirect) {
+ test_skip_if(!config_log);
+ atomic_store_b(&log_init_done, true, ATOMIC_RELAXED);
+ strcpy(log_var_names, "l0|l1|abc|l2.b|def");
+
+ /* On. */
+ log_var_t log_l1 = LOG_VAR_INIT("l1");
+ /* Off. */
+ log_var_t log_l1a = LOG_VAR_INIT("l1a");
+ /* On. */
+ log_var_t log_l1_a = LOG_VAR_INIT("l1.a");
+ /* Off. */
+ log_var_t log_l2_a = LOG_VAR_INIT("l2.a");
+ /* On. */
+ log_var_t log_l2_b_a = LOG_VAR_INIT("l2.b.a");
+ /* On. */
+ log_var_t log_l2_b_b = LOG_VAR_INIT("l2.b.b");
+
+ /* 4 are on total, so should sum to 40. */
+ int count = 0;
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1)
+ count++;
+ log_do_end(log_l1)
+
+ log_do_begin(log_l1a)
+ count++;
+ log_do_end(log_l1a)
+
+ log_do_begin(log_l1_a)
+ count++;
+ log_do_end(log_l1_a)
+
+ log_do_begin(log_l2_a)
+ count++;
+ log_do_end(log_l2_a)
+
+ log_do_begin(log_l2_b_a)
+ count++;
+ log_do_end(log_l2_b_a)
+
+ log_do_begin(log_l2_b_b)
+ count++;
+ log_do_end(log_l2_b_b)
+ }
+
+ assert_d_eq(count, 40, "Mis-logged!");
+}
+TEST_END
+
+TEST_BEGIN(test_log_enabled_global) {
+ test_skip_if(!config_log);
+ atomic_store_b(&log_init_done, true, ATOMIC_RELAXED);
+ strcpy(log_var_names, "abc|.|def");
+
+ log_var_t log_l1 = LOG_VAR_INIT("l1");
+ log_var_t log_l2_a_a = LOG_VAR_INIT("l2.a.a");
+
+ int count = 0;
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(log_l1)
+ count++;
+ log_do_end(log_l1)
+
+ log_do_begin(log_l2_a_a)
+ count++;
+ log_do_end(log_l2_a_a)
+ }
+ assert_d_eq(count, 20, "Mis-logged!");
+}
+TEST_END
+
+TEST_BEGIN(test_logs_if_no_init) {
+ test_skip_if(!config_log);
+ atomic_store_b(&log_init_done, false, ATOMIC_RELAXED);
+
+ log_var_t l = LOG_VAR_INIT("definitely.not.enabled");
+
+ int count = 0;
+ for (int i = 0; i < 10; i++) {
+ log_do_begin(l)
+ count++;
+ log_do_end(l)
+ }
+ assert_d_eq(count, 0, "Logging shouldn't happen if not initialized.");
+}
+TEST_END
+
+/*
+ * This really just checks to make sure that this usage compiles; we don't have
+ * any test code to run.
+ */
+TEST_BEGIN(test_log_only_format_string) {
+ if (false) {
+ LOG("log_str", "No arguments follow this format string.");
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_log_disabled,
+ test_log_enabled_direct,
+ test_log_enabled_indirect,
+ test_log_enabled_global,
+ test_logs_if_no_init,
+ test_log_only_format_string);
+}
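
Editor's note: the direct/indirect/global cases above imply a hierarchical, segment-boundary prefix match: an enabled entry "l2.b" turns on "l2.b" and everything beneath it ("l2.b.a", "l2.b.b", ...), the lone entry "." turns on everything, and "l1" must not match "l1a". A sketch of a matcher with those semantics (a hypothetical helper, not the one in log.h):

    #include <stdbool.h>
    #include <string.h>

    static bool
    log_name_enabled_by(const char *name, const char *entry) {
    	if (strcmp(entry, ".") == 0) {
    		return true;	/* Global wildcard. */
    	}
    	size_t n = strlen(entry);
    	if (strncmp(name, entry, n) != 0) {
    		return false;
    	}
    	/* Match only at a segment boundary: "l1" enables "l1.a", not "l1a". */
    	return name[n] == '\0' || name[n] == '.';
    }

Checking it against the tests: with "l0|l1|abc|l2.b|def" configured, "l1", "l1.a", "l2.b.a", and "l2.b.b" are on while "l1a" and "l2.a" stay off, matching the expected count of 40.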
diff --git a/deps/jemalloc/test/unit/mallctl.c b/deps/jemalloc/test/unit/mallctl.c
index 31fb81057..1ecbab08e 100644
--- a/deps/jemalloc/test/unit/mallctl.c
+++ b/deps/jemalloc/test/unit/mallctl.c
@@ -1,7 +1,8 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_mallctl_errors)
-{
+#include "jemalloc/internal/util.h"
+
+TEST_BEGIN(test_mallctl_errors) {
uint64_t epoch;
size_t sz;
@@ -12,22 +13,23 @@ TEST_BEGIN(test_mallctl_errors)
EPERM, "mallctl() should return EPERM on attempt to write "
"read-only value");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1),
- EINVAL, "mallctl() should return EINVAL for input size mismatch");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1),
- EINVAL, "mallctl() should return EINVAL for input size mismatch");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
+ sizeof(epoch)-1), EINVAL,
+ "mallctl() should return EINVAL for input size mismatch");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
+ sizeof(epoch)+1), EINVAL,
+ "mallctl() should return EINVAL for input size mismatch");
sz = sizeof(epoch)-1;
- assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL,
+ assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL,
"mallctl() should return EINVAL for output size mismatch");
sz = sizeof(epoch)+1;
- assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL,
+ assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL,
"mallctl() should return EINVAL for output size mismatch");
}
TEST_END
-TEST_BEGIN(test_mallctlnametomib_errors)
-{
+TEST_BEGIN(test_mallctlnametomib_errors) {
size_t mib[1];
size_t miblen;
@@ -37,8 +39,7 @@ TEST_BEGIN(test_mallctlnametomib_errors)
}
TEST_END
-TEST_BEGIN(test_mallctlbymib_errors)
-{
+TEST_BEGIN(test_mallctlbymib_errors) {
uint64_t epoch;
size_t sz;
size_t mib[1];
@@ -56,24 +57,25 @@ TEST_BEGIN(test_mallctlbymib_errors)
assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0,
"Unexpected mallctlnametomib() failure");
- assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch,
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch,
sizeof(epoch)-1), EINVAL,
"mallctlbymib() should return EINVAL for input size mismatch");
- assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch,
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch,
sizeof(epoch)+1), EINVAL,
"mallctlbymib() should return EINVAL for input size mismatch");
sz = sizeof(epoch)-1;
- assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL,
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0),
+ EINVAL,
"mallctlbymib() should return EINVAL for output size mismatch");
sz = sizeof(epoch)+1;
- assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL,
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0),
+ EINVAL,
"mallctlbymib() should return EINVAL for output size mismatch");
}
TEST_END
-TEST_BEGIN(test_mallctl_read_write)
-{
+TEST_BEGIN(test_mallctl_read_write) {
uint64_t old_epoch, new_epoch;
size_t sz = sizeof(old_epoch);
@@ -83,24 +85,24 @@ TEST_BEGIN(test_mallctl_read_write)
assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
/* Read. */
- assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0,
+ assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
/* Write. */
- assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)),
- 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch,
+ sizeof(new_epoch)), 0, "Unexpected mallctl() failure");
assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
/* Read+write. */
- assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch,
- sizeof(new_epoch)), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz,
+ (void *)&new_epoch, sizeof(new_epoch)), 0,
+ "Unexpected mallctl() failure");
assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size");
}
TEST_END
-TEST_BEGIN(test_mallctlnametomib_short_mib)
-{
+TEST_BEGIN(test_mallctlnametomib_short_mib) {
size_t mib[4];
size_t miblen;
@@ -114,67 +116,65 @@ TEST_BEGIN(test_mallctlnametomib_short_mib)
}
TEST_END
-TEST_BEGIN(test_mallctl_config)
-{
-
-#define TEST_MALLCTL_CONFIG(config) do { \
- bool oldval; \
+TEST_BEGIN(test_mallctl_config) {
+#define TEST_MALLCTL_CONFIG(config, t) do { \
+ t oldval; \
size_t sz = sizeof(oldval); \
- assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \
- 0, "Unexpected mallctl() failure"); \
+ assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \
+ NULL, 0), 0, "Unexpected mallctl() failure"); \
assert_b_eq(oldval, config_##config, "Incorrect config value"); \
assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \
} while (0)
- TEST_MALLCTL_CONFIG(debug);
- TEST_MALLCTL_CONFIG(dss);
- TEST_MALLCTL_CONFIG(fill);
- TEST_MALLCTL_CONFIG(lazy_lock);
- TEST_MALLCTL_CONFIG(mremap);
- TEST_MALLCTL_CONFIG(munmap);
- TEST_MALLCTL_CONFIG(prof);
- TEST_MALLCTL_CONFIG(prof_libgcc);
- TEST_MALLCTL_CONFIG(prof_libunwind);
- TEST_MALLCTL_CONFIG(stats);
- TEST_MALLCTL_CONFIG(tcache);
- TEST_MALLCTL_CONFIG(tls);
- TEST_MALLCTL_CONFIG(utrace);
- TEST_MALLCTL_CONFIG(valgrind);
- TEST_MALLCTL_CONFIG(xmalloc);
+ TEST_MALLCTL_CONFIG(cache_oblivious, bool);
+ TEST_MALLCTL_CONFIG(debug, bool);
+ TEST_MALLCTL_CONFIG(fill, bool);
+ TEST_MALLCTL_CONFIG(lazy_lock, bool);
+ TEST_MALLCTL_CONFIG(malloc_conf, const char *);
+ TEST_MALLCTL_CONFIG(prof, bool);
+ TEST_MALLCTL_CONFIG(prof_libgcc, bool);
+ TEST_MALLCTL_CONFIG(prof_libunwind, bool);
+ TEST_MALLCTL_CONFIG(stats, bool);
+ TEST_MALLCTL_CONFIG(utrace, bool);
+ TEST_MALLCTL_CONFIG(xmalloc, bool);
#undef TEST_MALLCTL_CONFIG
}
TEST_END
-TEST_BEGIN(test_mallctl_opt)
-{
+TEST_BEGIN(test_mallctl_opt) {
bool config_always = true;
-#define TEST_MALLCTL_OPT(t, opt, config) do { \
+#define TEST_MALLCTL_OPT(t, opt, config) do { \
t oldval; \
size_t sz = sizeof(oldval); \
int expected = config_##config ? 0 : ENOENT; \
- int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \
+ int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \
+ 0); \
assert_d_eq(result, expected, \
"Unexpected mallctl() result for opt."#opt); \
assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \
} while (0)
TEST_MALLCTL_OPT(bool, abort, always);
- TEST_MALLCTL_OPT(size_t, lg_chunk, always);
+ TEST_MALLCTL_OPT(bool, abort_conf, always);
+ TEST_MALLCTL_OPT(const char *, metadata_thp, always);
+ TEST_MALLCTL_OPT(bool, retain, always);
TEST_MALLCTL_OPT(const char *, dss, always);
- TEST_MALLCTL_OPT(size_t, narenas, always);
- TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always);
+ TEST_MALLCTL_OPT(unsigned, narenas, always);
+ TEST_MALLCTL_OPT(const char *, percpu_arena, always);
+ TEST_MALLCTL_OPT(bool, background_thread, always);
+ TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always);
+ TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always);
TEST_MALLCTL_OPT(bool, stats_print, always);
- TEST_MALLCTL_OPT(bool, junk, fill);
- TEST_MALLCTL_OPT(size_t, quarantine, fill);
- TEST_MALLCTL_OPT(bool, redzone, fill);
+ TEST_MALLCTL_OPT(const char *, junk, fill);
TEST_MALLCTL_OPT(bool, zero, fill);
TEST_MALLCTL_OPT(bool, utrace, utrace);
- TEST_MALLCTL_OPT(bool, valgrind, valgrind);
TEST_MALLCTL_OPT(bool, xmalloc, xmalloc);
- TEST_MALLCTL_OPT(bool, tcache, tcache);
- TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache);
+ TEST_MALLCTL_OPT(bool, tcache, always);
+ TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always);
+ TEST_MALLCTL_OPT(size_t, lg_tcache_max, always);
+ TEST_MALLCTL_OPT(const char *, thp, always);
TEST_MALLCTL_OPT(bool, prof, prof);
TEST_MALLCTL_OPT(const char *, prof_prefix, prof);
TEST_MALLCTL_OPT(bool, prof_active, prof);
@@ -189,14 +189,13 @@ TEST_BEGIN(test_mallctl_opt)
}
TEST_END
-TEST_BEGIN(test_manpage_example)
-{
+TEST_BEGIN(test_manpage_example) {
unsigned nbins, i;
size_t mib[4];
size_t len, miblen;
len = sizeof(nbins);
- assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0,
+ assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0,
"Unexpected mallctl() failure");
miblen = 4;
@@ -207,32 +206,268 @@ TEST_BEGIN(test_manpage_example)
mib[2] = i;
len = sizeof(bin_size);
- assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0),
- 0, "Unexpected mallctlbymib() failure");
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len,
+ NULL, 0), 0, "Unexpected mallctlbymib() failure");
/* Do something with bin_size... */
}
}
TEST_END
-TEST_BEGIN(test_thread_arena)
-{
- unsigned arena_old, arena_new, narenas;
- size_t sz = sizeof(unsigned);
+TEST_BEGIN(test_tcache_none) {
+ test_skip_if(!opt_tcache);
+
+ /* Allocate p and q. */
+ void *p0 = mallocx(42, 0);
+ assert_ptr_not_null(p0, "Unexpected mallocx() failure");
+ void *q = mallocx(42, 0);
+ assert_ptr_not_null(q, "Unexpected mallocx() failure");
+
+ /* Deallocate p and q, but bypass the tcache for q. */
+ dallocx(p0, 0);
+ dallocx(q, MALLOCX_TCACHE_NONE);
+
+ /* Make sure that tcache-based allocation returns p, not q. */
+ void *p1 = mallocx(42, 0);
+ assert_ptr_not_null(p1, "Unexpected mallocx() failure");
+ assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region");
+
+ /* Clean up. */
+ dallocx(p1, MALLOCX_TCACHE_NONE);
+}
+TEST_END
+
+TEST_BEGIN(test_tcache) {
+#define NTCACHES 10
+ unsigned tis[NTCACHES];
+ void *ps[NTCACHES];
+ void *qs[NTCACHES];
+ unsigned i;
+ size_t sz, psz, qsz;
+
+ psz = 42;
+ qsz = nallocx(psz, 0) + 1;
+
+ /* Create tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure, i=%u", i);
+ }
+
+ /* Exercise tcache ID recycling. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.destroy", NULL, NULL,
+ (void *)&tis[i], sizeof(unsigned)), 0,
+ "Unexpected mallctl() failure, i=%u", i);
+ }
+ for (i = 0; i < NTCACHES; i++) {
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure, i=%u", i);
+ }
+
+ /* Flush empty tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+
+ /* Cache some allocations. */
+ for (i = 0; i < NTCACHES; i++) {
+ ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ dallocx(ps[i], MALLOCX_TCACHE(tis[i]));
+
+ qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+ }
+
+ /* Verify that tcaches allocate cached regions. */
+ for (i = 0; i < NTCACHES; i++) {
+ void *p0 = ps[i];
+ ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u",
+ i);
+ assert_ptr_eq(ps[i], p0,
+ "Expected mallocx() to allocate cached region, i=%u", i);
+ }
- assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0,
+ /* Verify that reallocation uses cached regions. */
+ for (i = 0; i < NTCACHES; i++) {
+ void *q0 = qs[i];
+ qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i]));
+ assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u",
+ i);
+ assert_ptr_eq(qs[i], q0,
+ "Expected rallocx() to allocate cached region, i=%u", i);
+ /* Avoid undefined behavior in case of test failure. */
+ if (qs[i] == NULL) {
+ qs[i] = ps[i];
+ }
+ }
+ for (i = 0; i < NTCACHES; i++) {
+ dallocx(qs[i], MALLOCX_TCACHE(tis[i]));
+ }
+
+ /* Flush some non-empty tcaches. */
+ for (i = 0; i < NTCACHES/2; i++) {
+ assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i],
+ sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u",
+ i);
+ }
+
+ /* Destroy tcaches. */
+ for (i = 0; i < NTCACHES; i++) {
+ assert_d_eq(mallctl("tcache.destroy", NULL, NULL,
+ (void *)&tis[i], sizeof(unsigned)), 0,
+ "Unexpected mallctl() failure, i=%u", i);
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_thread_arena) {
+ unsigned old_arena_ind, new_arena_ind, narenas;
+
+ const char *opa;
+ size_t sz = sizeof(opa);
+ assert_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect");
- arena_new = narenas - 1;
- assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new,
- sizeof(unsigned)), 0, "Unexpected mallctl() failure");
- arena_new = 0;
- assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new,
- sizeof(unsigned)), 0, "Unexpected mallctl() failure");
+
+ if (strcmp(opa, "disabled") == 0) {
+ new_arena_ind = narenas - 1;
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+ (void *)&new_arena_ind, sizeof(unsigned)), 0,
+ "Unexpected mallctl() failure");
+ new_arena_ind = 0;
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+ (void *)&new_arena_ind, sizeof(unsigned)), 0,
+ "Unexpected mallctl() failure");
+ } else {
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+ new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1;
+ if (old_arena_ind != new_arena_ind) {
+ assert_d_eq(mallctl("thread.arena",
+ (void *)&old_arena_ind, &sz, (void *)&new_arena_ind,
+ sizeof(unsigned)), EPERM, "thread.arena ctl "
+ "should not be allowed with percpu arena");
+ }
+ }
}
TEST_END
-TEST_BEGIN(test_arena_i_purge)
-{
+TEST_BEGIN(test_arena_i_initialized) {
+ unsigned narenas, i;
+ size_t sz;
+ size_t mib[3];
+ size_t miblen = sizeof(mib) / sizeof(size_t);
+ bool initialized;
+
+ sz = sizeof(narenas);
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+
+ assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ for (i = 0; i < narenas; i++) {
+ mib[1] = i;
+ sz = sizeof(initialized);
+ assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
+ }
+
+ mib[1] = MALLCTL_ARENAS_ALL;
+ sz = sizeof(initialized);
+ assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ assert_true(initialized,
+ "Merged arena statistics should always be initialized");
+
+ /* Equivalent to the above but using mallctl() directly. */
+ sz = sizeof(initialized);
+ assert_d_eq(mallctl(
+ "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized",
+ (void *)&initialized, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ assert_true(initialized,
+ "Merged arena statistics should always be initialized");
+}
+TEST_END
+
+TEST_BEGIN(test_arena_i_dirty_decay_ms) {
+ ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms;
+ size_t sz = sizeof(ssize_t);
+
+ assert_d_eq(mallctl("arena.0.dirty_decay_ms",
+ (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+
+ dirty_decay_ms = -2;
+ assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL,
+ (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ dirty_decay_ms = 0x7fffffff;
+ assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL,
+ (void *)&dirty_decay_ms, sizeof(ssize_t)), 0,
+ "Unexpected mallctl() failure");
+
+ for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1;
+ dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms,
+ dirty_decay_ms++) {
+ ssize_t old_dirty_decay_ms;
+
+ assert_d_eq(mallctl("arena.0.dirty_decay_ms",
+ (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms,
+ sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+ assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms,
+ "Unexpected old arena.0.dirty_decay_ms");
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_arena_i_muzzy_decay_ms) {
+ ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms;
+ size_t sz = sizeof(ssize_t);
+
+ assert_d_eq(mallctl("arena.0.muzzy_decay_ms",
+ (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+
+ muzzy_decay_ms = -2;
+ assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL,
+ (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ muzzy_decay_ms = 0x7fffffff;
+ assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL,
+ (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0,
+ "Unexpected mallctl() failure");
+
+ for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1;
+ muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms,
+ muzzy_decay_ms++) {
+ ssize_t old_muzzy_decay_ms;
+
+ assert_d_eq(mallctl("arena.0.muzzy_decay_ms",
+ (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms,
+ sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+ assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms,
+ "Unexpected old arena.0.muzzy_decay_ms");
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_arena_i_purge) {
unsigned narenas;
size_t sz = sizeof(unsigned);
size_t mib[3];
@@ -241,128 +476,261 @@ TEST_BEGIN(test_arena_i_purge)
assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
"Unexpected mallctl() failure");
- assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0,
"Unexpected mallctlnametomib() failure");
mib[1] = narenas;
assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
"Unexpected mallctlbymib() failure");
+
+ mib[1] = MALLCTL_ARENAS_ALL;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
}
TEST_END
-TEST_BEGIN(test_arena_i_dss)
-{
+TEST_BEGIN(test_arena_i_decay) {
+ unsigned narenas;
+ size_t sz = sizeof(unsigned);
+ size_t mib[3];
+ size_t miblen = 3;
+
+ assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0),
+ 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = narenas;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+
+ mib[1] = MALLCTL_ARENAS_ALL;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+TEST_END
+
+TEST_BEGIN(test_arena_i_dss) {
const char *dss_prec_old, *dss_prec_new;
size_t sz = sizeof(dss_prec_old);
+ size_t mib[3];
+ size_t miblen;
+
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+
+ dss_prec_new = "disabled";
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz,
+ (void *)&dss_prec_new, sizeof(dss_prec_new)), 0,
+ "Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected default for dss precedence");
+
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz,
+ (void *)&dss_prec_old, sizeof(dss_prec_old)), 0,
+ "Unexpected mallctl() failure");
+
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected value for dss precedence");
- dss_prec_new = "primary";
- assert_d_eq(mallctl("arena.0.dss", &dss_prec_old, &sz, &dss_prec_new,
- sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure");
+ mib[1] = narenas_total_get();
+ dss_prec_new = "disabled";
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz,
+ (void *)&dss_prec_new, sizeof(dss_prec_new)), 0,
+ "Unexpected mallctl() failure");
assert_str_ne(dss_prec_old, "primary",
"Unexpected default for dss precedence");
- assert_d_eq(mallctl("arena.0.dss", &dss_prec_new, &sz, &dss_prec_old,
- sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz,
+	    (void *)&dss_prec_old, sizeof(dss_prec_old)), 0,
+ "Unexpected mallctl() failure");
+
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
+ assert_str_ne(dss_prec_old, "primary",
+ "Unexpected value for dss precedence");
}
TEST_END
-TEST_BEGIN(test_arenas_purge)
-{
- unsigned arena = 0;
+TEST_BEGIN(test_arena_i_retain_grow_limit) {
+ size_t old_limit, new_limit, default_limit;
+ size_t mib[3];
+ size_t miblen;
- assert_d_eq(mallctl("arenas.purge", NULL, NULL, &arena, sizeof(arena)),
+ bool retain_enabled;
+ size_t sz = sizeof(retain_enabled);
+ assert_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0),
0, "Unexpected mallctl() failure");
+ test_skip_if(!retain_enabled);
+
+ sz = sizeof(default_limit);
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen),
+ 0, "Unexpected mallctlnametomib() error");
- assert_d_eq(mallctl("arenas.purge", NULL, NULL, NULL, 0), 0,
+ assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
+ assert_zu_eq(default_limit, sz_pind2sz(EXTENT_GROW_MAX_PIND),
+ "Unexpected default for retain_grow_limit");
+
+ new_limit = PAGE - 1;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
+ sizeof(new_limit)), EFAULT, "Unexpected mallctl() success");
+
+ new_limit = PAGE + 1;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
+ sizeof(new_limit)), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ assert_zu_eq(old_limit, PAGE,
+ "Unexpected value for retain_grow_limit");
+
+ /* Expect grow less than psize class 10. */
+ new_limit = sz_pind2sz(10) - 1;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit,
+ sizeof(new_limit)), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ assert_zu_eq(old_limit, sz_pind2sz(9),
+ "Unexpected value for retain_grow_limit");
+
+ /* Restore to default. */
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit,
+ sizeof(default_limit)), 0, "Unexpected mallctl() failure");
}
TEST_END
-TEST_BEGIN(test_arenas_initialized)
-{
- unsigned narenas;
- size_t sz = sizeof(narenas);
+TEST_BEGIN(test_arenas_dirty_decay_ms) {
+ ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms;
+ size_t sz = sizeof(ssize_t);
- assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0,
+ assert_d_eq(mallctl("arenas.dirty_decay_ms",
+ (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
- {
- bool initialized[narenas];
- sz = narenas * sizeof(bool);
- assert_d_eq(mallctl("arenas.initialized", initialized, &sz,
- NULL, 0), 0, "Unexpected mallctl() failure");
+ dirty_decay_ms = -2;
+ assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL,
+ (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ dirty_decay_ms = 0x7fffffff;
+ assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL,
+ (void *)&dirty_decay_ms, sizeof(ssize_t)), 0,
+	    "Unexpected mallctl() failure");
+
+ for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1;
+ dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms,
+ dirty_decay_ms++) {
+ ssize_t old_dirty_decay_ms;
+
+ assert_d_eq(mallctl("arenas.dirty_decay_ms",
+ (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms,
+ sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+ assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms,
+ "Unexpected old arenas.dirty_decay_ms");
}
}
TEST_END
-TEST_BEGIN(test_arenas_constants)
-{
+TEST_BEGIN(test_arenas_muzzy_decay_ms) {
+ ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms;
+ size_t sz = sizeof(ssize_t);
-#define TEST_ARENAS_CONSTANT(t, name, expected) do { \
+ assert_d_eq(mallctl("arenas.muzzy_decay_ms",
+ (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+
+ muzzy_decay_ms = -2;
+ assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL,
+ (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT,
+ "Unexpected mallctl() success");
+
+ muzzy_decay_ms = 0x7fffffff;
+ assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL,
+ (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0,
+	    "Unexpected mallctl() failure");
+
+ for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1;
+ muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms,
+ muzzy_decay_ms++) {
+ ssize_t old_muzzy_decay_ms;
+
+ assert_d_eq(mallctl("arenas.muzzy_decay_ms",
+ (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms,
+ sizeof(ssize_t)), 0, "Unexpected mallctl() failure");
+ assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms,
+ "Unexpected old arenas.muzzy_decay_ms");
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_arenas_constants) {
+#define TEST_ARENAS_CONSTANT(t, name, expected) do { \
t name; \
size_t sz = sizeof(t); \
- assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \
- "Unexpected mallctl() failure"); \
+ assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \
+ 0), 0, "Unexpected mallctl() failure"); \
assert_zu_eq(name, expected, "Incorrect "#name" size"); \
} while (0)
TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM);
TEST_ARENAS_CONSTANT(size_t, page, PAGE);
TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS);
- TEST_ARENAS_CONSTANT(size_t, nlruns, nlclasses);
+ TEST_ARENAS_CONSTANT(unsigned, nlextents, NSIZES - NBINS);
#undef TEST_ARENAS_CONSTANT
}
TEST_END
-TEST_BEGIN(test_arenas_bin_constants)
-{
-
-#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \
+TEST_BEGIN(test_arenas_bin_constants) {
+#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \
t name; \
size_t sz = sizeof(t); \
- assert_d_eq(mallctl("arenas.bin.0."#name, &name, &sz, NULL, 0), \
- 0, "Unexpected mallctl() failure"); \
+ assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \
+ NULL, 0), 0, "Unexpected mallctl() failure"); \
assert_zu_eq(name, expected, "Incorrect "#name" size"); \
} while (0)
- TEST_ARENAS_BIN_CONSTANT(size_t, size, arena_bin_info[0].reg_size);
- TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, arena_bin_info[0].nregs);
- TEST_ARENAS_BIN_CONSTANT(size_t, run_size, arena_bin_info[0].run_size);
+ TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size);
+ TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs);
+ TEST_ARENAS_BIN_CONSTANT(size_t, slab_size,
+ bin_infos[0].slab_size);
#undef TEST_ARENAS_BIN_CONSTANT
}
TEST_END
-TEST_BEGIN(test_arenas_lrun_constants)
-{
-
-#define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \
+TEST_BEGIN(test_arenas_lextent_constants) {
+#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \
t name; \
size_t sz = sizeof(t); \
- assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \
- 0), 0, "Unexpected mallctl() failure"); \
+ assert_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \
+ &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \
assert_zu_eq(name, expected, "Incorrect "#name" size"); \
} while (0)
- TEST_ARENAS_LRUN_CONSTANT(size_t, size, (1 << LG_PAGE));
+ TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, LARGE_MINCLASS);
-#undef TEST_ARENAS_LRUN_CONSTANT
+#undef TEST_ARENAS_LEXTENT_CONSTANT
}
TEST_END
-TEST_BEGIN(test_arenas_extend)
-{
+TEST_BEGIN(test_arenas_create) {
unsigned narenas_before, arena, narenas_after;
size_t sz = sizeof(unsigned);
- assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0,
- "Unexpected mallctl() failure");
- assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0,
- "Unexpected mallctl() failure");
- assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0,
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz,
+ NULL, 0), 0, "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
"Unexpected mallctl() failure");
+ assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
assert_u_eq(narenas_before+1, narenas_after,
"Unexpected number of arenas before versus after extension");
@@ -370,18 +738,34 @@ TEST_BEGIN(test_arenas_extend)
}
TEST_END
-TEST_BEGIN(test_stats_arenas)
-{
+TEST_BEGIN(test_arenas_lookup) {
+ unsigned arena, arena1;
+ void *ptr;
+ size_t sz = sizeof(unsigned);
+
+ assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0,
+ "Unexpected mallctl() failure");
+ ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(ptr, "Unexpected mallocx() failure");
+ assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)),
+ 0, "Unexpected mallctl() failure");
+ assert_u_eq(arena, arena1, "Unexpected arena index");
+ dallocx(ptr, 0);
+}
+TEST_END
-#define TEST_STATS_ARENAS(t, name) do { \
+TEST_BEGIN(test_stats_arenas) {
+#define TEST_STATS_ARENAS(t, name) do { \
t name; \
size_t sz = sizeof(t); \
- assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \
- 0), 0, "Unexpected mallctl() failure"); \
+ assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \
+ NULL, 0), 0, "Unexpected mallctl() failure"); \
} while (0)
- TEST_STATS_ARENAS(const char *, dss);
TEST_STATS_ARENAS(unsigned, nthreads);
+ TEST_STATS_ARENAS(const char *, dss);
+ TEST_STATS_ARENAS(ssize_t, dirty_decay_ms);
+ TEST_STATS_ARENAS(ssize_t, muzzy_decay_ms);
TEST_STATS_ARENAS(size_t, pactive);
TEST_STATS_ARENAS(size_t, pdirty);
@@ -390,10 +774,8 @@ TEST_BEGIN(test_stats_arenas)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_mallctl_errors,
test_mallctlnametomib_errors,
test_mallctlbymib_errors,
@@ -402,14 +784,22 @@ main(void)
test_mallctl_config,
test_mallctl_opt,
test_manpage_example,
+ test_tcache_none,
+ test_tcache,
test_thread_arena,
+ test_arena_i_initialized,
+ test_arena_i_dirty_decay_ms,
+ test_arena_i_muzzy_decay_ms,
test_arena_i_purge,
+ test_arena_i_decay,
test_arena_i_dss,
- test_arenas_purge,
- test_arenas_initialized,
+ test_arena_i_retain_grow_limit,
+ test_arenas_dirty_decay_ms,
+ test_arenas_muzzy_decay_ms,
test_arenas_constants,
test_arenas_bin_constants,
- test_arenas_lrun_constants,
- test_arenas_extend,
- test_stats_arenas));
+ test_arenas_lextent_constants,
+ test_arenas_create,
+ test_arenas_lookup,
+ test_stats_arenas);
}
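
A note on the pattern above: the churn is mostly mechanical ((void *) casts on
every oldp/newp argument), and the new tests all follow jemalloc's documented
four-argument read/write convention for mallctl(). A minimal standalone sketch
of that convention, using only the public <jemalloc/jemalloc.h> API (the
variable names and messages are illustrative, not taken from the patch):

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        /* Read the old statistics epoch and advance it, in one call. */
        uint64_t old_epoch, new_epoch = 1;
        size_t sz = sizeof(old_epoch);
        if (mallctl("epoch", (void *)&old_epoch, &sz,
            (void *)&new_epoch, sizeof(new_epoch)) != 0) {
            fprintf(stderr, "mallctl(\"epoch\") failed\n");
            return 1;
        }

        /* Translate a name to a MIB once, then index it cheaply. */
        size_t mib[4];
        size_t miblen = sizeof(mib) / sizeof(size_t);
        if (mallctlnametomib("arenas.bin.0.size", mib, &miblen) == 0) {
            size_t bin_size;
            mib[2] = 0; /* bin index */
            sz = sizeof(bin_size);
            if (mallctlbymib(mib, miblen, (void *)&bin_size, &sz,
                NULL, 0) == 0) {
                printf("bin 0 region size: %zu\n", bin_size);
            }
        }
        return 0;
    }
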
diff --git a/deps/jemalloc/test/unit/util.c b/deps/jemalloc/test/unit/malloc_io.c
index dc3cfe8a9..79ba7fc53 100644
--- a/deps/jemalloc/test/unit/util.c
+++ b/deps/jemalloc/test/unit/malloc_io.c
@@ -1,38 +1,6 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_pow2_ceil)
-{
- unsigned i, pow2;
- size_t x;
-
- assert_zu_eq(pow2_ceil(0), 0, "Unexpected result");
-
- for (i = 0; i < sizeof(size_t) * 8; i++) {
- assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i,
- "Unexpected result");
- }
-
- for (i = 2; i < sizeof(size_t) * 8; i++) {
- assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i,
- "Unexpected result");
- }
-
- for (i = 0; i < sizeof(size_t) * 8 - 1; i++) {
- assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1),
- "Unexpected result");
- }
-
- for (pow2 = 1; pow2 < 25; pow2++) {
- for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) {
- assert_zu_eq(pow2_ceil(x), ZU(1) << pow2,
- "Unexpected result, x=%zu", x);
- }
- }
-}
-TEST_END
-
-TEST_BEGIN(test_malloc_strtoumax_no_endptr)
-{
+TEST_BEGIN(test_malloc_strtoumax_no_endptr) {
int err;
set_errno(0);
@@ -42,8 +10,7 @@ TEST_BEGIN(test_malloc_strtoumax_no_endptr)
}
TEST_END
-TEST_BEGIN(test_malloc_strtoumax)
-{
+TEST_BEGIN(test_malloc_strtoumax) {
struct test_s {
const char *input;
const char *expected_remainder;
@@ -52,8 +19,9 @@ TEST_BEGIN(test_malloc_strtoumax)
const char *expected_errno_name;
uintmax_t expected_x;
};
-#define ERR(e) e, #e
-#define UMAX(x) ((uintmax_t)x##ULL)
+#define ERR(e) e, #e
+#define KUMAX(x) ((uintmax_t)x##ULL)
+#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL)
struct test_s tests[] = {
{"0", "0", -1, ERR(EINVAL), UINTMAX_MAX},
{"0", "0", 1, ERR(EINVAL), UINTMAX_MAX},
@@ -64,51 +32,52 @@ TEST_BEGIN(test_malloc_strtoumax)
{"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX},
{"-", "-", 0, ERR(EINVAL), UINTMAX_MAX},
- {"42", "", 0, ERR(0), UMAX(42)},
- {"+42", "", 0, ERR(0), UMAX(42)},
- {"-42", "", 0, ERR(0), UMAX(-42)},
- {"042", "", 0, ERR(0), UMAX(042)},
- {"+042", "", 0, ERR(0), UMAX(042)},
- {"-042", "", 0, ERR(0), UMAX(-042)},
- {"0x42", "", 0, ERR(0), UMAX(0x42)},
- {"+0x42", "", 0, ERR(0), UMAX(0x42)},
- {"-0x42", "", 0, ERR(0), UMAX(-0x42)},
-
- {"0", "", 0, ERR(0), UMAX(0)},
- {"1", "", 0, ERR(0), UMAX(1)},
-
- {"42", "", 0, ERR(0), UMAX(42)},
- {" 42", "", 0, ERR(0), UMAX(42)},
- {"42 ", " ", 0, ERR(0), UMAX(42)},
- {"0x", "x", 0, ERR(0), UMAX(0)},
- {"42x", "x", 0, ERR(0), UMAX(42)},
-
- {"07", "", 0, ERR(0), UMAX(7)},
- {"010", "", 0, ERR(0), UMAX(8)},
- {"08", "8", 0, ERR(0), UMAX(0)},
- {"0_", "_", 0, ERR(0), UMAX(0)},
-
- {"0x", "x", 0, ERR(0), UMAX(0)},
- {"0X", "X", 0, ERR(0), UMAX(0)},
- {"0xg", "xg", 0, ERR(0), UMAX(0)},
- {"0XA", "", 0, ERR(0), UMAX(10)},
-
- {"010", "", 10, ERR(0), UMAX(10)},
- {"0x3", "x3", 10, ERR(0), UMAX(0)},
-
- {"12", "2", 2, ERR(0), UMAX(1)},
- {"78", "8", 8, ERR(0), UMAX(7)},
- {"9a", "a", 10, ERR(0), UMAX(9)},
- {"9A", "A", 10, ERR(0), UMAX(9)},
- {"fg", "g", 16, ERR(0), UMAX(15)},
- {"FG", "G", 16, ERR(0), UMAX(15)},
- {"0xfg", "g", 16, ERR(0), UMAX(15)},
- {"0XFG", "G", 16, ERR(0), UMAX(15)},
- {"z_", "_", 36, ERR(0), UMAX(35)},
- {"Z_", "_", 36, ERR(0), UMAX(35)}
+ {"42", "", 0, ERR(0), KUMAX(42)},
+ {"+42", "", 0, ERR(0), KUMAX(42)},
+ {"-42", "", 0, ERR(0), KSMAX(-42)},
+ {"042", "", 0, ERR(0), KUMAX(042)},
+ {"+042", "", 0, ERR(0), KUMAX(042)},
+ {"-042", "", 0, ERR(0), KSMAX(-042)},
+ {"0x42", "", 0, ERR(0), KUMAX(0x42)},
+ {"+0x42", "", 0, ERR(0), KUMAX(0x42)},
+ {"-0x42", "", 0, ERR(0), KSMAX(-0x42)},
+
+ {"0", "", 0, ERR(0), KUMAX(0)},
+ {"1", "", 0, ERR(0), KUMAX(1)},
+
+ {"42", "", 0, ERR(0), KUMAX(42)},
+ {" 42", "", 0, ERR(0), KUMAX(42)},
+ {"42 ", " ", 0, ERR(0), KUMAX(42)},
+ {"0x", "x", 0, ERR(0), KUMAX(0)},
+ {"42x", "x", 0, ERR(0), KUMAX(42)},
+
+ {"07", "", 0, ERR(0), KUMAX(7)},
+ {"010", "", 0, ERR(0), KUMAX(8)},
+ {"08", "8", 0, ERR(0), KUMAX(0)},
+ {"0_", "_", 0, ERR(0), KUMAX(0)},
+
+ {"0x", "x", 0, ERR(0), KUMAX(0)},
+ {"0X", "X", 0, ERR(0), KUMAX(0)},
+ {"0xg", "xg", 0, ERR(0), KUMAX(0)},
+ {"0XA", "", 0, ERR(0), KUMAX(10)},
+
+ {"010", "", 10, ERR(0), KUMAX(10)},
+ {"0x3", "x3", 10, ERR(0), KUMAX(0)},
+
+ {"12", "2", 2, ERR(0), KUMAX(1)},
+ {"78", "8", 8, ERR(0), KUMAX(7)},
+ {"9a", "a", 10, ERR(0), KUMAX(9)},
+ {"9A", "A", 10, ERR(0), KUMAX(9)},
+ {"fg", "g", 16, ERR(0), KUMAX(15)},
+ {"FG", "G", 16, ERR(0), KUMAX(15)},
+ {"0xfg", "g", 16, ERR(0), KUMAX(15)},
+ {"0XFG", "G", 16, ERR(0), KUMAX(15)},
+ {"z_", "_", 36, ERR(0), KUMAX(35)},
+ {"Z_", "_", 36, ERR(0), KUMAX(35)}
};
#undef ERR
-#undef UMAX
+#undef KUMAX
+#undef KSMAX
unsigned i;
for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) {
@@ -135,18 +104,17 @@ TEST_BEGIN(test_malloc_strtoumax)
}
TEST_END
-TEST_BEGIN(test_malloc_snprintf_truncated)
-{
-#define BUFLEN 15
+TEST_BEGIN(test_malloc_snprintf_truncated) {
+#define BUFLEN 15
char buf[BUFLEN];
- int result;
+ size_t result;
size_t len;
-#define TEST(expected_str_untruncated, fmt...) do { \
- result = malloc_snprintf(buf, len, fmt); \
+#define TEST(expected_str_untruncated, ...) do { \
+ result = malloc_snprintf(buf, len, __VA_ARGS__); \
assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \
"Unexpected string inequality (\"%s\" vs \"%s\")", \
- buf, expected_str_untruncated); \
- assert_d_eq(result, strlen(expected_str_untruncated), \
+ buf, expected_str_untruncated); \
+ assert_zu_eq(result, strlen(expected_str_untruncated), \
"Unexpected result"); \
} while (0)
@@ -168,15 +136,14 @@ TEST_BEGIN(test_malloc_snprintf_truncated)
}
TEST_END
-TEST_BEGIN(test_malloc_snprintf)
-{
-#define BUFLEN 128
+TEST_BEGIN(test_malloc_snprintf) {
+#define BUFLEN 128
char buf[BUFLEN];
- int result;
-#define TEST(expected_str, fmt...) do { \
- result = malloc_snprintf(buf, sizeof(buf), fmt); \
+ size_t result;
+#define TEST(expected_str, ...) do { \
+ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \
assert_str_eq(buf, expected_str, "Unexpected output"); \
- assert_d_eq(result, strlen(expected_str), "Unexpected result"); \
+ assert_zu_eq(result, strlen(expected_str), "Unexpected result");\
} while (0)
TEST("hello", "hello");
@@ -282,13 +249,10 @@ TEST_BEGIN(test_malloc_snprintf)
TEST_END
int
-main(void)
-{
-
- return (test(
- test_pow2_ceil,
+main(void) {
+ return test(
test_malloc_strtoumax_no_endptr,
test_malloc_strtoumax,
test_malloc_snprintf_truncated,
- test_malloc_snprintf));
+ test_malloc_snprintf);
}
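
The renamed KUMAX()/KSMAX() table above is easier to read with the parsing
rules in mind: base 0 auto-detects "0x" and "0" prefixes, a digit at or above
the base terminates the scan, and negative input wraps modulo UINTMAX_MAX + 1
(hence the separate signed-wrapping macro). malloc_strtoumax() is internal,
but the table rows match standard strtoumax() semantics, so a few of them can
be reproduced with the libc function:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void) {
        char *end;

        /* Base 0 auto-detects: "0x42" parses as hex. */
        printf("%ju\n", strtoumax("0x42", &end, 0));    /* 66 */

        /* "08" in base 0: octal prefix "0", then '8' stops the scan. */
        uintmax_t v = strtoumax("08", &end, 0);
        printf("%ju rest=\"%s\"\n", v, end);            /* 0 rest="8" */

        /* "12" in base 2: only '1' is a valid digit. */
        printf("%ju\n", strtoumax("12", &end, 2));      /* 1 */

        /* "-42" wraps, matching KSMAX(-42) in the table. */
        printf("%ju\n", strtoumax("-42", &end, 0));     /* UINTMAX_MAX - 41 */
        return 0;
    }
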
diff --git a/deps/jemalloc/test/unit/math.c b/deps/jemalloc/test/unit/math.c
index a1b288ea1..09ef20c7b 100644
--- a/deps/jemalloc/test/unit/math.c
+++ b/deps/jemalloc/test/unit/math.c
@@ -1,33 +1,42 @@
#include "test/jemalloc_test.h"
-#define MAX_REL_ERR 1.0e-9
-#define MAX_ABS_ERR 1.0e-9
+#define MAX_REL_ERR 1.0e-9
+#define MAX_ABS_ERR 1.0e-9
+
+#include <float.h>
+
+#ifdef __PGI
+#undef INFINITY
+#endif
+
+#ifndef INFINITY
+#define INFINITY (DBL_MAX + DBL_MAX)
+#endif
static bool
-double_eq_rel(double a, double b, double max_rel_err, double max_abs_err)
-{
+double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) {
double rel_err;
- if (fabs(a - b) < max_abs_err)
- return (true);
+ if (fabs(a - b) < max_abs_err) {
+ return true;
+ }
rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a);
return (rel_err < max_rel_err);
}
static uint64_t
-factorial(unsigned x)
-{
+factorial(unsigned x) {
uint64_t ret = 1;
unsigned i;
- for (i = 2; i <= x; i++)
+ for (i = 2; i <= x; i++) {
ret *= (uint64_t)i;
+ }
- return (ret);
+ return ret;
}
-TEST_BEGIN(test_ln_gamma_factorial)
-{
+TEST_BEGIN(test_ln_gamma_factorial) {
unsigned x;
/* exp(ln_gamma(x)) == (x-1)! for integer x. */
@@ -178,8 +187,7 @@ static const double ln_gamma_misc_expected[] = {
359.13420536957539753
};
-TEST_BEGIN(test_ln_gamma_misc)
-{
+TEST_BEGIN(test_ln_gamma_misc) {
unsigned i;
for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) {
@@ -229,8 +237,7 @@ static const double pt_norm_expected[] = {
1.88079360815125041, 2.05374891063182208, 2.32634787404084076
};
-TEST_BEGIN(test_pt_norm)
-{
+TEST_BEGIN(test_pt_norm) {
unsigned i;
for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) {
@@ -279,8 +286,7 @@ static const double pt_chi2_expected[] = {
1046.4872561869577, 1063.5717461999654, 1107.0741966053859
};
-TEST_BEGIN(test_pt_chi2)
-{
+TEST_BEGIN(test_pt_chi2) {
unsigned i, j;
unsigned e = 0;
@@ -341,8 +347,7 @@ static const double pt_gamma_expected[] = {
4.7230515633946677, 5.6417477865306020, 8.4059469148854635
};
-TEST_BEGIN(test_pt_gamma_shape)
-{
+TEST_BEGIN(test_pt_gamma_shape) {
unsigned i, j;
unsigned e = 0;
@@ -361,8 +366,7 @@ TEST_BEGIN(test_pt_gamma_shape)
}
TEST_END
-TEST_BEGIN(test_pt_gamma_scale)
-{
+TEST_BEGIN(test_pt_gamma_scale) {
double shape = 1.0;
double ln_gamma_shape = ln_gamma(shape);
@@ -375,14 +379,12 @@ TEST_BEGIN(test_pt_gamma_scale)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_ln_gamma_factorial,
test_ln_gamma_misc,
test_pt_norm,
test_pt_chi2,
test_pt_gamma_shape,
- test_pt_gamma_scale));
+ test_pt_gamma_scale);
}
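
double_eq_rel() above accepts two doubles as equal if they pass either an
absolute or a relative tolerance, dividing by the larger-magnitude operand so
the relative error stays well defined near zero. The same predicate exercised
standalone, with the tests' 1.0e-9 bounds (the sample values are mine):

    #include <math.h>
    #include <stdio.h>

    static int
    double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) {
        if (fabs(a - b) < max_abs_err) {
            return 1;
        }
        double rel_err = (fabs(b) > fabs(a)) ? fabs((a - b) / b)
            : fabs((a - b) / a);
        return rel_err < max_rel_err;
    }

    int
    main(void) {
        /* Near zero, the absolute bound applies. */
        printf("%d\n", double_eq_rel(1e-12, 0.0, 1e-9, 1e-9));       /* 1 */
        /* Large values rely on the relative bound instead. */
        printf("%d\n", double_eq_rel(1e12, 1e12 + 1.0, 1e-9, 1e-9)); /* 1 */
        printf("%d\n", double_eq_rel(1e12, 1e12 + 1e4, 1e-9, 1e-9)); /* 0 */
        return 0;
    }
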
diff --git a/deps/jemalloc/test/unit/mq.c b/deps/jemalloc/test/unit/mq.c
index f57e96af1..57a4d54e4 100644
--- a/deps/jemalloc/test/unit/mq.c
+++ b/deps/jemalloc/test/unit/mq.c
@@ -1,7 +1,7 @@
#include "test/jemalloc_test.h"
-#define NSENDERS 3
-#define NMSGS 100000
+#define NSENDERS 3
+#define NMSGS 100000
typedef struct mq_msg_s mq_msg_t;
struct mq_msg_s {
@@ -9,8 +9,7 @@ struct mq_msg_s {
};
mq_gen(static, mq_, mq_t, mq_msg_t, link)
-TEST_BEGIN(test_mq_basic)
-{
+TEST_BEGIN(test_mq_basic) {
mq_t mq;
mq_msg_t msg;
@@ -31,8 +30,7 @@ TEST_BEGIN(test_mq_basic)
TEST_END
static void *
-thd_receiver_start(void *arg)
-{
+thd_receiver_start(void *arg) {
mq_t *mq = (mq_t *)arg;
unsigned i;
@@ -41,12 +39,11 @@ thd_receiver_start(void *arg)
assert_ptr_not_null(msg, "mq_get() should never return NULL");
dallocx(msg, 0);
}
- return (NULL);
+ return NULL;
}
static void *
-thd_sender_start(void *arg)
-{
+thd_sender_start(void *arg) {
mq_t *mq = (mq_t *)arg;
unsigned i;
@@ -54,15 +51,14 @@ thd_sender_start(void *arg)
mq_msg_t *msg;
void *p;
p = mallocx(sizeof(mq_msg_t), 0);
- assert_ptr_not_null(p, "Unexpected allocm() failure");
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
msg = (mq_msg_t *)p;
mq_put(mq, msg);
}
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_mq_threaded)
-{
+TEST_BEGIN(test_mq_threaded) {
mq_t mq;
thd_t receiver;
thd_t senders[NSENDERS];
@@ -71,22 +67,23 @@ TEST_BEGIN(test_mq_threaded)
assert_false(mq_init(&mq), "Unexpected mq_init() failure");
thd_create(&receiver, thd_receiver_start, (void *)&mq);
- for (i = 0; i < NSENDERS; i++)
+ for (i = 0; i < NSENDERS; i++) {
thd_create(&senders[i], thd_sender_start, (void *)&mq);
+ }
thd_join(receiver, NULL);
- for (i = 0; i < NSENDERS; i++)
+ for (i = 0; i < NSENDERS; i++) {
thd_join(senders[i], NULL);
+ }
mq_fini(&mq);
}
TEST_END
int
-main(void)
-{
- return (test(
+main(void) {
+ return test(
test_mq_basic,
- test_mq_threaded));
+ test_mq_threaded);
}
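
mq_gen() is a test-harness macro that stamps out a typed, lock-protected
message queue over an intrusive link field; the mq_init()/mq_put()/mq_get()
names used above come from the "mq_" prefix passed to it. Stripped of the
locking and blocking, the underlying structure is an ordinary intrusive FIFO;
a single-threaded sketch of that idea (the types and names here are mine):

    #include <stddef.h>
    #include <stdio.h>

    /* Messages embed their own link, as with mq_gen()'s 'link' field. */
    typedef struct msg_s {
        struct msg_s *next;
        int payload;
    } msg_t;

    typedef struct {
        msg_t *head, *tail;
    } queue_t;

    static void
    queue_put(queue_t *q, msg_t *m) {
        m->next = NULL;
        if (q->tail != NULL) {
            q->tail->next = m;
        } else {
            q->head = m;
        }
        q->tail = m;
    }

    static msg_t *
    queue_tryget(queue_t *q) {
        msg_t *m = q->head;
        if (m != NULL) {
            q->head = m->next;
            if (q->head == NULL) {
                q->tail = NULL;
            }
        }
        return m;
    }

    int
    main(void) {
        queue_t q = {NULL, NULL};
        msg_t a = {NULL, 1}, b = {NULL, 2};
        queue_put(&q, &a);
        queue_put(&q, &b);
        for (msg_t *m; (m = queue_tryget(&q)) != NULL;) {
            printf("%d\n", m->payload); /* 1, then 2 */
        }
        return 0;
    }
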
diff --git a/deps/jemalloc/test/unit/mtx.c b/deps/jemalloc/test/unit/mtx.c
index 96ff69486..424587b03 100644
--- a/deps/jemalloc/test/unit/mtx.c
+++ b/deps/jemalloc/test/unit/mtx.c
@@ -1,10 +1,9 @@
#include "test/jemalloc_test.h"
-#define NTHREADS 2
-#define NINCRS 2000000
+#define NTHREADS 2
+#define NINCRS 2000000
-TEST_BEGIN(test_mtx_basic)
-{
+TEST_BEGIN(test_mtx_basic) {
mtx_t mtx;
assert_false(mtx_init(&mtx), "Unexpected mtx_init() failure");
@@ -20,8 +19,7 @@ typedef struct {
} thd_start_arg_t;
static void *
-thd_start(void *varg)
-{
+thd_start(void *varg) {
thd_start_arg_t *arg = (thd_start_arg_t *)varg;
unsigned i;
@@ -30,31 +28,30 @@ thd_start(void *varg)
arg->x++;
mtx_unlock(&arg->mtx);
}
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_mtx_race)
-{
+TEST_BEGIN(test_mtx_race) {
thd_start_arg_t arg;
thd_t thds[NTHREADS];
unsigned i;
assert_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure");
arg.x = 0;
- for (i = 0; i < NTHREADS; i++)
+ for (i = 0; i < NTHREADS; i++) {
thd_create(&thds[i], thd_start, (void *)&arg);
- for (i = 0; i < NTHREADS; i++)
+ }
+ for (i = 0; i < NTHREADS; i++) {
thd_join(thds[i], NULL);
+ }
assert_u_eq(arg.x, NTHREADS * NINCRS,
"Race-related counter corruption");
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_mtx_basic,
- test_mtx_race));
+ test_mtx_race);
}
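
test_mtx_race funnels NTHREADS * NINCRS = 2 * 2000000 = 4000000 increments
through a single mtx_t and requires the exact total, so any lost update fails
the assertion. The harness's mtx_t wraps the platform mutex; the equivalent
shape in plain POSIX threads looks like this (a sketch, not the harness code):

    #include <pthread.h>
    #include <stdio.h>

    #define NTHREADS 2
    #define NINCRS 2000000

    static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
    static unsigned x = 0;

    static void *
    thd_start(void *arg) {
        (void)arg;
        for (unsigned i = 0; i < NINCRS; i++) {
            pthread_mutex_lock(&mtx);
            x++;
            pthread_mutex_unlock(&mtx);
        }
        return NULL;
    }

    int
    main(void) {
        pthread_t thds[NTHREADS];
        for (unsigned i = 0; i < NTHREADS; i++) {
            pthread_create(&thds[i], NULL, thd_start, NULL);
        }
        for (unsigned i = 0; i < NTHREADS; i++) {
            pthread_join(thds[i], NULL);
        }
        printf("x = %u\n", x); /* expect exactly 4000000 */
        return 0;
    }
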
diff --git a/deps/jemalloc/test/unit/nstime.c b/deps/jemalloc/test/unit/nstime.c
new file mode 100644
index 000000000..f31378058
--- /dev/null
+++ b/deps/jemalloc/test/unit/nstime.c
@@ -0,0 +1,249 @@
+#include "test/jemalloc_test.h"
+
+#define BILLION UINT64_C(1000000000)
+
+TEST_BEGIN(test_nstime_init) {
+ nstime_t nst;
+
+ nstime_init(&nst, 42000000043);
+ assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read");
+ assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read");
+ assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_init2) {
+ nstime_t nst;
+
+ nstime_init2(&nst, 42, 43);
+ assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read");
+ assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_copy) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_init(&nstb, 0);
+ nstime_copy(&nstb, &nsta);
+ assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied");
+ assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_compare) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal");
+ assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal");
+
+ nstime_init2(&nstb, 42, 42);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 1,
+ "nsta should be greater than nstb");
+ assert_d_eq(nstime_compare(&nstb, &nsta), -1,
+ "nstb should be less than nsta");
+
+ nstime_init2(&nstb, 42, 44);
+ assert_d_eq(nstime_compare(&nsta, &nstb), -1,
+ "nsta should be less than nstb");
+ assert_d_eq(nstime_compare(&nstb, &nsta), 1,
+ "nstb should be greater than nsta");
+
+ nstime_init2(&nstb, 41, BILLION - 1);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 1,
+ "nsta should be greater than nstb");
+ assert_d_eq(nstime_compare(&nstb, &nsta), -1,
+ "nstb should be less than nsta");
+
+ nstime_init2(&nstb, 43, 0);
+ assert_d_eq(nstime_compare(&nsta, &nstb), -1,
+ "nsta should be less than nstb");
+ assert_d_eq(nstime_compare(&nstb, &nsta), 1,
+ "nstb should be greater than nsta");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_add) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_add(&nsta, &nstb);
+ nstime_init2(&nstb, 84, 86);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect addition result");
+
+ nstime_init2(&nsta, 42, BILLION - 1);
+ nstime_copy(&nstb, &nsta);
+ nstime_add(&nsta, &nstb);
+ nstime_init2(&nstb, 85, BILLION - 2);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect addition result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_iadd) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, BILLION - 1);
+ nstime_iadd(&nsta, 1);
+ nstime_init2(&nstb, 43, 0);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect addition result");
+
+ nstime_init2(&nsta, 42, 1);
+ nstime_iadd(&nsta, BILLION + 1);
+ nstime_init2(&nstb, 43, 2);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect addition result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_subtract) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_subtract(&nsta, &nstb);
+ nstime_init(&nstb, 0);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect subtraction result");
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_init2(&nstb, 41, 44);
+ nstime_subtract(&nsta, &nstb);
+ nstime_init2(&nstb, 0, BILLION - 1);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect subtraction result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_isubtract) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_isubtract(&nsta, 42*BILLION + 43);
+ nstime_init(&nstb, 0);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect subtraction result");
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_isubtract(&nsta, 41*BILLION + 44);
+ nstime_init2(&nstb, 0, BILLION - 1);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect subtraction result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_imultiply) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_imultiply(&nsta, 10);
+ nstime_init2(&nstb, 420, 430);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect multiplication result");
+
+ nstime_init2(&nsta, 42, 666666666);
+ nstime_imultiply(&nsta, 3);
+ nstime_init2(&nstb, 127, 999999998);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect multiplication result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_idivide) {
+ nstime_t nsta, nstb;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_imultiply(&nsta, 10);
+ nstime_idivide(&nsta, 10);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect division result");
+
+ nstime_init2(&nsta, 42, 666666666);
+ nstime_copy(&nstb, &nsta);
+ nstime_imultiply(&nsta, 3);
+ nstime_idivide(&nsta, 3);
+ assert_d_eq(nstime_compare(&nsta, &nstb), 0,
+ "Incorrect division result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_divide) {
+ nstime_t nsta, nstb, nstc;
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_imultiply(&nsta, 10);
+ assert_u64_eq(nstime_divide(&nsta, &nstb), 10,
+ "Incorrect division result");
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_imultiply(&nsta, 10);
+ nstime_init(&nstc, 1);
+ nstime_add(&nsta, &nstc);
+ assert_u64_eq(nstime_divide(&nsta, &nstb), 10,
+ "Incorrect division result");
+
+ nstime_init2(&nsta, 42, 43);
+ nstime_copy(&nstb, &nsta);
+ nstime_imultiply(&nsta, 10);
+ nstime_init(&nstc, 1);
+ nstime_subtract(&nsta, &nstc);
+ assert_u64_eq(nstime_divide(&nsta, &nstb), 9,
+ "Incorrect division result");
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_monotonic) {
+ nstime_monotonic();
+}
+TEST_END
+
+TEST_BEGIN(test_nstime_update) {
+ nstime_t nst;
+
+ nstime_init(&nst, 0);
+
+ assert_false(nstime_update(&nst), "Basic time update failed.");
+
+ /* Only Rip Van Winkle sleeps this long. */
+ {
+ nstime_t addend;
+ nstime_init2(&addend, 631152000, 0);
+ nstime_add(&nst, &addend);
+ }
+ {
+ nstime_t nst0;
+ nstime_copy(&nst0, &nst);
+ assert_true(nstime_update(&nst),
+ "Update should detect time roll-back.");
+ assert_d_eq(nstime_compare(&nst, &nst0), 0,
+ "Time should not have been modified");
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_nstime_init,
+ test_nstime_init2,
+ test_nstime_copy,
+ test_nstime_compare,
+ test_nstime_add,
+ test_nstime_iadd,
+ test_nstime_subtract,
+ test_nstime_isubtract,
+ test_nstime_imultiply,
+ test_nstime_idivide,
+ test_nstime_divide,
+ test_nstime_monotonic,
+ test_nstime_update);
+}
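
The new nstime tests pin down the representation: nstime_init() takes a raw
nanosecond count, nstime_init2() takes a (sec, nsec) pair, and the two agree
because one second is 10^9 ns, so 42000000043 ns splits into 42 s + 43 ns.
The decomposition asserted by test_nstime_init is plain integer arithmetic:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BILLION UINT64_C(1000000000)

    int
    main(void) {
        uint64_t ns = UINT64_C(42000000043);
        /* Mirrors nstime_sec()/nstime_nsec() on an nstime_init() value. */
        printf("sec = %" PRIu64 "\n", ns / BILLION);  /* 42 */
        printf("nsec = %" PRIu64 "\n", ns % BILLION); /* 43 */
        return 0;
    }
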
diff --git a/deps/jemalloc/test/unit/pack.c b/deps/jemalloc/test/unit/pack.c
new file mode 100644
index 000000000..fc188b003
--- /dev/null
+++ b/deps/jemalloc/test/unit/pack.c
@@ -0,0 +1,166 @@
+#include "test/jemalloc_test.h"
+
+/*
+ * Size class that is a divisor of the page size, ideally 4+ regions per run.
+ */
+#if LG_PAGE <= 14
+#define SZ (ZU(1) << (LG_PAGE - 2))
+#else
+#define SZ ZU(4096)
+#endif
+
+/*
+ * Number of slabs to consume at high water mark. Should be at least 2 so
+ * that, if mmap()ed memory grows downward, the downward-growth path is
+ * exercised as well.
+ */
+#define NSLABS 8
+
+static unsigned
+binind_compute(void) {
+ size_t sz;
+ unsigned nbins, i;
+
+ sz = sizeof(nbins);
+ assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0,
+ "Unexpected mallctl failure");
+
+ for (i = 0; i < nbins; i++) {
+ size_t mib[4];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ size_t size;
+
+ assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib,
+		    &miblen), 0, "Unexpected mallctlnametomib failure");
+ mib[2] = (size_t)i;
+
+ sz = sizeof(size);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL,
+ 0), 0, "Unexpected mallctlbymib failure");
+ if (size == SZ) {
+ return i;
+ }
+ }
+
+	test_fail("Unable to find a bin of size %zu", SZ);
+ return 0;
+}
+
+static size_t
+nregs_per_run_compute(void) {
+ uint32_t nregs;
+ size_t sz;
+ unsigned binind = binind_compute();
+ size_t mib[4];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+
+ assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0,
+	    "Unexpected mallctlnametomib failure");
+ mib[2] = (size_t)binind;
+ sz = sizeof(nregs);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL,
+ 0), 0, "Unexpected mallctlbymib failure");
+ return nregs;
+}
+
+static unsigned
+arenas_create_mallctl(void) {
+ unsigned arena_ind;
+ size_t sz;
+
+ sz = sizeof(arena_ind);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Error in arenas.create");
+
+ return arena_ind;
+}
+
+static void
+arena_reset_mallctl(unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+
+ assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+TEST_BEGIN(test_pack) {
+ bool prof_enabled;
+ size_t sz = sizeof(prof_enabled);
+ if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) {
+ test_skip_if(prof_enabled);
+ }
+
+ unsigned arena_ind = arenas_create_mallctl();
+ size_t nregs_per_run = nregs_per_run_compute();
+ size_t nregs = nregs_per_run * NSLABS;
+ VARIABLE_ARRAY(void *, ptrs, nregs);
+ size_t i, j, offset;
+
+ /* Fill matrix. */
+ for (i = offset = 0; i < NSLABS; i++) {
+ for (j = 0; j < nregs_per_run; j++) {
+ void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) |
+ MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(p,
+ "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |"
+ " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu",
+ SZ, arena_ind, i, j);
+ ptrs[(i * nregs_per_run) + j] = p;
+ }
+ }
+
+ /*
+ * Free all but one region of each run, but rotate which region is
+ * preserved, so that subsequent allocations exercise the within-run
+ * layout policy.
+ */
+ offset = 0;
+ for (i = offset = 0;
+ i < NSLABS;
+ i++, offset = (offset + 1) % nregs_per_run) {
+ for (j = 0; j < nregs_per_run; j++) {
+ void *p = ptrs[(i * nregs_per_run) + j];
+ if (offset == j) {
+ continue;
+ }
+ dallocx(p, MALLOCX_ARENA(arena_ind) |
+ MALLOCX_TCACHE_NONE);
+ }
+ }
+
+ /*
+ * Logically refill matrix, skipping preserved regions and verifying
+ * that the matrix is unmodified.
+ */
+ offset = 0;
+ for (i = offset = 0;
+ i < NSLABS;
+ i++, offset = (offset + 1) % nregs_per_run) {
+ for (j = 0; j < nregs_per_run; j++) {
+ void *p;
+
+ if (offset == j) {
+ continue;
+ }
+ p = mallocx(SZ, MALLOCX_ARENA(arena_ind) |
+ MALLOCX_TCACHE_NONE);
+ assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j],
+ "Unexpected refill discrepancy, run=%zu, reg=%zu\n",
+ i, j);
+ }
+ }
+
+ /* Clean up. */
+ arena_reset_mallctl(arena_ind);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_pack);
+}
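
The subtle part of test_pack is the rotation: slab i keeps only region
i mod nregs_per_run alive, so every within-slab offset ends up pinned by some
slab and the refill pass can catch any deviation from deterministic
lowest-region-first reuse. Which region survives in each slab, assuming
nregs_per_run = 4 (the test computes the real value via mallctl):

    #include <stdio.h>

    #define NSLABS 8

    int
    main(void) {
        size_t nregs_per_run = 4; /* assumed; see nregs_per_run_compute() */
        size_t offset = 0;
        for (size_t i = 0; i < NSLABS;
            i++, offset = (offset + 1) % nregs_per_run) {
            printf("slab %zu keeps region %zu\n", i, offset);
        }
        return 0;
    }
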
diff --git a/deps/jemalloc/test/unit/pack.sh b/deps/jemalloc/test/unit/pack.sh
new file mode 100644
index 000000000..6f451480b
--- /dev/null
+++ b/deps/jemalloc/test/unit/pack.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+# Immediately purge to minimize fragmentation.
+export MALLOC_CONF="dirty_decay_ms:0,muzzy_decay_ms:0"
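
pack.sh injects its option string through the MALLOC_CONF environment variable
that the test harness exports before running the binary. The same options can
be baked into a program via jemalloc's documented malloc_conf global
(je_malloc_conf in prefixed builds), for example:

    /* Compile-time equivalent of pack.sh's MALLOC_CONF setting. */
    const char *malloc_conf = "dirty_decay_ms:0,muzzy_decay_ms:0";
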
diff --git a/deps/jemalloc/test/unit/pages.c b/deps/jemalloc/test/unit/pages.c
new file mode 100644
index 000000000..ee729eece
--- /dev/null
+++ b/deps/jemalloc/test/unit/pages.c
@@ -0,0 +1,29 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_pages_huge) {
+ size_t alloc_size;
+ bool commit;
+ void *pages, *hugepage;
+
+ alloc_size = HUGEPAGE * 2 - PAGE;
+ commit = true;
+ pages = pages_map(NULL, alloc_size, PAGE, &commit);
+ assert_ptr_not_null(pages, "Unexpected pages_map() error");
+
+ if (init_system_thp_mode == thp_mode_default) {
+ hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE));
+ assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge,
+ "Unexpected pages_huge() result");
+ assert_false(pages_nohuge(hugepage, HUGEPAGE),
+ "Unexpected pages_nohuge() result");
+ }
+
+ pages_unmap(pages, alloc_size);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_pages_huge);
+}
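
test_pages_huge maps HUGEPAGE * 2 - PAGE bytes so that exactly one aligned
hugepage is guaranteed to fit wherever the mapping lands, then rounds the base
up with ALIGNMENT_CEILING. For power-of-two alignment that rounding is the
usual mask trick, sketched here with an assumed 2 MiB hugepage:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same idea as jemalloc's ALIGNMENT_CEILING (power-of-two align only). */
    static uintptr_t
    alignment_ceiling(uintptr_t p, uintptr_t align) {
        return (p + align - 1) & ~(align - 1);
    }

    int
    main(void) {
        uintptr_t hugepage = (uintptr_t)1 << 21; /* assumed 2 MiB */
        uintptr_t base = (uintptr_t)0x12345678;
        printf("%#" PRIxPTR "\n", alignment_ceiling(base, hugepage));
        /* 0x12400000: the first 2 MiB boundary at or above base. */
        return 0;
    }
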
diff --git a/deps/jemalloc/test/unit/ph.c b/deps/jemalloc/test/unit/ph.c
new file mode 100644
index 000000000..88bf56f88
--- /dev/null
+++ b/deps/jemalloc/test/unit/ph.c
@@ -0,0 +1,318 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/ph.h"
+
+typedef struct node_s node_t;
+
+struct node_s {
+#define NODE_MAGIC 0x9823af7e
+ uint32_t magic;
+ phn(node_t) link;
+ uint64_t key;
+};
+
+static int
+node_cmp(const node_t *a, const node_t *b) {
+ int ret;
+
+ ret = (a->key > b->key) - (a->key < b->key);
+ if (ret == 0) {
+ /*
+ * Duplicates are not allowed in the heap, so force an
+ * arbitrary ordering for non-identical items with equal keys.
+ */
+ ret = (((uintptr_t)a) > ((uintptr_t)b))
+ - (((uintptr_t)a) < ((uintptr_t)b));
+ }
+ return ret;
+}
+
+static int
+node_cmp_magic(const node_t *a, const node_t *b) {
+
+ assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
+ assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic");
+
+ return node_cmp(a, b);
+}
+
+typedef ph(node_t) heap_t;
+ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic);
+
+static void
+node_print(const node_t *node, unsigned depth) {
+ unsigned i;
+ node_t *leftmost_child, *sibling;
+
+ for (i = 0; i < depth; i++) {
+ malloc_printf("\t");
+ }
+ malloc_printf("%2"FMTu64"\n", node->key);
+
+ leftmost_child = phn_lchild_get(node_t, link, node);
+ if (leftmost_child == NULL) {
+ return;
+ }
+ node_print(leftmost_child, depth + 1);
+
+ for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+ NULL; sibling = phn_next_get(node_t, link, sibling)) {
+ node_print(sibling, depth + 1);
+ }
+}
+
+static void
+heap_print(const heap_t *heap) {
+ node_t *auxelm;
+
+ malloc_printf("vvv heap %p vvv\n", heap);
+ if (heap->ph_root == NULL) {
+ goto label_return;
+ }
+
+ node_print(heap->ph_root, 0);
+
+ for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+ auxelm = phn_next_get(node_t, link, auxelm)) {
+ assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+ link, auxelm)), auxelm,
+ "auxelm's prev doesn't link to auxelm");
+ node_print(auxelm, 0);
+ }
+
+label_return:
+ malloc_printf("^^^ heap %p ^^^\n", heap);
+}
+
+static unsigned
+node_validate(const node_t *node, const node_t *parent) {
+ unsigned nnodes = 1;
+ node_t *leftmost_child, *sibling;
+
+ if (parent != NULL) {
+ assert_d_ge(node_cmp_magic(node, parent), 0,
+ "Child is less than parent");
+ }
+
+ leftmost_child = phn_lchild_get(node_t, link, node);
+ if (leftmost_child == NULL) {
+ return nnodes;
+ }
+ assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child),
+ (void *)node, "Leftmost child does not link to node");
+ nnodes += node_validate(leftmost_child, node);
+
+ for (sibling = phn_next_get(node_t, link, leftmost_child); sibling !=
+ NULL; sibling = phn_next_get(node_t, link, sibling)) {
+ assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+ link, sibling)), sibling,
+ "sibling's prev doesn't link to sibling");
+ nnodes += node_validate(sibling, node);
+ }
+ return nnodes;
+}
+
+static unsigned
+heap_validate(const heap_t *heap) {
+ unsigned nnodes = 0;
+ node_t *auxelm;
+
+ if (heap->ph_root == NULL) {
+ goto label_return;
+ }
+
+ nnodes += node_validate(heap->ph_root, NULL);
+
+ for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL;
+ auxelm = phn_next_get(node_t, link, auxelm)) {
+ assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t,
+ link, auxelm)), auxelm,
+ "auxelm's prev doesn't link to auxelm");
+ nnodes += node_validate(auxelm, NULL);
+ }
+
+label_return:
+ if (false) {
+ heap_print(heap);
+ }
+ return nnodes;
+}
+
+TEST_BEGIN(test_ph_empty) {
+ heap_t heap;
+
+ heap_new(&heap);
+ assert_true(heap_empty(&heap), "Heap should be empty");
+ assert_ptr_null(heap_first(&heap), "Unexpected node");
+ assert_ptr_null(heap_any(&heap), "Unexpected node");
+}
+TEST_END
+
+static void
+node_remove(heap_t *heap, node_t *node) {
+ heap_remove(heap, node);
+
+ node->magic = 0;
+}
+
+static node_t *
+node_remove_first(heap_t *heap) {
+ node_t *node = heap_remove_first(heap);
+ node->magic = 0;
+ return node;
+}
+
+static node_t *
+node_remove_any(heap_t *heap) {
+ node_t *node = heap_remove_any(heap);
+ node->magic = 0;
+ return node;
+}
+
+TEST_BEGIN(test_ph_random) {
+#define NNODES 25
+#define NBAGS 250
+#define SEED 42
+ sfmt_t *sfmt;
+ uint64_t bag[NNODES];
+ heap_t heap;
+ node_t nodes[NNODES];
+ unsigned i, j, k;
+
+ sfmt = init_gen_rand(SEED);
+ for (i = 0; i < NBAGS; i++) {
+ switch (i) {
+ case 0:
+ /* Insert in order. */
+ for (j = 0; j < NNODES; j++) {
+ bag[j] = j;
+ }
+ break;
+ case 1:
+ /* Insert in reverse order. */
+ for (j = 0; j < NNODES; j++) {
+ bag[j] = NNODES - j - 1;
+ }
+ break;
+ default:
+ for (j = 0; j < NNODES; j++) {
+ bag[j] = gen_rand64_range(sfmt, NNODES);
+ }
+ }
+
+ for (j = 1; j <= NNODES; j++) {
+ /* Initialize heap and nodes. */
+ heap_new(&heap);
+ assert_u_eq(heap_validate(&heap), 0,
+ "Incorrect node count");
+ for (k = 0; k < j; k++) {
+ nodes[k].magic = NODE_MAGIC;
+ nodes[k].key = bag[k];
+ }
+
+ /* Insert nodes. */
+ for (k = 0; k < j; k++) {
+ heap_insert(&heap, &nodes[k]);
+ if (i % 13 == 12) {
+ assert_ptr_not_null(heap_any(&heap),
+ "Heap should not be empty");
+ /* Trigger merging. */
+ assert_ptr_not_null(heap_first(&heap),
+ "Heap should not be empty");
+ }
+ assert_u_eq(heap_validate(&heap), k + 1,
+ "Incorrect node count");
+ }
+
+ assert_false(heap_empty(&heap),
+ "Heap should not be empty");
+
+ /* Remove nodes. */
+ switch (i % 6) {
+ case 0:
+ for (k = 0; k < j; k++) {
+ assert_u_eq(heap_validate(&heap), j - k,
+ "Incorrect node count");
+ node_remove(&heap, &nodes[k]);
+ assert_u_eq(heap_validate(&heap), j - k
+ - 1, "Incorrect node count");
+ }
+ break;
+ case 1:
+ for (k = j; k > 0; k--) {
+ node_remove(&heap, &nodes[k-1]);
+ assert_u_eq(heap_validate(&heap), k - 1,
+ "Incorrect node count");
+ }
+ break;
+ case 2: {
+ node_t *prev = NULL;
+ for (k = 0; k < j; k++) {
+ node_t *node = node_remove_first(&heap);
+ assert_u_eq(heap_validate(&heap), j - k
+ - 1, "Incorrect node count");
+ if (prev != NULL) {
+ assert_d_ge(node_cmp(node,
+ prev), 0,
+ "Bad removal order");
+ }
+ prev = node;
+ }
+ break;
+ } case 3: {
+ node_t *prev = NULL;
+ for (k = 0; k < j; k++) {
+ node_t *node = heap_first(&heap);
+ assert_u_eq(heap_validate(&heap), j - k,
+ "Incorrect node count");
+ if (prev != NULL) {
+ assert_d_ge(node_cmp(node,
+ prev), 0,
+ "Bad removal order");
+ }
+ node_remove(&heap, node);
+ assert_u_eq(heap_validate(&heap), j - k
+ - 1, "Incorrect node count");
+ prev = node;
+ }
+ break;
+ } case 4: {
+ for (k = 0; k < j; k++) {
+ node_remove_any(&heap);
+ assert_u_eq(heap_validate(&heap), j - k
+ - 1, "Incorrect node count");
+ }
+ break;
+ } case 5: {
+ for (k = 0; k < j; k++) {
+ node_t *node = heap_any(&heap);
+ assert_u_eq(heap_validate(&heap), j - k,
+ "Incorrect node count");
+ node_remove(&heap, node);
+ assert_u_eq(heap_validate(&heap), j - k
+ - 1, "Incorrect node count");
+ }
+ break;
+ } default:
+ not_reached();
+ }
+
+ assert_ptr_null(heap_first(&heap),
+ "Heap should be empty");
+ assert_ptr_null(heap_any(&heap),
+ "Heap should be empty");
+ assert_true(heap_empty(&heap), "Heap should be empty");
+ }
+ }
+ fini_gen_rand(sfmt);
+#undef NNODES
+#undef SEED
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_ph_empty,
+ test_ph_random);
+}
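
node_cmp() above breaks key ties by comparing node addresses, since a pairing
heap cannot hold true duplicates, and the (a > b) - (a < b) idiom yields
-1/0/1 without the overflow risk of subtraction. The comparator in isolation
(the struct and sample values are mine):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t key;
    } node_t;

    /* Same shape as ph.c's node_cmp(), minus the magic checks. */
    static int
    node_cmp(const node_t *a, const node_t *b) {
        int ret = (a->key > b->key) - (a->key < b->key);
        if (ret == 0) {
            /* Equal keys: fall back to an arbitrary total order. */
            ret = ((uintptr_t)a > (uintptr_t)b)
                - ((uintptr_t)a < (uintptr_t)b);
        }
        return ret;
    }

    int
    main(void) {
        node_t n1 = {5}, n2 = {5}, n3 = {7};
        printf("%d\n", node_cmp(&n1, &n3));      /* -1: 5 < 7 */
        printf("%d\n", node_cmp(&n1, &n1));      /*  0: same node */
        printf("%d\n", node_cmp(&n1, &n2) != 0); /*  1: tie broken by address */
        return 0;
    }
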
diff --git a/deps/jemalloc/test/unit/prng.c b/deps/jemalloc/test/unit/prng.c
new file mode 100644
index 000000000..b5795c2f4
--- /dev/null
+++ b/deps/jemalloc/test/unit/prng.c
@@ -0,0 +1,237 @@
+#include "test/jemalloc_test.h"
+
+static void
+test_prng_lg_range_u32(bool atomic) {
+ atomic_u32_t sa, sb;
+ uint32_t ra, rb;
+ unsigned lg_range;
+
+ atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_u32(&sa, 32, atomic);
+ assert_u32_eq(ra, rb,
+ "Repeated generation should produce repeated results");
+
+ atomic_store_u32(&sb, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_u32(&sb, 32, atomic);
+ assert_u32_eq(ra, rb,
+ "Equivalent generation should produce equivalent results");
+
+ atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ rb = prng_lg_range_u32(&sa, 32, atomic);
+ assert_u32_ne(ra, rb,
+ "Full-width results must not immediately repeat");
+
+ atomic_store_u32(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ for (lg_range = 31; lg_range > 0; lg_range--) {
+ atomic_store_u32(&sb, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_u32(&sb, lg_range, atomic);
+ assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)),
+ 0, "High order bits should be 0, lg_range=%u", lg_range);
+ assert_u32_eq(rb, (ra >> (32 - lg_range)),
+ "Expected high order bits of full-width result, "
+ "lg_range=%u", lg_range);
+ }
+}
+
+static void
+test_prng_lg_range_u64(void) {
+ uint64_t sa, sb, ra, rb;
+ unsigned lg_range;
+
+ sa = 42;
+ ra = prng_lg_range_u64(&sa, 64);
+ sa = 42;
+ rb = prng_lg_range_u64(&sa, 64);
+ assert_u64_eq(ra, rb,
+ "Repeated generation should produce repeated results");
+
+ sb = 42;
+ rb = prng_lg_range_u64(&sb, 64);
+ assert_u64_eq(ra, rb,
+ "Equivalent generation should produce equivalent results");
+
+ sa = 42;
+ ra = prng_lg_range_u64(&sa, 64);
+ rb = prng_lg_range_u64(&sa, 64);
+ assert_u64_ne(ra, rb,
+ "Full-width results must not immediately repeat");
+
+ sa = 42;
+ ra = prng_lg_range_u64(&sa, 64);
+ for (lg_range = 63; lg_range > 0; lg_range--) {
+ sb = 42;
+ rb = prng_lg_range_u64(&sb, lg_range);
+ assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)),
+ 0, "High order bits should be 0, lg_range=%u", lg_range);
+ assert_u64_eq(rb, (ra >> (64 - lg_range)),
+ "Expected high order bits of full-width result, "
+ "lg_range=%u", lg_range);
+ }
+}
+
+static void
+test_prng_lg_range_zu(bool atomic) {
+ atomic_zu_t sa, sb;
+ size_t ra, rb;
+ unsigned lg_range;
+
+ atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_eq(ra, rb,
+ "Repeated generation should produce repeated results");
+
+ atomic_store_zu(&sb, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_eq(ra, rb,
+ "Equivalent generation should produce equivalent results");
+
+ atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_ne(ra, rb,
+ "Full-width results must not immediately repeat");
+
+ atomic_store_zu(&sa, 42, ATOMIC_RELAXED);
+ ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0;
+ lg_range--) {
+ atomic_store_zu(&sb, 42, ATOMIC_RELAXED);
+ rb = prng_lg_range_zu(&sb, lg_range, atomic);
+ assert_zu_eq((rb & (SIZE_T_MAX << lg_range)),
+ 0, "High order bits should be 0, lg_range=%u", lg_range);
+ assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) -
+ lg_range)), "Expected high order bits of full-width "
+ "result, lg_range=%u", lg_range);
+ }
+}
+
+TEST_BEGIN(test_prng_lg_range_u32_nonatomic) {
+ test_prng_lg_range_u32(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_u32_atomic) {
+ test_prng_lg_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_u64_nonatomic) {
+ test_prng_lg_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_nonatomic) {
+ test_prng_lg_range_zu(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_atomic) {
+ test_prng_lg_range_zu(true);
+}
+TEST_END
+
+static void
+test_prng_range_u32(bool atomic) {
+ uint32_t range;
+#define MAX_RANGE 10000000
+#define RANGE_STEP 97
+#define NREPS 10
+
+ for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+ atomic_u32_t s;
+ unsigned rep;
+
+ atomic_store_u32(&s, range, ATOMIC_RELAXED);
+ for (rep = 0; rep < NREPS; rep++) {
+ uint32_t r = prng_range_u32(&s, range, atomic);
+
+ assert_u32_lt(r, range, "Out of range");
+ }
+ }
+}
+
+static void
+test_prng_range_u64(void) {
+ uint64_t range;
+#define MAX_RANGE 10000000
+#define RANGE_STEP 97
+#define NREPS 10
+
+ for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+ uint64_t s;
+ unsigned rep;
+
+ s = range;
+ for (rep = 0; rep < NREPS; rep++) {
+ uint64_t r = prng_range_u64(&s, range);
+
+ assert_u64_lt(r, range, "Out of range");
+ }
+ }
+}
+
+static void
+test_prng_range_zu(bool atomic) {
+ size_t range;
+#define MAX_RANGE 10000000
+#define RANGE_STEP 97
+#define NREPS 10
+
+ for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+ atomic_zu_t s;
+ unsigned rep;
+
+ atomic_store_zu(&s, range, ATOMIC_RELAXED);
+ for (rep = 0; rep < NREPS; rep++) {
+ size_t r = prng_range_zu(&s, range, atomic);
+
+ assert_zu_lt(r, range, "Out of range");
+ }
+ }
+}
+
+TEST_BEGIN(test_prng_range_u32_nonatomic) {
+ test_prng_range_u32(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u32_atomic) {
+ test_prng_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u64_nonatomic) {
+ test_prng_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_zu_nonatomic) {
+ test_prng_range_zu(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_zu_atomic) {
+ test_prng_range_zu(true);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_prng_lg_range_u32_nonatomic,
+ test_prng_lg_range_u32_atomic,
+ test_prng_lg_range_u64_nonatomic,
+ test_prng_lg_range_zu_nonatomic,
+ test_prng_lg_range_zu_atomic,
+ test_prng_range_u32_nonatomic,
+ test_prng_range_u32_atomic,
+ test_prng_range_u64_nonatomic,
+ test_prng_range_zu_nonatomic,
+ test_prng_range_zu_atomic);
+}
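
Every lg_range assertion in this file follows from one property: the generator advances a linear congruential state and returns only its high-order bits (the low bits of an LCG have short periods), so a request for lg_range bits is exactly the top lg_range bits of the full-width result. A sketch under that assumption; the constants below are Knuth's MMIX LCG parameters, an assumption here rather than a statement about what prng.h ships:

    #include <stdint.h>

    /* Hypothetical name; valid for lg_range in [1..64]. */
    static uint64_t
    prng_sketch_u64(uint64_t *state, unsigned lg_range) {
        /* Assumed LCG constants (Knuth MMIX). */
        *state = *state * 6364136223846793005ULL +
            1442695040888963407ULL;
        return *state >> (64 - lg_range);
    }

Reseeding with 42 before each call is what makes rb equal ra >> (64 - lg_range) in the loops above: both results come from the same single LCG step.
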
diff --git a/deps/jemalloc/test/unit/prof_accum.c b/deps/jemalloc/test/unit/prof_accum.c
index 050a8a7ee..252200635 100644
--- a/deps/jemalloc/test/unit/prof_accum.c
+++ b/deps/jemalloc/test/unit/prof_accum.c
@@ -1,31 +1,27 @@
-#include "prof_accum.h"
+#include "test/jemalloc_test.h"
-#ifdef JEMALLOC_PROF
-const char *malloc_conf =
- "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0";
-#endif
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD 50
+#define DUMP_INTERVAL 1
+#define BT_COUNT_CHECK_INTERVAL 5
static int
-prof_dump_open_intercept(bool propagate_err, const char *filename)
-{
+prof_dump_open_intercept(bool propagate_err, const char *filename) {
int fd;
fd = open("/dev/null", O_WRONLY);
assert_d_ne(fd, -1, "Unexpected open() failure");
- return (fd);
+ return fd;
}
static void *
-alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration)
-{
-
- return (alloc_0(thd_ind*NALLOCS_PER_THREAD + iteration));
+alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) {
+ return btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration);
}
static void *
-thd_start(void *varg)
-{
+thd_start(void *varg) {
unsigned thd_ind = *(unsigned *)varg;
size_t bt_count_prev, bt_count;
unsigned i_prev, i;
@@ -50,11 +46,10 @@ thd_start(void *varg)
}
}
- return (NULL);
+ return NULL;
}
-TEST_BEGIN(test_idump)
-{
+TEST_BEGIN(test_idump) {
bool active;
thd_t thds[NTHREADS];
unsigned thd_args[NTHREADS];
@@ -63,8 +58,9 @@ TEST_BEGIN(test_idump)
test_skip_if(!config_prof);
active = true;
- assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
- 0, "Unexpected mallctl failure while activating profiling");
+ assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
+ sizeof(active)), 0,
+ "Unexpected mallctl failure while activating profiling");
prof_dump_open = prof_dump_open_intercept;
@@ -72,15 +68,14 @@ TEST_BEGIN(test_idump)
thd_args[i] = i;
thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
}
- for (i = 0; i < NTHREADS; i++)
+ for (i = 0; i < NTHREADS; i++) {
thd_join(thds[i], NULL);
+ }
}
TEST_END
int
-main(void)
-{
-
- return (test(
- test_idump));
+main(void) {
+ return test_no_reentrancy(
+ test_idump);
}
diff --git a/deps/jemalloc/test/unit/prof_accum.h b/deps/jemalloc/test/unit/prof_accum.h
deleted file mode 100644
index 109d86b59..000000000
--- a/deps/jemalloc/test/unit/prof_accum.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#include "test/jemalloc_test.h"
-
-#define NTHREADS 4
-#define NALLOCS_PER_THREAD 50
-#define DUMP_INTERVAL 1
-#define BT_COUNT_CHECK_INTERVAL 5
-
-#define alloc_n_proto(n) \
-void *alloc_##n(unsigned bits);
-alloc_n_proto(0)
-alloc_n_proto(1)
-
-#define alloc_n_gen(n) \
-void * \
-alloc_##n(unsigned bits) \
-{ \
- void *p; \
- \
- if (bits == 0) \
- p = mallocx(1, 0); \
- else { \
- switch (bits & 0x1U) { \
- case 0: \
- p = (alloc_0(bits >> 1)); \
- break; \
- case 1: \
- p = (alloc_1(bits >> 1)); \
- break; \
- default: not_reached(); \
- } \
- } \
- /* Intentionally sabotage tail call optimization. */ \
- assert_ptr_not_null(p, "Unexpected mallocx() failure"); \
- return (p); \
-}
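
The machinery deleted here existed to give every allocation index a distinct stack: recurse on the bits of the index, choosing between two identical function clones at each level, so each index yields a unique call chain and therefore a unique profile backtrace. The test now gets this from btalloc() in the shared test library instead. A sketch of the idea with hypothetical names; the real generator lives in the test library:

    #include <stdlib.h>

    void *bt_0(unsigned bits);
    void *bt_1(unsigned bits);

    void *
    bt_0(unsigned bits) {
        void *p;

        if (bits == 0) {
            p = malloc(1);
        } else if ((bits & 1U) == 0) {
            p = bt_0(bits >> 1);
        } else {
            p = bt_1(bits >> 1);
        }
        /* Using p here sabotages tail-call optimization, keeping
         * every recursion level on the stack. */
        if (p == NULL) {
            abort();
        }
        return p;
    }

    /* A structurally identical clone; having two copies is what
     * makes each level's branch contribute a distinct return
     * address. */
    void *
    bt_1(unsigned bits) {
        void *p;

        if (bits == 0) {
            p = malloc(1);
        } else if ((bits & 1U) == 0) {
            p = bt_0(bits >> 1);
        } else {
            p = bt_1(bits >> 1);
        }
        if (p == NULL) {
            abort();
        }
        return p;
    }
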
diff --git a/deps/jemalloc/test/unit/prof_accum.sh b/deps/jemalloc/test/unit/prof_accum.sh
new file mode 100644
index 000000000..b3e13fc54
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_accum.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0"
+fi
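
Wrapper scripts like this one replace the compile-time malloc_conf globals deleted above: the harness now injects options through the MALLOC_CONF environment variable, gated on how jemalloc was configured. Outside the harness, an application can still bake in defaults the old way; a sketch (the option string is just an example):

    /* jemalloc reads this application-supplied symbol, if present,
     * before consulting the MALLOC_CONF environment variable (which
     * therefore still overrides it). */
    const char *malloc_conf = "prof:true,lg_prof_sample:19";
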
diff --git a/deps/jemalloc/test/unit/prof_accum_a.c b/deps/jemalloc/test/unit/prof_accum_a.c
deleted file mode 100644
index 42ad521d8..000000000
--- a/deps/jemalloc/test/unit/prof_accum_a.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "prof_accum.h"
-
-alloc_n_gen(0)
diff --git a/deps/jemalloc/test/unit/prof_accum_b.c b/deps/jemalloc/test/unit/prof_accum_b.c
deleted file mode 100644
index 60d9dab6a..000000000
--- a/deps/jemalloc/test/unit/prof_accum_b.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "prof_accum.h"
-
-alloc_n_gen(1)
diff --git a/deps/jemalloc/test/unit/prof_active.c b/deps/jemalloc/test/unit/prof_active.c
new file mode 100644
index 000000000..850a24a77
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_active.c
@@ -0,0 +1,117 @@
+#include "test/jemalloc_test.h"
+
+static void
+mallctl_bool_get(const char *name, bool expected, const char *func, int line) {
+ bool old;
+ size_t sz;
+
+ sz = sizeof(old);
+ assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0,
+ "%s():%d: Unexpected mallctl failure reading %s", func, line, name);
+ assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line,
+ name);
+}
+
+static void
+mallctl_bool_set(const char *name, bool old_expected, bool val_new,
+ const char *func, int line) {
+ bool old;
+ size_t sz;
+
+ sz = sizeof(old);
+ assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new,
+ sizeof(val_new)), 0,
+ "%s():%d: Unexpected mallctl failure reading/writing %s", func,
+ line, name);
+ assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func,
+ line, name);
+}
+
+static void
+mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func,
+ int line) {
+ mallctl_bool_get("prof.active", prof_active_old_expected, func, line);
+}
+#define mallctl_prof_active_get(a) \
+ mallctl_prof_active_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_prof_active_set_impl(bool prof_active_old_expected,
+ bool prof_active_new, const char *func, int line) {
+ mallctl_bool_set("prof.active", prof_active_old_expected,
+ prof_active_new, func, line);
+}
+#define mallctl_prof_active_set(a, b) \
+ mallctl_prof_active_set_impl(a, b, __func__, __LINE__)
+
+static void
+mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected,
+ const char *func, int line) {
+ mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected,
+ func, line);
+}
+#define mallctl_thread_prof_active_get(a) \
+ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected,
+ bool thread_prof_active_new, const char *func, int line) {
+ mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected,
+ thread_prof_active_new, func, line);
+}
+#define mallctl_thread_prof_active_set(a, b) \
+ mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__)
+
+static void
+prof_sampling_probe_impl(bool expect_sample, const char *func, int line) {
+ void *p;
+ size_t expected_backtraces = expect_sample ? 1 : 0;
+
+ assert_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func,
+ line);
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ assert_zu_eq(prof_bt_count(), expected_backtraces,
+ "%s():%d: Unexpected backtrace count", func, line);
+ dallocx(p, 0);
+}
+#define prof_sampling_probe(a) \
+ prof_sampling_probe_impl(a, __func__, __LINE__)
+
+TEST_BEGIN(test_prof_active) {
+ test_skip_if(!config_prof);
+
+ mallctl_prof_active_get(true);
+ mallctl_thread_prof_active_get(false);
+
+ mallctl_prof_active_set(true, true);
+ mallctl_thread_prof_active_set(false, false);
+ /* prof.active, !thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(true, false);
+ mallctl_thread_prof_active_set(false, false);
+ /* !prof.active, !thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(false, false);
+ mallctl_thread_prof_active_set(false, true);
+ /* !prof.active, thread.prof.active. */
+ prof_sampling_probe(false);
+
+ mallctl_prof_active_set(false, true);
+ mallctl_thread_prof_active_set(true, true);
+ /* prof.active, thread.prof.active. */
+ prof_sampling_probe(true);
+
+ /* Restore settings. */
+ mallctl_prof_active_set(true, true);
+ mallctl_thread_prof_active_set(true, false);
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_prof_active);
+}
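
The *_impl/macro pairs above are a standard C idiom: the helper takes explicit func/line parameters, and a thin macro supplies __func__ and __LINE__ so a failure reports the call site instead of the shared helper. Reduced to its essentials (hypothetical names):

    #include <stdio.h>

    static void
    expect_impl(int ok, const char *expr, const char *func, int line) {
        if (!ok) {
            fprintf(stderr, "%s():%d: %s failed\n", func, line, expr);
        }
    }
    #define expect(ok) expect_impl((ok), #ok, __func__, __LINE__)
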
diff --git a/deps/jemalloc/test/unit/prof_active.sh b/deps/jemalloc/test/unit/prof_active.sh
new file mode 100644
index 000000000..0167cb10b
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_active.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,prof_thread_active_init:false,lg_prof_sample:0"
+fi
diff --git a/deps/jemalloc/test/unit/prof_gdump.c b/deps/jemalloc/test/unit/prof_gdump.c
index a00b1054f..fcb434cb9 100644
--- a/deps/jemalloc/test/unit/prof_gdump.c
+++ b/deps/jemalloc/test/unit/prof_gdump.c
@@ -1,14 +1,9 @@
#include "test/jemalloc_test.h"
-#ifdef JEMALLOC_PROF
-const char *malloc_conf = "prof:true,prof_active:false,prof_gdump:true";
-#endif
-
static bool did_prof_dump_open;
static int
-prof_dump_open_intercept(bool propagate_err, const char *filename)
-{
+prof_dump_open_intercept(bool propagate_err, const char *filename) {
int fd;
did_prof_dump_open = true;
@@ -16,41 +11,64 @@ prof_dump_open_intercept(bool propagate_err, const char *filename)
fd = open("/dev/null", O_WRONLY);
assert_d_ne(fd, -1, "Unexpected open() failure");
- return (fd);
+ return fd;
}
-TEST_BEGIN(test_gdump)
-{
- bool active;
- void *p, *q;
+TEST_BEGIN(test_gdump) {
+ bool active, gdump, gdump_old;
+ void *p, *q, *r, *s;
+ size_t sz;
test_skip_if(!config_prof);
active = true;
- assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
- 0, "Unexpected mallctl failure while activating profiling");
+ assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
+ sizeof(active)), 0,
+ "Unexpected mallctl failure while activating profiling");
prof_dump_open = prof_dump_open_intercept;
did_prof_dump_open = false;
- p = mallocx(chunksize, 0);
+ p = mallocx((1U << LG_LARGE_MINCLASS), 0);
assert_ptr_not_null(p, "Unexpected mallocx() failure");
assert_true(did_prof_dump_open, "Expected a profile dump");
did_prof_dump_open = false;
- q = mallocx(chunksize, 0);
+ q = mallocx((1U << LG_LARGE_MINCLASS), 0);
+ assert_ptr_not_null(q, "Unexpected mallocx() failure");
+ assert_true(did_prof_dump_open, "Expected a profile dump");
+
+ gdump = false;
+ sz = sizeof(gdump_old);
+ assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz,
+ (void *)&gdump, sizeof(gdump)), 0,
+ "Unexpected mallctl failure while disabling prof.gdump");
+ assert(gdump_old);
+ did_prof_dump_open = false;
+ r = mallocx((1U << LG_LARGE_MINCLASS), 0);
+ assert_ptr_not_null(r, "Unexpected mallocx() failure");
+ assert_false(did_prof_dump_open, "Unexpected profile dump");
+
+ gdump = true;
+ sz = sizeof(gdump_old);
+ assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz,
+ (void *)&gdump, sizeof(gdump)), 0,
+ "Unexpected mallctl failure while enabling prof.gdump");
+ assert(!gdump_old);
+ did_prof_dump_open = false;
+ s = mallocx((1U << LG_LARGE_MINCLASS), 0);
assert_ptr_not_null(s, "Unexpected mallocx() failure");
assert_true(did_prof_dump_open, "Expected a profile dump");
dallocx(p, 0);
dallocx(q, 0);
+ dallocx(r, 0);
+ dallocx(s, 0);
}
TEST_END
int
-main(void)
-{
-
- return (test(
- test_gdump));
+main(void) {
+ return test_no_reentrancy(
+ test_gdump);
}
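
The new prof.gdump toggling relies on mallctl's general calling convention: oldp/oldlenp receive the previous value while newp/newlen install the new one in a single call, which is how the test flips the flag and verifies what it replaced at the same time. The same read-modify-write from application code, as a sketch (requires a build with profiling enabled; otherwise the name is absent and mallctl fails):

    #include <stdbool.h>
    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    /* Stores the previous setting in *old; returns 0 on success. */
    static int
    set_gdump(bool enable, bool *old) {
        size_t sz = sizeof(*old);
        return mallctl("prof.gdump", (void *)old, &sz,
            (void *)&enable, sizeof(enable));
    }
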
diff --git a/deps/jemalloc/test/unit/prof_gdump.sh b/deps/jemalloc/test/unit/prof_gdump.sh
new file mode 100644
index 000000000..3f600d200
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_gdump.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true"
+fi
+
diff --git a/deps/jemalloc/test/unit/prof_idump.c b/deps/jemalloc/test/unit/prof_idump.c
index bdea53ecd..1cc6c98cd 100644
--- a/deps/jemalloc/test/unit/prof_idump.c
+++ b/deps/jemalloc/test/unit/prof_idump.c
@@ -1,16 +1,9 @@
#include "test/jemalloc_test.h"
-#ifdef JEMALLOC_PROF
-const char *malloc_conf =
- "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,"
- "lg_prof_interval:0";
-#endif
-
static bool did_prof_dump_open;
static int
-prof_dump_open_intercept(bool propagate_err, const char *filename)
-{
+prof_dump_open_intercept(bool propagate_err, const char *filename) {
int fd;
did_prof_dump_open = true;
@@ -18,19 +11,19 @@ prof_dump_open_intercept(bool propagate_err, const char *filename)
fd = open("/dev/null", O_WRONLY);
assert_d_ne(fd, -1, "Unexpected open() failure");
- return (fd);
+ return fd;
}
-TEST_BEGIN(test_idump)
-{
+TEST_BEGIN(test_idump) {
bool active;
void *p;
test_skip_if(!config_prof);
active = true;
- assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)),
- 0, "Unexpected mallctl failure while activating profiling");
+ assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
+ sizeof(active)), 0,
+ "Unexpected mallctl failure while activating profiling");
prof_dump_open = prof_dump_open_intercept;
@@ -43,9 +36,7 @@ TEST_BEGIN(test_idump)
TEST_END
int
-main(void)
-{
-
- return (test(
- test_idump));
+main(void) {
+ return test(
+ test_idump);
}
diff --git a/deps/jemalloc/test/unit/prof_idump.sh b/deps/jemalloc/test/unit/prof_idump.sh
new file mode 100644
index 000000000..4dc599a30
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_idump.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+export MALLOC_CONF="tcache:false"
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="${MALLOC_CONF},prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,lg_prof_interval:0"
+fi
+
+
diff --git a/deps/jemalloc/test/unit/prof_reset.c b/deps/jemalloc/test/unit/prof_reset.c
new file mode 100644
index 000000000..7cce42d27
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_reset.c
@@ -0,0 +1,286 @@
+#include "test/jemalloc_test.h"
+
+static int
+prof_dump_open_intercept(bool propagate_err, const char *filename) {
+ int fd;
+
+ fd = open("/dev/null", O_WRONLY);
+ assert_d_ne(fd, -1, "Unexpected open() failure");
+
+ return fd;
+}
+
+static void
+set_prof_active(bool active) {
+ assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active,
+ sizeof(active)), 0, "Unexpected mallctl failure");
+}
+
+static size_t
+get_lg_prof_sample(void) {
+ size_t lg_prof_sample;
+ size_t sz = sizeof(size_t);
+
+ assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz,
+ NULL, 0), 0,
+ "Unexpected mallctl failure while reading profiling sample rate");
+ return lg_prof_sample;
+}
+
+static void
+do_prof_reset(size_t lg_prof_sample) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL,
+ (void *)&lg_prof_sample, sizeof(size_t)), 0,
+ "Unexpected mallctl failure while resetting profile data");
+ assert_zu_eq(lg_prof_sample, get_lg_prof_sample(),
+ "Expected profile sample rate change");
+}
+
+TEST_BEGIN(test_prof_reset_basic) {
+ size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next;
+ size_t sz;
+ unsigned i;
+
+ test_skip_if(!config_prof);
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig,
+ &sz, NULL, 0), 0,
+ "Unexpected mallctl failure while reading profiling sample rate");
+ assert_zu_eq(lg_prof_sample_orig, 0,
+ "Unexpected profiling sample rate");
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected disagreement between \"opt.lg_prof_sample\" and "
+ "\"prof.lg_sample\"");
+
+ /* Test simple resets. */
+ for (i = 0; i < 2; i++) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctl failure while resetting profile data");
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected profile sample rate change");
+ }
+
+ /* Test resets with prof.lg_sample changes. */
+ lg_prof_sample_next = 1;
+ for (i = 0; i < 2; i++) {
+ do_prof_reset(lg_prof_sample_next);
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample, lg_prof_sample_next,
+ "Expected profile sample rate change");
+ lg_prof_sample_next = lg_prof_sample_orig;
+ }
+
+ /* Make sure the test code restored prof.lg_sample. */
+ lg_prof_sample = get_lg_prof_sample();
+ assert_zu_eq(lg_prof_sample_orig, lg_prof_sample,
+ "Unexpected disagreement between \"opt.lg_prof_sample\" and "
+ "\"prof.lg_sample\"");
+}
+TEST_END
+
+bool prof_dump_header_intercepted = false;
+prof_cnt_t cnt_all_copy = {0, 0, 0, 0};
+static bool
+prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err,
+ const prof_cnt_t *cnt_all) {
+ prof_dump_header_intercepted = true;
+ memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t));
+
+ return false;
+}
+
+TEST_BEGIN(test_prof_reset_cleanup) {
+ void *p;
+ prof_dump_header_t *prof_dump_header_orig;
+
+ test_skip_if(!config_prof);
+
+ set_prof_active(true);
+
+ assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces");
+ p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace");
+
+ prof_dump_header_orig = prof_dump_header;
+ prof_dump_header = prof_dump_header_intercept;
+ assert_false(prof_dump_header_intercepted, "Unexpected intercept");
+
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ assert_true(prof_dump_header_intercepted, "Expected intercept");
+ assert_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation");
+
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected error while resetting heap profile data");
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ assert_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations");
+ assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace");
+
+ prof_dump_header = prof_dump_header_orig;
+
+ dallocx(p, 0);
+ assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces");
+
+ set_prof_active(false);
+}
+TEST_END
+
+#define NTHREADS 4
+#define NALLOCS_PER_THREAD (1U << 13)
+#define OBJ_RING_BUF_COUNT 1531
+#define RESET_INTERVAL (1U << 10)
+#define DUMP_INTERVAL 3677
+static void *
+thd_start(void *varg) {
+ unsigned thd_ind = *(unsigned *)varg;
+ unsigned i;
+ void *objs[OBJ_RING_BUF_COUNT];
+
+ memset(objs, 0, sizeof(objs));
+
+ for (i = 0; i < NALLOCS_PER_THREAD; i++) {
+ if (i % RESET_INTERVAL == 0) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while resetting heap profile "
+ "data");
+ }
+
+ if (i % DUMP_INTERVAL == 0) {
+ assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0),
+ 0, "Unexpected error while dumping heap profile");
+ }
+
+ {
+ void **pp = &objs[i % OBJ_RING_BUF_COUNT];
+ if (*pp != NULL) {
+ dallocx(*pp, 0);
+ *pp = NULL;
+ }
+ *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i);
+ assert_ptr_not_null(*pp,
+ "Unexpected btalloc() failure");
+ }
+ }
+
+ /* Clean up any remaining objects. */
+ for (i = 0; i < OBJ_RING_BUF_COUNT; i++) {
+ void **pp = &objs[i % OBJ_RING_BUF_COUNT];
+ if (*pp != NULL) {
+ dallocx(*pp, 0);
+ *pp = NULL;
+ }
+ }
+
+ return NULL;
+}
+
+TEST_BEGIN(test_prof_reset) {
+ size_t lg_prof_sample_orig;
+ thd_t thds[NTHREADS];
+ unsigned thd_args[NTHREADS];
+ unsigned i;
+ size_t bt_count, tdata_count;
+
+ test_skip_if(!config_prof);
+
+ bt_count = prof_bt_count();
+ assert_zu_eq(bt_count, 0,
+ "Unexpected pre-existing tdata structures");
+ tdata_count = prof_tdata_count();
+
+ lg_prof_sample_orig = get_lg_prof_sample();
+ do_prof_reset(5);
+
+ set_prof_active(true);
+
+ for (i = 0; i < NTHREADS; i++) {
+ thd_args[i] = i;
+ thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
+ }
+ for (i = 0; i < NTHREADS; i++) {
+ thd_join(thds[i], NULL);
+ }
+
+ assert_zu_eq(prof_bt_count(), bt_count,
+ "Unexpected bactrace count change");
+ assert_zu_eq(prof_tdata_count(), tdata_count,
+ "Unexpected remaining tdata structures");
+
+ set_prof_active(false);
+
+ do_prof_reset(lg_prof_sample_orig);
+}
+TEST_END
+#undef NTHREADS
+#undef NALLOCS_PER_THREAD
+#undef OBJ_RING_BUF_COUNT
+#undef RESET_INTERVAL
+#undef DUMP_INTERVAL
+
+/* Test sampling at the same allocation site across resets. */
+#define NITER 10
+TEST_BEGIN(test_xallocx) {
+ size_t lg_prof_sample_orig;
+ unsigned i;
+ void *ptrs[NITER];
+
+ test_skip_if(!config_prof);
+
+ lg_prof_sample_orig = get_lg_prof_sample();
+ set_prof_active(true);
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ for (i = 0; i < NITER; i++) {
+ void *p;
+ size_t sz, nsz;
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ /* Allocate small object (which will be promoted). */
+ p = ptrs[i] = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+
+ /* Reset profiling. */
+ do_prof_reset(0);
+
+ /* Perform successful xallocx(). */
+ sz = sallocx(p, 0);
+ assert_zu_eq(xallocx(p, sz, 0, 0), sz,
+ "Unexpected xallocx() failure");
+
+ /* Perform unsuccessful xallocx(). */
+ nsz = nallocx(sz+1, 0);
+ assert_zu_eq(xallocx(p, nsz, 0, 0), sz,
+ "Unexpected xallocx() success");
+ }
+
+ for (i = 0; i < NITER; i++) {
+ /* dallocx. */
+ dallocx(ptrs[i], 0);
+ }
+
+ set_prof_active(false);
+ do_prof_reset(lg_prof_sample_orig);
+}
+TEST_END
+#undef NITER
+
+int
+main(void) {
+ /* Intercept dumping prior to running any tests. */
+ prof_dump_open = prof_dump_open_intercept;
+
+ return test_no_reentrancy(
+ test_prof_reset_basic,
+ test_prof_reset_cleanup,
+ test_prof_reset,
+ test_xallocx);
+}
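
do_prof_reset() above shows that a single prof.reset write does double duty: it discards accumulated profile state, and the optional new value retunes lg_prof_sample in the same call. From application code the equivalent is (sketch; profiling-enabled build assumed):

    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    static int
    reset_profiling(size_t lg_prof_sample) {
        /* Pass NULL/0 for newp/newlen to reset without changing
         * the sample rate. */
        return mallctl("prof.reset", NULL, NULL,
            (void *)&lg_prof_sample, sizeof(lg_prof_sample));
    }
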
diff --git a/deps/jemalloc/test/unit/prof_reset.sh b/deps/jemalloc/test/unit/prof_reset.sh
new file mode 100644
index 000000000..43c516a08
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_reset.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0"
+fi
diff --git a/deps/jemalloc/test/unit/prof_tctx.c b/deps/jemalloc/test/unit/prof_tctx.c
new file mode 100644
index 000000000..ff3b2b0ca
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_tctx.c
@@ -0,0 +1,46 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_prof_realloc) {
+ tsdn_t *tsdn;
+ int flags;
+ void *p, *q;
+ prof_tctx_t *tctx_p, *tctx_q;
+ uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3;
+
+ test_skip_if(!config_prof);
+
+ tsdn = tsdn_fetch();
+ flags = MALLOCX_TCACHE_NONE;
+
+ prof_cnt_all(&curobjs_0, NULL, NULL, NULL);
+ p = mallocx(1024, flags);
+ assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ tctx_p = prof_tctx_get(tsdn, p, NULL);
+ assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U,
+ "Expected valid tctx");
+ prof_cnt_all(&curobjs_1, NULL, NULL, NULL);
+ assert_u64_eq(curobjs_0 + 1, curobjs_1,
+ "Allocation should have increased sample size");
+
+ q = rallocx(p, 2048, flags);
+ assert_ptr_ne(p, q, "Expected move");
+ assert_ptr_not_null(p, "Unexpected rmallocx() failure");
+ tctx_q = prof_tctx_get(tsdn, q, NULL);
+ assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U,
+ "Expected valid tctx");
+ prof_cnt_all(&curobjs_2, NULL, NULL, NULL);
+ assert_u64_eq(curobjs_1, curobjs_2,
+ "Reallocation should not have changed sample size");
+
+ dallocx(q, flags);
+ prof_cnt_all(&curobjs_3, NULL, NULL, NULL);
+ assert_u64_eq(curobjs_0, curobjs_3,
+ "Sample size should have returned to base level");
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_prof_realloc);
+}
diff --git a/deps/jemalloc/test/unit/prof_tctx.sh b/deps/jemalloc/test/unit/prof_tctx.sh
new file mode 100644
index 000000000..8fcc7d8a7
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_tctx.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,lg_prof_sample:0"
+fi
diff --git a/deps/jemalloc/test/unit/prof_thread_name.c b/deps/jemalloc/test/unit/prof_thread_name.c
new file mode 100644
index 000000000..c9c2a2b76
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_thread_name.c
@@ -0,0 +1,120 @@
+#include "test/jemalloc_test.h"
+
+static void
+mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func,
+ int line) {
+ const char *thread_name_old;
+ size_t sz;
+
+ sz = sizeof(thread_name_old);
+ assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz,
+ NULL, 0), 0,
+ "%s():%d: Unexpected mallctl failure reading thread.prof.name",
+ func, line);
+ assert_str_eq(thread_name_old, thread_name_expected,
+ "%s():%d: Unexpected thread.prof.name value", func, line);
+}
+#define mallctl_thread_name_get(a) \
+ mallctl_thread_name_get_impl(a, __func__, __LINE__)
+
+static void
+mallctl_thread_name_set_impl(const char *thread_name, const char *func,
+ int line) {
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL,
+ (void *)&thread_name, sizeof(thread_name)), 0,
+ "%s():%d: Unexpected mallctl failure reading thread.prof.name",
+ func, line);
+ mallctl_thread_name_get_impl(thread_name, func, line);
+}
+#define mallctl_thread_name_set(a) \
+ mallctl_thread_name_set_impl(a, __func__, __LINE__)
+
+TEST_BEGIN(test_prof_thread_name_validation) {
+ const char *thread_name;
+
+ test_skip_if(!config_prof);
+
+ mallctl_thread_name_get("");
+ mallctl_thread_name_set("hi there");
+
+ /* NULL input shouldn't be allowed. */
+ thread_name = NULL;
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL,
+ (void *)&thread_name, sizeof(thread_name)), EFAULT,
+ "Unexpected mallctl result writing \"%s\" to thread.prof.name",
+ thread_name);
+
+ /* '\n' shouldn't be allowed. */
+ thread_name = "hi\nthere";
+ assert_d_eq(mallctl("thread.prof.name", NULL, NULL,
+ (void *)&thread_name, sizeof(thread_name)), EFAULT,
+ "Unexpected mallctl result writing \"%s\" to thread.prof.name",
+ thread_name);
+
+ /* Simultaneous read/write shouldn't be allowed. */
+ {
+ const char *thread_name_old;
+ size_t sz;
+
+ sz = sizeof(thread_name_old);
+ assert_d_eq(mallctl("thread.prof.name",
+ (void *)&thread_name_old, &sz, (void *)&thread_name,
+ sizeof(thread_name)), EPERM,
+ "Unexpected mallctl result writing \"%s\" to "
+ "thread.prof.name", thread_name);
+ }
+
+ mallctl_thread_name_set("");
+}
+TEST_END
+
+#define NTHREADS 4
+#define NRESET 25
+static void *
+thd_start(void *varg) {
+ unsigned thd_ind = *(unsigned *)varg;
+ char thread_name[16] = "";
+ unsigned i;
+
+ malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind);
+
+ mallctl_thread_name_get("");
+ mallctl_thread_name_set(thread_name);
+
+ for (i = 0; i < NRESET; i++) {
+ assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0,
+ "Unexpected error while resetting heap profile data");
+ mallctl_thread_name_get(thread_name);
+ }
+
+ mallctl_thread_name_set(thread_name);
+ mallctl_thread_name_set("");
+
+ return NULL;
+}
+
+TEST_BEGIN(test_prof_thread_name_threaded) {
+ thd_t thds[NTHREADS];
+ unsigned thd_args[NTHREADS];
+ unsigned i;
+
+ test_skip_if(!config_prof);
+
+ for (i = 0; i < NTHREADS; i++) {
+ thd_args[i] = i;
+ thd_create(&thds[i], thd_start, (void *)&thd_args[i]);
+ }
+ for (i = 0; i < NTHREADS; i++) {
+ thd_join(thds[i], NULL);
+ }
+}
+TEST_END
+#undef NTHREADS
+#undef NRESET
+
+int
+main(void) {
+ return test(
+ test_prof_thread_name_validation,
+ test_prof_thread_name_threaded);
+}
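
Note the double indirection when writing thread.prof.name: mallctl copies newlen bytes from newp, and for this control the value is itself a const char *, so the address of the pointer is what gets passed. An application-side wrapper, as a sketch (profiling build assumed):

    #include <jemalloc/jemalloc.h>

    static int
    set_thread_profile_name(const char *name) {
        /* newp points at the pointer, not at the string bytes. */
        return mallctl("thread.prof.name", NULL, NULL,
            (void *)&name, sizeof(name));
    }
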
diff --git a/deps/jemalloc/test/unit/prof_thread_name.sh b/deps/jemalloc/test/unit/prof_thread_name.sh
new file mode 100644
index 000000000..298c1058e
--- /dev/null
+++ b/deps/jemalloc/test/unit/prof_thread_name.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_prof}" = "x1" ] ; then
+ export MALLOC_CONF="prof:true,prof_active:false"
+fi
diff --git a/deps/jemalloc/test/unit/ql.c b/deps/jemalloc/test/unit/ql.c
index 05fad450f..b76c24c41 100644
--- a/deps/jemalloc/test/unit/ql.c
+++ b/deps/jemalloc/test/unit/ql.c
@@ -1,7 +1,9 @@
#include "test/jemalloc_test.h"
+#include "jemalloc/internal/ql.h"
+
/* Number of ring entries, in [2..26]. */
-#define NENTRIES 9
+#define NENTRIES 9
typedef struct list_s list_t;
typedef ql_head(list_t) list_head_t;
@@ -12,8 +14,7 @@ struct list_s {
};
static void
-test_empty_list(list_head_t *head)
-{
+test_empty_list(list_head_t *head) {
list_t *t;
unsigned i;
@@ -34,8 +35,7 @@ test_empty_list(list_head_t *head)
assert_u_eq(i, 0, "Unexpected element for empty list");
}
-TEST_BEGIN(test_ql_empty)
-{
+TEST_BEGIN(test_ql_empty) {
list_head_t head;
ql_new(&head);
@@ -44,8 +44,7 @@ TEST_BEGIN(test_ql_empty)
TEST_END
static void
-init_entries(list_t *entries, unsigned nentries)
-{
+init_entries(list_t *entries, unsigned nentries) {
unsigned i;
for (i = 0; i < nentries; i++) {
@@ -55,8 +54,7 @@ init_entries(list_t *entries, unsigned nentries)
}
static void
-test_entries_list(list_head_t *head, list_t *entries, unsigned nentries)
-{
+test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) {
list_t *t;
unsigned i;
@@ -91,31 +89,31 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries)
}
}
-TEST_BEGIN(test_ql_tail_insert)
-{
+TEST_BEGIN(test_ql_tail_insert) {
list_head_t head;
list_t entries[NENTRIES];
unsigned i;
ql_new(&head);
init_entries(entries, sizeof(entries)/sizeof(list_t));
- for (i = 0; i < NENTRIES; i++)
+ for (i = 0; i < NENTRIES; i++) {
ql_tail_insert(&head, &entries[i], link);
+ }
test_entries_list(&head, entries, NENTRIES);
}
TEST_END
-TEST_BEGIN(test_ql_tail_remove)
-{
+TEST_BEGIN(test_ql_tail_remove) {
list_head_t head;
list_t entries[NENTRIES];
unsigned i;
ql_new(&head);
init_entries(entries, sizeof(entries)/sizeof(list_t));
- for (i = 0; i < NENTRIES; i++)
+ for (i = 0; i < NENTRIES; i++) {
ql_tail_insert(&head, &entries[i], link);
+ }
for (i = 0; i < NENTRIES; i++) {
test_entries_list(&head, entries, NENTRIES-i);
@@ -125,31 +123,31 @@ TEST_BEGIN(test_ql_tail_remove)
}
TEST_END
-TEST_BEGIN(test_ql_head_insert)
-{
+TEST_BEGIN(test_ql_head_insert) {
list_head_t head;
list_t entries[NENTRIES];
unsigned i;
ql_new(&head);
init_entries(entries, sizeof(entries)/sizeof(list_t));
- for (i = 0; i < NENTRIES; i++)
+ for (i = 0; i < NENTRIES; i++) {
ql_head_insert(&head, &entries[NENTRIES-i-1], link);
+ }
test_entries_list(&head, entries, NENTRIES);
}
TEST_END
-TEST_BEGIN(test_ql_head_remove)
-{
+TEST_BEGIN(test_ql_head_remove) {
list_head_t head;
list_t entries[NENTRIES];
unsigned i;
ql_new(&head);
init_entries(entries, sizeof(entries)/sizeof(list_t));
- for (i = 0; i < NENTRIES; i++)
+ for (i = 0; i < NENTRIES; i++) {
ql_head_insert(&head, &entries[NENTRIES-i-1], link);
+ }
for (i = 0; i < NENTRIES; i++) {
test_entries_list(&head, &entries[i], NENTRIES-i);
@@ -159,8 +157,7 @@ TEST_BEGIN(test_ql_head_remove)
}
TEST_END
-TEST_BEGIN(test_ql_insert)
-{
+TEST_BEGIN(test_ql_insert) {
list_head_t head;
list_t entries[8];
list_t *a, *b, *c, *d, *e, *f, *g, *h;
@@ -196,14 +193,12 @@ TEST_BEGIN(test_ql_insert)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_ql_empty,
test_ql_tail_insert,
test_ql_tail_remove,
test_ql_head_insert,
test_ql_head_remove,
- test_ql_insert));
+ test_ql_insert);
}
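
The ql macros are intrusive: the linkage lives inside each element (the ql_elm(list_t) link field above), so insertion never allocates. Mirroring the calls the test exercises, a self-contained use looks roughly like this sketch:

    #include "jemalloc/internal/ql.h"

    typedef struct item_s item_t;
    struct item_s {
        int id;
        ql_elm(item_t) link;  /* Embedded linkage. */
    };

    static int
    sum_two(void) {
        ql_head(item_t) head;
        item_t a, b;
        item_t *t;
        int sum = 0;

        a.id = 1;
        b.id = 2;
        ql_elm_new(&a, link);
        ql_elm_new(&b, link);
        ql_new(&head);
        ql_tail_insert(&head, &a, link);
        ql_tail_insert(&head, &b, link);
        ql_foreach(t, &head, link) {
            sum += t->id;
        }
        return sum;  /* 3 */
    }
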
diff --git a/deps/jemalloc/test/unit/qr.c b/deps/jemalloc/test/unit/qr.c
index a2a2d902b..271a10953 100644
--- a/deps/jemalloc/test/unit/qr.c
+++ b/deps/jemalloc/test/unit/qr.c
@@ -1,9 +1,11 @@
#include "test/jemalloc_test.h"
+#include "jemalloc/internal/qr.h"
+
/* Number of ring entries, in [2..26]. */
-#define NENTRIES 9
+#define NENTRIES 9
/* Split index, in [1..NENTRIES). */
-#define SPLIT_INDEX 5
+#define SPLIT_INDEX 5
typedef struct ring_s ring_t;
@@ -13,8 +15,7 @@ struct ring_s {
};
static void
-init_entries(ring_t *entries)
-{
+init_entries(ring_t *entries) {
unsigned i;
for (i = 0; i < NENTRIES; i++) {
@@ -24,8 +25,7 @@ init_entries(ring_t *entries)
}
static void
-test_independent_entries(ring_t *entries)
-{
+test_independent_entries(ring_t *entries) {
ring_t *t;
unsigned i, j;
@@ -61,8 +61,7 @@ test_independent_entries(ring_t *entries)
}
}
-TEST_BEGIN(test_qr_one)
-{
+TEST_BEGIN(test_qr_one) {
ring_t entries[NENTRIES];
init_entries(entries);
@@ -71,8 +70,7 @@ TEST_BEGIN(test_qr_one)
TEST_END
static void
-test_entries_ring(ring_t *entries)
-{
+test_entries_ring(ring_t *entries) {
ring_t *t;
unsigned i, j;
@@ -104,27 +102,27 @@ test_entries_ring(ring_t *entries)
}
}
-TEST_BEGIN(test_qr_after_insert)
-{
+TEST_BEGIN(test_qr_after_insert) {
ring_t entries[NENTRIES];
unsigned i;
init_entries(entries);
- for (i = 1; i < NENTRIES; i++)
+ for (i = 1; i < NENTRIES; i++) {
qr_after_insert(&entries[i - 1], &entries[i], link);
+ }
test_entries_ring(entries);
}
TEST_END
-TEST_BEGIN(test_qr_remove)
-{
+TEST_BEGIN(test_qr_remove) {
ring_t entries[NENTRIES];
ring_t *t;
unsigned i, j;
init_entries(entries);
- for (i = 1; i < NENTRIES; i++)
+ for (i = 1; i < NENTRIES; i++) {
qr_after_insert(&entries[i - 1], &entries[i], link);
+ }
for (i = 0; i < NENTRIES; i++) {
j = 0;
@@ -145,15 +143,15 @@ TEST_BEGIN(test_qr_remove)
}
TEST_END
-TEST_BEGIN(test_qr_before_insert)
-{
+TEST_BEGIN(test_qr_before_insert) {
ring_t entries[NENTRIES];
ring_t *t;
unsigned i, j;
init_entries(entries);
- for (i = 1; i < NENTRIES; i++)
+ for (i = 1; i < NENTRIES; i++) {
qr_before_insert(&entries[i - 1], &entries[i], link);
+ }
for (i = 0; i < NENTRIES; i++) {
j = 0;
qr_foreach(t, &entries[i], link) {
@@ -184,8 +182,7 @@ TEST_BEGIN(test_qr_before_insert)
TEST_END
static void
-test_split_entries(ring_t *entries)
-{
+test_split_entries(ring_t *entries) {
ring_t *t;
unsigned i, j;
@@ -206,43 +203,41 @@ test_split_entries(ring_t *entries)
}
}
-TEST_BEGIN(test_qr_meld_split)
-{
+TEST_BEGIN(test_qr_meld_split) {
ring_t entries[NENTRIES];
unsigned i;
init_entries(entries);
- for (i = 1; i < NENTRIES; i++)
+ for (i = 1; i < NENTRIES; i++) {
qr_after_insert(&entries[i - 1], &entries[i], link);
+ }
- qr_split(&entries[0], &entries[SPLIT_INDEX], link);
+ qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link);
test_split_entries(entries);
- qr_meld(&entries[0], &entries[SPLIT_INDEX], link);
+ qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link);
test_entries_ring(entries);
- qr_meld(&entries[0], &entries[SPLIT_INDEX], link);
+ qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link);
test_split_entries(entries);
- qr_split(&entries[0], &entries[SPLIT_INDEX], link);
+ qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link);
test_entries_ring(entries);
- qr_split(&entries[0], &entries[0], link);
+ qr_split(&entries[0], &entries[0], ring_t, link);
test_entries_ring(entries);
- qr_meld(&entries[0], &entries[0], link);
+ qr_meld(&entries[0], &entries[0], ring_t, link);
test_entries_ring(entries);
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_qr_one,
test_qr_after_insert,
test_qr_remove,
test_qr_before_insert,
- test_qr_meld_split));
+ test_qr_meld_split);
}
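
The substantive change in this hunk is the qr_meld()/qr_split() signature: both now take the element type so the macro can declare a typed temporary internally. Mirrored from the calls above, minimal usage of the updated API (sketch):

    #include "jemalloc/internal/qr.h"

    typedef struct ring_s ring_t;
    struct ring_s {
        int id;
        qr(ring_t) link;
    };

    static void
    ring_demo(void) {
        ring_t a, b;

        qr_new(&a, link);
        qr_new(&b, link);
        qr_after_insert(&a, &b, link);   /* One ring: a <-> b. */
        qr_split(&a, &b, ring_t, link);  /* Back to two singletons. */
        qr_meld(&a, &b, ring_t, link);   /* Rejoined into one ring. */
    }
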
diff --git a/deps/jemalloc/test/unit/quarantine.c b/deps/jemalloc/test/unit/quarantine.c
deleted file mode 100644
index bbd48a51d..000000000
--- a/deps/jemalloc/test/unit/quarantine.c
+++ /dev/null
@@ -1,108 +0,0 @@
-#include "test/jemalloc_test.h"
-
-#define QUARANTINE_SIZE 8192
-#define STRINGIFY_HELPER(x) #x
-#define STRINGIFY(x) STRINGIFY_HELPER(x)
-
-#ifdef JEMALLOC_FILL
-const char *malloc_conf = "abort:false,junk:true,redzone:true,quarantine:"
- STRINGIFY(QUARANTINE_SIZE);
-#endif
-
-void
-quarantine_clear(void)
-{
- void *p;
-
- p = mallocx(QUARANTINE_SIZE*2, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() failure");
- dallocx(p, 0);
-}
-
-TEST_BEGIN(test_quarantine)
-{
-#define SZ ZU(256)
-#define NQUARANTINED (QUARANTINE_SIZE/SZ)
- void *quarantined[NQUARANTINED+1];
- size_t i, j;
-
- test_skip_if(!config_fill);
-
- assert_zu_eq(nallocx(SZ, 0), SZ,
- "SZ=%zu does not precisely equal a size class", SZ);
-
- quarantine_clear();
-
- /*
- * Allocate enough regions to completely fill the quarantine, plus one
- * more. The last iteration occurs with a completely full quarantine,
- * but no regions should be drained from the quarantine until the last
- * deallocation occurs. Therefore no region recycling should occur
- * until after this loop completes.
- */
- for (i = 0; i < NQUARANTINED+1; i++) {
- void *p = mallocx(SZ, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() failure");
- quarantined[i] = p;
- dallocx(p, 0);
- for (j = 0; j < i; j++) {
- assert_ptr_ne(p, quarantined[j],
- "Quarantined region recycled too early; "
- "i=%zu, j=%zu", i, j);
- }
- }
-#undef NQUARANTINED
-#undef SZ
-}
-TEST_END
-
-static bool detected_redzone_corruption;
-
-static void
-arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after,
- size_t offset, uint8_t byte)
-{
-
- detected_redzone_corruption = true;
-}
-
-TEST_BEGIN(test_quarantine_redzone)
-{
- char *s;
- arena_redzone_corruption_t *arena_redzone_corruption_orig;
-
- test_skip_if(!config_fill);
-
- arena_redzone_corruption_orig = arena_redzone_corruption;
- arena_redzone_corruption = arena_redzone_corruption_replacement;
-
- /* Test underflow. */
- detected_redzone_corruption = false;
- s = (char *)mallocx(1, 0);
- assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
- s[-1] = 0xbb;
- dallocx(s, 0);
- assert_true(detected_redzone_corruption,
- "Did not detect redzone corruption");
-
- /* Test overflow. */
- detected_redzone_corruption = false;
- s = (char *)mallocx(1, 0);
- assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
- s[sallocx(s, 0)] = 0xbb;
- dallocx(s, 0);
- assert_true(detected_redzone_corruption,
- "Did not detect redzone corruption");
-
- arena_redzone_corruption = arena_redzone_corruption_orig;
-}
-TEST_END
-
-int
-main(void)
-{
-
- return (test(
- test_quarantine,
- test_quarantine_redzone));
-}
diff --git a/deps/jemalloc/test/unit/rb.c b/deps/jemalloc/test/unit/rb.c
index b737485a7..65c049207 100644
--- a/deps/jemalloc/test/unit/rb.c
+++ b/deps/jemalloc/test/unit/rb.c
@@ -1,27 +1,29 @@
#include "test/jemalloc_test.h"
-#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \
- a_type *rbp_bh_t; \
- for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \
- rbp_bh_t != &(a_rbt)->rbt_nil; \
- rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \
- if (rbtn_red_get(a_type, a_field, rbp_bh_t) == false) { \
- (r_height)++; \
+#include "jemalloc/internal/rb.h"
+
+#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \
+ a_type *rbp_bh_t; \
+ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \
+ NULL; rbp_bh_t = rbtn_left_get(a_type, a_field, \
+ rbp_bh_t)) { \
+ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \
+ (r_height)++; \
+ } \
} \
- } \
} while (0)
typedef struct node_s node_t;
struct node_s {
-#define NODE_MAGIC 0x9823af7e
+#define NODE_MAGIC 0x9823af7e
uint32_t magic;
rb_node(node_t) link;
uint64_t key;
};
static int
-node_cmp(node_t *a, node_t *b) {
+node_cmp(const node_t *a, const node_t *b) {
int ret;
assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic");
@@ -36,19 +38,19 @@ node_cmp(node_t *a, node_t *b) {
ret = (((uintptr_t)a) > ((uintptr_t)b))
- (((uintptr_t)a) < ((uintptr_t)b));
}
- return (ret);
+ return ret;
}
typedef rb_tree(node_t) tree_t;
rb_gen(static, tree_, tree_t, node_t, link, node_cmp);
-TEST_BEGIN(test_rb_empty)
-{
+TEST_BEGIN(test_rb_empty) {
tree_t tree;
node_t key;
tree_new(&tree);
+ assert_true(tree_empty(&tree), "Tree should be empty");
assert_ptr_null(tree_first(&tree), "Unexpected node");
assert_ptr_null(tree_last(&tree), "Unexpected node");
@@ -67,47 +69,56 @@ TEST_BEGIN(test_rb_empty)
TEST_END
static unsigned
-tree_recurse(node_t *node, unsigned black_height, unsigned black_depth,
- node_t *nil)
-{
+tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) {
unsigned ret = 0;
- node_t *left_node = rbtn_left_get(node_t, link, node);
- node_t *right_node = rbtn_right_get(node_t, link, node);
+ node_t *left_node;
+ node_t *right_node;
- if (rbtn_red_get(node_t, link, node) == false)
+ if (node == NULL) {
+ return ret;
+ }
+
+ left_node = rbtn_left_get(node_t, link, node);
+ right_node = rbtn_right_get(node_t, link, node);
+
+ if (!rbtn_red_get(node_t, link, node)) {
black_depth++;
+ }
/* Red nodes must be interleaved with black nodes. */
if (rbtn_red_get(node_t, link, node)) {
- assert_false(rbtn_red_get(node_t, link, left_node),
- "Node should be black");
- assert_false(rbtn_red_get(node_t, link, right_node),
- "Node should be black");
+ if (left_node != NULL) {
+ assert_false(rbtn_red_get(node_t, link, left_node),
+ "Node should be black");
+ }
+ if (right_node != NULL) {
+ assert_false(rbtn_red_get(node_t, link, right_node),
+ "Node should be black");
+ }
}
- if (node == nil)
- return (ret);
/* Self. */
assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic");
/* Left subtree. */
- if (left_node != nil)
- ret += tree_recurse(left_node, black_height, black_depth, nil);
- else
+ if (left_node != NULL) {
+ ret += tree_recurse(left_node, black_height, black_depth);
+ } else {
ret += (black_depth != black_height);
+ }
/* Right subtree. */
- if (right_node != nil)
- ret += tree_recurse(right_node, black_height, black_depth, nil);
- else
+ if (right_node != NULL) {
+ ret += tree_recurse(right_node, black_height, black_depth);
+ } else {
ret += (black_depth != black_height);
+ }
- return (ret);
+ return ret;
}
static node_t *
-tree_iterate_cb(tree_t *tree, node_t *node, void *data)
-{
+tree_iterate_cb(tree_t *tree, node_t *node, void *data) {
unsigned *i = (unsigned *)data;
node_t *search_node;
@@ -130,34 +141,31 @@ tree_iterate_cb(tree_t *tree, node_t *node, void *data)
(*i)++;
- return (NULL);
+ return NULL;
}
static unsigned
-tree_iterate(tree_t *tree)
-{
+tree_iterate(tree_t *tree) {
unsigned i;
i = 0;
tree_iter(tree, NULL, tree_iterate_cb, (void *)&i);
- return (i);
+ return i;
}
static unsigned
-tree_iterate_reverse(tree_t *tree)
-{
+tree_iterate_reverse(tree_t *tree) {
unsigned i;
i = 0;
tree_reverse_iter(tree, NULL, tree_iterate_cb, (void *)&i);
- return (i);
+ return i;
}
static void
-node_remove(tree_t *tree, node_t *node, unsigned nnodes)
-{
+node_remove(tree_t *tree, node_t *node, unsigned nnodes) {
node_t *search_node;
unsigned black_height, imbalances;
@@ -180,8 +188,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes)
node->magic = 0;
rbtn_black_height(node_t, link, tree, black_height);
- imbalances = tree_recurse(tree->rbt_root, black_height, 0,
- &(tree->rbt_nil));
+ imbalances = tree_recurse(tree->rbt_root, black_height, 0);
assert_u_eq(imbalances, 0, "Tree is unbalanced");
assert_u_eq(tree_iterate(tree), nnodes-1,
"Unexpected node iteration count");
@@ -190,32 +197,37 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes)
}
static node_t *
-remove_iterate_cb(tree_t *tree, node_t *node, void *data)
-{
+remove_iterate_cb(tree_t *tree, node_t *node, void *data) {
unsigned *nnodes = (unsigned *)data;
node_t *ret = tree_next(tree, node);
node_remove(tree, node, *nnodes);
- return (ret);
+ return ret;
}
static node_t *
-remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data)
-{
+remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) {
unsigned *nnodes = (unsigned *)data;
node_t *ret = tree_prev(tree, node);
node_remove(tree, node, *nnodes);
- return (ret);
+ return ret;
}
-TEST_BEGIN(test_rb_random)
-{
-#define NNODES 25
-#define NBAGS 250
-#define SEED 42
+static void
+destroy_cb(node_t *node, void *data) {
+ unsigned *nnodes = (unsigned *)data;
+
+ assert_u_gt(*nnodes, 0, "Destruction removed too many nodes");
+ (*nnodes)--;
+}
+
+TEST_BEGIN(test_rb_random) {
+#define NNODES 25
+#define NBAGS 250
+#define SEED 42
sfmt_t *sfmt;
uint64_t bag[NNODES];
tree_t tree;
@@ -227,23 +239,25 @@ TEST_BEGIN(test_rb_random)
switch (i) {
case 0:
/* Insert in order. */
- for (j = 0; j < NNODES; j++)
+ for (j = 0; j < NNODES; j++) {
bag[j] = j;
+ }
break;
case 1:
/* Insert in reverse order. */
- for (j = 0; j < NNODES; j++)
+ for (j = 0; j < NNODES; j++) {
bag[j] = NNODES - j - 1;
+ }
break;
default:
- for (j = 0; j < NNODES; j++)
+ for (j = 0; j < NNODES; j++) {
bag[j] = gen_rand64_range(sfmt, NNODES);
+ }
}
for (j = 1; j <= NNODES; j++) {
/* Initialize tree and nodes. */
tree_new(&tree);
- tree.rbt_nil.magic = 0;
for (k = 0; k < j; k++) {
nodes[k].magic = NODE_MAGIC;
nodes[k].key = bag[k];
@@ -256,7 +270,7 @@ TEST_BEGIN(test_rb_random)
rbtn_black_height(node_t, link, &tree,
black_height);
imbalances = tree_recurse(tree.rbt_root,
- black_height, 0, &(tree.rbt_nil));
+ black_height, 0);
assert_u_eq(imbalances, 0,
"Tree is unbalanced");
@@ -265,6 +279,8 @@ TEST_BEGIN(test_rb_random)
assert_u_eq(tree_iterate_reverse(&tree), k+1,
"Unexpected node iteration count");
+ assert_false(tree_empty(&tree),
+ "Tree should not be empty");
assert_ptr_not_null(tree_first(&tree),
"Tree should not be empty");
assert_ptr_not_null(tree_last(&tree),
@@ -275,14 +291,16 @@ TEST_BEGIN(test_rb_random)
}
/* Remove nodes. */
- switch (i % 4) {
+ switch (i % 5) {
case 0:
- for (k = 0; k < j; k++)
+ for (k = 0; k < j; k++) {
node_remove(&tree, &nodes[k], j - k);
+ }
break;
case 1:
- for (k = j; k > 0; k--)
+ for (k = j; k > 0; k--) {
node_remove(&tree, &nodes[k-1], k);
+ }
break;
case 2: {
node_t *start;
@@ -311,6 +329,12 @@ TEST_BEGIN(test_rb_random)
assert_u_eq(nnodes, 0,
"Removal terminated early");
break;
+ } case 4: {
+ unsigned nnodes = j;
+ tree_destroy(&tree, destroy_cb, &nnodes);
+ assert_u_eq(nnodes, 0,
+ "Destruction terminated early");
+ break;
} default:
not_reached();
}
@@ -324,10 +348,8 @@ TEST_BEGIN(test_rb_random)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_rb_empty,
- test_rb_random));
+ test_rb_random);
}
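
rb_gen() expands into a full set of static functions (tree_new, tree_insert, tree_remove, tree_first, tree_destroy, and so on) specialized for the intrusive node type; the comparator change in this hunk only makes it take const pointers. Condensed from the declarations above into a sketch (keys assumed unique here; the test additionally breaks ties by node address):

    #include <stdint.h>
    #include "jemalloc/internal/rb.h"

    typedef struct knode_s knode_t;
    struct knode_s {
        uint64_t key;
        rb_node(knode_t) link;  /* Embedded red-black linkage. */
    };

    static int
    knode_cmp(const knode_t *a, const knode_t *b) {
        return (a->key > b->key) - (a->key < b->key);
    }

    typedef rb_tree(knode_t) ktree_t;
    rb_gen(static, ktree_, ktree_t, knode_t, link, knode_cmp);

    /* Usage: ktree_new(&t); ktree_insert(&t, &n);
     * knode_t *min = ktree_first(&t); ktree_remove(&t, &n); */
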
diff --git a/deps/jemalloc/test/unit/retained.c b/deps/jemalloc/test/unit/retained.c
new file mode 100644
index 000000000..d51a59811
--- /dev/null
+++ b/deps/jemalloc/test/unit/retained.c
@@ -0,0 +1,181 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/spin.h"
+
+static unsigned arena_ind;
+static size_t sz;
+static size_t esz;
+#define NEPOCHS 8
+#define PER_THD_NALLOCS 1
+static atomic_u_t epoch;
+static atomic_u_t nfinished;
+
+static unsigned
+do_arena_create(extent_hooks_t *h) {
+ unsigned arena_ind;
+ size_t sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz,
+ (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0,
+ "Unexpected mallctl() failure");
+ return arena_ind;
+}
+
+static void
+do_arena_destroy(unsigned arena_ind) {
+ size_t mib[3];
+ size_t miblen;
+
+ miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() failure");
+ mib[1] = (size_t)arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0,
+ "Unexpected mallctlbymib() failure");
+}
+
+static void
+do_refresh(void) {
+ uint64_t epoch = 1;
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch,
+ sizeof(epoch)), 0, "Unexpected mallctl() failure");
+}
+
+static size_t
+do_get_size_impl(const char *cmd, unsigned arena_ind) {
+ size_t mib[4];
+ size_t miblen = sizeof(mib) / sizeof(size_t);
+ size_t z = sizeof(size_t);
+
+ assert_d_eq(mallctlnametomib(cmd, mib, &miblen),
+ 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd);
+ mib[2] = arena_ind;
+ size_t size;
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0),
+ 0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd);
+
+ return size;
+}
+
+static size_t
+do_get_active(unsigned arena_ind) {
+ return do_get_size_impl("stats.arenas.0.pactive", arena_ind) * PAGE;
+}
+
+static size_t
+do_get_mapped(unsigned arena_ind) {
+ return do_get_size_impl("stats.arenas.0.mapped", arena_ind);
+}
+
+static void *
+thd_start(void *arg) {
+ for (unsigned next_epoch = 1; next_epoch < NEPOCHS; next_epoch++) {
+ /* Busy-wait for next epoch. */
+ unsigned cur_epoch;
+ spin_t spinner = SPIN_INITIALIZER;
+ while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) !=
+ next_epoch) {
+ spin_adaptive(&spinner);
+ }
+ assert_u_eq(cur_epoch, next_epoch, "Unexpected epoch");
+
+ /*
+ * Allocate. The main thread will reset the arena, so there's
+ * no need to deallocate.
+ */
+ for (unsigned i = 0; i < PER_THD_NALLOCS; i++) {
+ void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) |
+ MALLOCX_TCACHE_NONE
+ );
+ assert_ptr_not_null(p,
+ "Unexpected mallocx() failure\n");
+ }
+
+ /* Let the main thread know we've finished this iteration. */
+ atomic_fetch_add_u(&nfinished, 1, ATOMIC_RELEASE);
+ }
+
+ return NULL;
+}
+
+TEST_BEGIN(test_retained) {
+ test_skip_if(!config_stats);
+
+ arena_ind = do_arena_create(NULL);
+ sz = nallocx(HUGEPAGE, 0);
+ esz = sz + sz_large_pad;
+
+ atomic_store_u(&epoch, 0, ATOMIC_RELAXED);
+
+ unsigned nthreads = ncpus * 2;
+ VARIABLE_ARRAY(thd_t, threads, nthreads);
+ for (unsigned i = 0; i < nthreads; i++) {
+ thd_create(&threads[i], thd_start, NULL);
+ }
+
+ for (unsigned e = 1; e < NEPOCHS; e++) {
+ atomic_store_u(&nfinished, 0, ATOMIC_RELEASE);
+ atomic_store_u(&epoch, e, ATOMIC_RELEASE);
+
+ /* Wait for threads to finish allocating. */
+ spin_t spinner = SPIN_INITIALIZER;
+ while (atomic_load_u(&nfinished, ATOMIC_ACQUIRE) < nthreads) {
+ spin_adaptive(&spinner);
+ }
+
+ /*
+ * Assert that retained is no more than the sum of size classes
+ * that should have been used to satisfy the worker threads'
+ * requests, discounting per growth fragmentation.
+ */
+ do_refresh();
+
+ size_t allocated = esz * nthreads * PER_THD_NALLOCS;
+ size_t active = do_get_active(arena_ind);
+ assert_zu_le(allocated, active, "Unexpected active memory");
+ size_t mapped = do_get_mapped(arena_ind);
+ assert_zu_le(active, mapped, "Unexpected mapped memory");
+
+ arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false);
+ size_t usable = 0;
+ size_t fragmented = 0;
+ for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind <
+ arena->extent_grow_next; pind++) {
+ size_t psz = sz_pind2sz(pind);
+ size_t psz_fragmented = psz % esz;
+ size_t psz_usable = psz - psz_fragmented;
+ /*
+ * Only consider size classes that wouldn't be skipped.
+ */
+ if (psz_usable > 0) {
+ assert_zu_lt(usable, allocated,
+ "Excessive retained memory "
+ "(%#zx[+%#zx] > %#zx)", usable, psz_usable,
+ allocated);
+ fragmented += psz_fragmented;
+ usable += psz_usable;
+ }
+ }
+
+ /*
+ * Clean up arena. Destroying and recreating the arena
+ * is simpler than specifying extent hooks that deallocate
+ * (rather than retaining) during reset.
+ */
+ do_arena_destroy(arena_ind);
+ assert_u_eq(do_arena_create(NULL), arena_ind,
+ "Unexpected arena index");
+ }
+
+ for (unsigned i = 0; i < nthreads; i++) {
+ thd_join(threads[i], NULL);
+ }
+
+ do_arena_destroy(arena_ind);
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_retained);
+}
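
jemalloc caches its statistics behind an epoch: writing any value to "epoch" refreshes the snapshot, which is exactly what do_refresh() does before the test reads pactive and mapped. The same pattern from application code, as a sketch (requires a build with statistics enabled):

    #include <stdint.h>
    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    static size_t
    current_allocated(void) {
        uint64_t epoch = 1;
        size_t allocated;
        size_t sz = sizeof(allocated);

        mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch));
        mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0);
        return allocated;
    }
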
diff --git a/deps/jemalloc/test/unit/rtree.c b/deps/jemalloc/test/unit/rtree.c
index 5463055fe..908100fac 100644
--- a/deps/jemalloc/test/unit/rtree.c
+++ b/deps/jemalloc/test/unit/rtree.c
@@ -1,105 +1,207 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_rtree_get_empty)
-{
- unsigned i;
-
- for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
- assert_u_eq(rtree_get(rtree, 0), 0,
- "rtree_get() should return NULL for empty tree");
- rtree_delete(rtree);
+#include "jemalloc/internal/rtree.h"
+
+rtree_node_alloc_t *rtree_node_alloc_orig;
+rtree_node_dalloc_t *rtree_node_dalloc_orig;
+rtree_leaf_alloc_t *rtree_leaf_alloc_orig;
+rtree_leaf_dalloc_t *rtree_leaf_dalloc_orig;
+
+/* Potentially too large to safely place on the stack. */
+rtree_t test_rtree;
+
+static rtree_node_elm_t *
+rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+ rtree_node_elm_t *node;
+
+ if (rtree != &test_rtree) {
+ return rtree_node_alloc_orig(tsdn, rtree, nelms);
}
+
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
+ node = (rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t));
+ assert_ptr_not_null(node, "Unexpected calloc() failure");
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
+
+ return node;
}
-TEST_END
-TEST_BEGIN(test_rtree_extrema)
-{
- unsigned i;
+static void
+rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_node_elm_t *node) {
+ if (rtree != &test_rtree) {
+ rtree_node_dalloc_orig(tsdn, rtree, node);
+ return;
+ }
- for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
+ free(node);
+}
- rtree_set(rtree, 0, 1);
- assert_u_eq(rtree_get(rtree, 0), 1,
- "rtree_get() should return previously set value");
+static rtree_leaf_elm_t *
+rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+ rtree_leaf_elm_t *leaf;
- rtree_set(rtree, ~((uintptr_t)0), 1);
- assert_u_eq(rtree_get(rtree, ~((uintptr_t)0)), 1,
- "rtree_get() should return previously set value");
+ if (rtree != &test_rtree) {
+ return rtree_leaf_alloc_orig(tsdn, rtree, nelms);
+ }
+
+ malloc_mutex_unlock(tsdn, &rtree->init_lock);
+ leaf = (rtree_leaf_elm_t *)calloc(nelms, sizeof(rtree_leaf_elm_t));
+ assert_ptr_not_null(leaf, "Unexpected calloc() failure");
+ malloc_mutex_lock(tsdn, &rtree->init_lock);
+
+ return leaf;
+}
- rtree_delete(rtree);
+static void
+rtree_leaf_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree,
+ rtree_leaf_elm_t *leaf) {
+ if (rtree != &test_rtree) {
+ rtree_leaf_dalloc_orig(tsdn, rtree, leaf);
+ return;
}
+
+ free(leaf);
+}
+
+TEST_BEGIN(test_rtree_read_empty) {
+ tsdn_t *tsdn;
+
+ tsdn = tsdn_fetch();
+
+ rtree_t *rtree = &test_rtree;
+ rtree_ctx_t rtree_ctx;
+ rtree_ctx_data_init(&rtree_ctx);
+ assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure");
+ assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE,
+ false), "rtree_extent_read() should return NULL for empty tree");
+ rtree_delete(tsdn, rtree);
}
TEST_END
-TEST_BEGIN(test_rtree_bits)
-{
- unsigned i, j, k;
-
- for (i = 1; i < (sizeof(uintptr_t) << 3); i++) {
- uintptr_t keys[] = {0, 1,
- (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1};
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
-
- for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
- rtree_set(rtree, keys[j], 1);
- for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) {
- assert_u_eq(rtree_get(rtree, keys[k]), 1,
- "rtree_get() should return previously set "
- "value and ignore insignificant key bits; "
- "i=%u, j=%u, k=%u, set key=%#"PRIxPTR", "
- "get key=%#"PRIxPTR, i, j, k, keys[j],
- keys[k]);
- }
- assert_u_eq(rtree_get(rtree,
- (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))), 0,
- "Only leftmost rtree leaf should be set; "
- "i=%u, j=%u", i, j);
- rtree_set(rtree, keys[j], 0);
- }
+#undef NTHREADS
+#undef NITERS
+#undef SEED
- rtree_delete(rtree);
- }
+TEST_BEGIN(test_rtree_extrema) {
+ extent_t extent_a, extent_b;
+ extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false,
+ sz_size2index(LARGE_MINCLASS), 0, extent_state_active, false,
+ false, true);
+ extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0,
+ extent_state_active, false, false, true);
+
+ tsdn_t *tsdn = tsdn_fetch();
+
+ rtree_t *rtree = &test_rtree;
+ rtree_ctx_t rtree_ctx;
+ rtree_ctx_data_init(&rtree_ctx);
+ assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure");
+
+ assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &extent_a,
+ extent_szind_get(&extent_a), extent_slab_get(&extent_a)),
+ "Unexpected rtree_write() failure");
+ rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE,
+ extent_szind_get(&extent_a), extent_slab_get(&extent_a));
+ assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, true),
+ &extent_a,
+ "rtree_extent_read() should return previously set value");
+
+ assert_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0),
+ &extent_b, extent_szind_get_maybe_invalid(&extent_b),
+ extent_slab_get(&extent_b)), "Unexpected rtree_write() failure");
+ assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ ~((uintptr_t)0), true), &extent_b,
+ "rtree_extent_read() should return previously set value");
+
+ rtree_delete(tsdn, rtree);
}
TEST_END
-TEST_BEGIN(test_rtree_random)
-{
- unsigned i;
- sfmt_t *sfmt;
-#define NSET 100
-#define SEED 42
-
- sfmt = init_gen_rand(SEED);
- for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) {
- rtree_t *rtree = rtree_new(i, imalloc, idalloc);
- uintptr_t keys[NSET];
- unsigned j;
-
- for (j = 0; j < NSET; j++) {
- keys[j] = (uintptr_t)gen_rand64(sfmt);
- rtree_set(rtree, keys[j], 1);
- assert_u_eq(rtree_get(rtree, keys[j]), 1,
- "rtree_get() should return previously set value");
- }
- for (j = 0; j < NSET; j++) {
- assert_u_eq(rtree_get(rtree, keys[j]), 1,
- "rtree_get() should return previously set value");
+TEST_BEGIN(test_rtree_bits) {
+ tsdn_t *tsdn = tsdn_fetch();
+
+ uintptr_t keys[] = {PAGE, PAGE + 1,
+ PAGE + (((uintptr_t)1) << LG_PAGE) - 1};
+
+ extent_t extent;
+ extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0,
+ extent_state_active, false, false, true);
+
+ rtree_t *rtree = &test_rtree;
+ rtree_ctx_t rtree_ctx;
+ rtree_ctx_data_init(&rtree_ctx);
+ assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure");
+
+ for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) {
+ assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i],
+ &extent, NSIZES, false),
+ "Unexpected rtree_write() failure");
+ for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
+ assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ keys[j], true), &extent,
+ "rtree_extent_read() should return previously set "
+ "value and ignore insignificant key bits; i=%u, "
+ "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i,
+ j, keys[i], keys[j]);
}
+ assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ (((uintptr_t)2) << LG_PAGE), false),
+ "Only leftmost rtree leaf should be set; i=%u", i);
+ rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]);
+ }
- for (j = 0; j < NSET; j++) {
- rtree_set(rtree, keys[j], 0);
- assert_u_eq(rtree_get(rtree, keys[j]), 0,
- "rtree_get() should return previously set value");
- }
- for (j = 0; j < NSET; j++) {
- assert_u_eq(rtree_get(rtree, keys[j]), 0,
- "rtree_get() should return previously set value");
- }
+ rtree_delete(tsdn, rtree);
+}
+TEST_END
+
+TEST_BEGIN(test_rtree_random) {
+#define NSET 16
+#define SEED 42
+ sfmt_t *sfmt = init_gen_rand(SEED);
+ tsdn_t *tsdn = tsdn_fetch();
+ uintptr_t keys[NSET];
+ rtree_t *rtree = &test_rtree;
+ rtree_ctx_t rtree_ctx;
+ rtree_ctx_data_init(&rtree_ctx);
+
+ extent_t extent;
+ extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0,
+ extent_state_active, false, false, true);
+
+ assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure");
+
+ for (unsigned i = 0; i < NSET; i++) {
+ keys[i] = (uintptr_t)gen_rand64(sfmt);
+ rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree,
+ &rtree_ctx, keys[i], false, true);
+ assert_ptr_not_null(elm,
+ "Unexpected rtree_leaf_elm_lookup() failure");
+ rtree_leaf_elm_write(tsdn, rtree, elm, &extent, NSIZES, false);
+ assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ keys[i], true), &extent,
+ "rtree_extent_read() should return previously set value");
+ }
+ for (unsigned i = 0; i < NSET; i++) {
+ assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ keys[i], true), &extent,
+ "rtree_extent_read() should return previously set value, "
+ "i=%u", i);
+ }
- rtree_delete(rtree);
+ for (unsigned i = 0; i < NSET; i++) {
+ rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]);
+ assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ keys[i], true),
+ "rtree_extent_read() should return previously set value");
+ }
+ for (unsigned i = 0; i < NSET; i++) {
+ assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx,
+ keys[i], true),
+ "rtree_extent_read() should return previously set value");
}
+
+ rtree_delete(tsdn, rtree);
fini_gen_rand(sfmt);
#undef NSET
#undef SEED
@@ -107,12 +209,19 @@ TEST_BEGIN(test_rtree_random)
TEST_END
int
-main(void)
-{
-
- return (test(
- test_rtree_get_empty,
+main(void) {
+ rtree_node_alloc_orig = rtree_node_alloc;
+ rtree_node_alloc = rtree_node_alloc_intercept;
+ rtree_node_dalloc_orig = rtree_node_dalloc;
+ rtree_node_dalloc = rtree_node_dalloc_intercept;
+ rtree_leaf_alloc_orig = rtree_leaf_alloc;
+ rtree_leaf_alloc = rtree_leaf_alloc_intercept;
+ rtree_leaf_dalloc_orig = rtree_leaf_dalloc;
+ rtree_leaf_dalloc = rtree_leaf_dalloc_intercept;
+
+ return test(
+ test_rtree_read_empty,
test_rtree_extrema,
test_rtree_bits,
- test_rtree_random));
+ test_rtree_random);
}
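
The main() above illustrates a common testing idiom: save the library's allocation hooks, install intercepting wrappers, and have each wrapper delegate to the saved original for any object it does not own (the rtree interceptors additionally drop init_lock around calloc(), presumably so a reentrant allocation cannot self-deadlock). A generic sketch of the idiom follows; alloc_hook, alloc_intercept, and default_alloc are hypothetical names, the shape of the pattern is the point.

    #include <stdio.h>
    #include <stdlib.h>

    typedef void *alloc_hook_t(size_t);

    /* Stand-in for a library-provided, replaceable allocation hook. */
    static void *default_alloc(size_t n) { return malloc(n); }
    alloc_hook_t *alloc_hook = default_alloc;

    static alloc_hook_t *alloc_hook_orig; /* saved original hook */

    static void *
    alloc_intercept(size_t n) {
        void *p = alloc_hook_orig(n); /* delegate to the real hook */
        printf("intercepted %zu-byte allocation -> %p\n", n, p);
        return p;
    }

    int
    main(void) {
        alloc_hook_orig = alloc_hook; /* save */
        alloc_hook = alloc_intercept; /* install */

        void *p = alloc_hook(32);     /* routed through the interceptor */
        free(p);

        alloc_hook = alloc_hook_orig; /* restore */
        return 0;
    }
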
diff --git a/deps/jemalloc/test/unit/size_classes.c b/deps/jemalloc/test/unit/size_classes.c
new file mode 100644
index 000000000..bcff56098
--- /dev/null
+++ b/deps/jemalloc/test/unit/size_classes.c
@@ -0,0 +1,183 @@
+#include "test/jemalloc_test.h"
+
+static size_t
+get_max_size_class(void) {
+ unsigned nlextents;
+ size_t mib[4];
+ size_t sz, miblen, max_size_class;
+
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL,
+ 0), 0, "Unexpected mallctl() error");
+
+ miblen = sizeof(mib) / sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0,
+ "Unexpected mallctlnametomib() error");
+ mib[2] = nlextents - 1;
+
+ sz = sizeof(size_t);
+ assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz,
+ NULL, 0), 0, "Unexpected mallctlbymib() error");
+
+ return max_size_class;
+}
+
+TEST_BEGIN(test_size_classes) {
+ size_t size_class, max_size_class;
+ szind_t index, max_index;
+
+ max_size_class = get_max_size_class();
+ max_index = sz_size2index(max_size_class);
+
+ for (index = 0, size_class = sz_index2size(index); index < max_index ||
+ size_class < max_size_class; index++, size_class =
+ sz_index2size(index)) {
+ assert_true(index < max_index,
+ "Loop conditionals should be equivalent; index=%u, "
+ "size_class=%zu (%#zx)", index, size_class, size_class);
+ assert_true(size_class < max_size_class,
+ "Loop conditionals should be equivalent; index=%u, "
+ "size_class=%zu (%#zx)", index, size_class, size_class);
+
+ assert_u_eq(index, sz_size2index(size_class),
+ "sz_size2index() does not reverse sz_index2size(): index=%u"
+ " --> size_class=%zu --> index=%u --> size_class=%zu",
+ index, size_class, sz_size2index(size_class),
+ sz_index2size(sz_size2index(size_class)));
+ assert_zu_eq(size_class,
+ sz_index2size(sz_size2index(size_class)),
+ "sz_index2size() does not reverse sz_size2index(): index=%u"
+ " --> size_class=%zu --> index=%u --> size_class=%zu",
+ index, size_class, sz_size2index(size_class),
+ sz_index2size(sz_size2index(size_class)));
+
+ assert_u_eq(index+1, sz_size2index(size_class+1),
+ "Next size_class does not round up properly");
+
+ assert_zu_eq(size_class, (index > 0) ?
+ sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1),
+ "sz_s2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_s2u(size_class-1),
+ "sz_s2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_s2u(size_class),
+ "sz_s2u() does not compute same size class");
+ assert_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1),
+ "sz_s2u() does not round up to next size class");
+ }
+
+ assert_u_eq(index, sz_size2index(sz_index2size(index)),
+ "sz_size2index() does not reverse sz_index2size()");
+ assert_zu_eq(max_size_class, sz_index2size(
+ sz_size2index(max_size_class)),
+ "sz_index2size() does not reverse sz_size2index()");
+
+ assert_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1),
+ "sz_s2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_s2u(size_class-1),
+ "sz_s2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_s2u(size_class),
+ "sz_s2u() does not compute same size class");
+}
+TEST_END
+
+TEST_BEGIN(test_psize_classes) {
+ size_t size_class, max_psz;
+ pszind_t pind, max_pind;
+
+ max_psz = get_max_size_class() + PAGE;
+ max_pind = sz_psz2ind(max_psz);
+
+ for (pind = 0, size_class = sz_pind2sz(pind);
+ pind < max_pind || size_class < max_psz;
+ pind++, size_class = sz_pind2sz(pind)) {
+ assert_true(pind < max_pind,
+ "Loop conditionals should be equivalent; pind=%u, "
+ "size_class=%zu (%#zx)", pind, size_class, size_class);
+ assert_true(size_class < max_psz,
+ "Loop conditionals should be equivalent; pind=%u, "
+ "size_class=%zu (%#zx)", pind, size_class, size_class);
+
+ assert_u_eq(pind, sz_psz2ind(size_class),
+ "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->"
+ " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+ size_class, sz_psz2ind(size_class),
+ sz_pind2sz(sz_psz2ind(size_class)));
+ assert_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)),
+ "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->"
+ " size_class=%zu --> pind=%u --> size_class=%zu", pind,
+ size_class, sz_psz2ind(size_class),
+ sz_pind2sz(sz_psz2ind(size_class)));
+
+ assert_u_eq(pind+1, sz_psz2ind(size_class+1),
+ "Next size_class does not round up properly");
+
+ assert_zu_eq(size_class, (pind > 0) ?
+ sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1),
+ "sz_psz2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_psz2u(size_class-1),
+ "sz_psz2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_psz2u(size_class),
+ "sz_psz2u() does not compute same size class");
+ assert_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1),
+ "sz_psz2u() does not round up to next size class");
+ }
+
+ assert_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)),
+ "sz_psz2ind() does not reverse sz_pind2sz()");
+ assert_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)),
+ "sz_pind2sz() does not reverse sz_psz2ind()");
+
+ assert_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1),
+ "sz_psz2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_psz2u(size_class-1),
+ "sz_psz2u() does not round up to size class");
+ assert_zu_eq(size_class, sz_psz2u(size_class),
+ "sz_psz2u() does not compute same size class");
+}
+TEST_END
+
+TEST_BEGIN(test_overflow) {
+ size_t max_size_class, max_psz;
+
+ max_size_class = get_max_size_class();
+ max_psz = max_size_class + PAGE;
+
+ assert_u_eq(sz_size2index(max_size_class+1), NSIZES,
+ "sz_size2index() should return NSIZES on overflow");
+ assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), NSIZES,
+ "sz_size2index() should return NSIZES on overflow");
+ assert_u_eq(sz_size2index(SIZE_T_MAX), NSIZES,
+ "sz_size2index() should return NSIZES on overflow");
+
+ assert_zu_eq(sz_s2u(max_size_class+1), 0,
+ "sz_s2u() should return 0 for unsupported size");
+ assert_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0,
+ "sz_s2u() should return 0 for unsupported size");
+ assert_zu_eq(sz_s2u(SIZE_T_MAX), 0,
+ "sz_s2u() should return 0 on overflow");
+
+ assert_u_eq(sz_psz2ind(max_size_class+1), NPSIZES,
+ "sz_psz2ind() should return NPSIZES on overflow");
+ assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES,
+ "sz_psz2ind() should return NPSIZES on overflow");
+ assert_u_eq(sz_psz2ind(SIZE_T_MAX), NPSIZES,
+ "sz_psz2ind() should return NPSIZES on overflow");
+
+ assert_zu_eq(sz_psz2u(max_size_class+1), max_psz,
+ "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported"
+ " size");
+ assert_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz,
+ "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported "
+ "size");
+ assert_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz,
+ "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) on overflow");
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_size_classes,
+ test_psize_classes,
+ test_overflow);
+}
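
Both loops above assert the same invariant: the index-to-size and size-to-index mappings must be inverses on exact size classes, and the size-to-index direction must round intermediate sizes up to the next class. A toy illustration of that property, using powers of two as stand-in size classes (nothing here is jemalloc code):

    #include <assert.h>
    #include <stddef.h>

    /* Toy "size classes": powers of two.  index2size(i) = 2^i. */
    static size_t index2size(unsigned i) { return (size_t)1 << i; }

    /* size2index() rounds a request up to the smallest class that fits. */
    static unsigned
    size2index(size_t s) {
        unsigned i = 0;
        while (index2size(i) < s) {
            i++;
        }
        return i;
    }

    int
    main(void) {
        /* Start at i == 2 so that cls - 1 is never itself a class. */
        for (unsigned i = 2; i < 16; i++) {
            size_t cls = index2size(i);
            /* The mappings are inverses on exact class sizes... */
            assert(size2index(cls) == i);
            assert(index2size(size2index(cls)) == cls);
            /* ...and a size just below a class rounds up to it. */
            assert(size2index(cls - 1) == i);
        }
        return 0;
    }
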
diff --git a/deps/jemalloc/test/unit/slab.c b/deps/jemalloc/test/unit/slab.c
new file mode 100644
index 000000000..7e662aed1
--- /dev/null
+++ b/deps/jemalloc/test/unit/slab.c
@@ -0,0 +1,32 @@
+#include "test/jemalloc_test.h"
+
+TEST_BEGIN(test_arena_slab_regind) {
+ szind_t binind;
+
+ for (binind = 0; binind < NBINS; binind++) {
+ size_t regind;
+ extent_t slab;
+ const bin_info_t *bin_info = &bin_infos[binind];
+ extent_init(&slab, NULL, mallocx(bin_info->slab_size,
+ MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true,
+ binind, 0, extent_state_active, false, true, true);
+ assert_ptr_not_null(extent_addr_get(&slab),
+ "Unexpected malloc() failure");
+ for (regind = 0; regind < bin_info->nregs; regind++) {
+ void *reg = (void *)((uintptr_t)extent_addr_get(&slab) +
+ (bin_info->reg_size * regind));
+ assert_zu_eq(arena_slab_regind(&slab, binind, reg),
+ regind,
+ "Incorrect region index computed for size %zu",
+ bin_info->reg_size);
+ }
+ free(extent_addr_get(&slab));
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_arena_slab_regind);
+}
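
The function under test, arena_slab_regind(), maps a pointer inside a slab back to its region index. jemalloc avoids a hardware division internally, but the result is equivalent to the plain quotient, which this hedged sketch spells out (slab_regind is a made-up name):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Region index = offset into the slab divided by the region size. */
    static size_t
    slab_regind(void *slab_addr, size_t reg_size, void *ptr) {
        return ((uintptr_t)ptr - (uintptr_t)slab_addr) / reg_size;
    }

    int
    main(void) {
        size_t reg_size = 48, nregs = 16;
        char *slab = malloc(reg_size * nregs);
        for (size_t i = 0; i < nregs; i++) {
            void *reg = slab + reg_size * i;
            assert(slab_regind(slab, reg_size, reg) == i);
        }
        free(slab);
        return 0;
    }
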
diff --git a/deps/jemalloc/test/unit/smoothstep.c b/deps/jemalloc/test/unit/smoothstep.c
new file mode 100644
index 000000000..7c5dbb7e0
--- /dev/null
+++ b/deps/jemalloc/test/unit/smoothstep.c
@@ -0,0 +1,102 @@
+#include "test/jemalloc_test.h"
+
+static const uint64_t smoothstep_tab[] = {
+#define STEP(step, h, x, y) \
+ h,
+ SMOOTHSTEP
+#undef STEP
+};
+
+TEST_BEGIN(test_smoothstep_integral) {
+ uint64_t sum, min, max;
+ unsigned i;
+
+ /*
+ * The integral of smoothstep in the [0..1] range equals 1/2. Verify
+ * that the fixed point representation's integral is no more than
+ * rounding error distant from 1/2. Regarding rounding, each table
+ * element is rounded down to the nearest fixed point value, so the
+ * integral may be off by as much as SMOOTHSTEP_NSTEPS ulps.
+ */
+ sum = 0;
+ for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+ sum += smoothstep_tab[i];
+ }
+
+ max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1);
+ min = max - SMOOTHSTEP_NSTEPS;
+
+ assert_u64_ge(sum, min,
+ "Integral too small, even accounting for truncation");
+ assert_u64_le(sum, max, "Integral exceeds 1/2");
+ if (false) {
+ malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n",
+ max - sum, SMOOTHSTEP_NSTEPS);
+ }
+}
+TEST_END
+
+TEST_BEGIN(test_smoothstep_monotonic) {
+ uint64_t prev_h;
+ unsigned i;
+
+ /*
+ * The smoothstep function is monotonic in [0..1], i.e. its slope is
+ * non-negative. In practice we want to parametrize table generation
+ * such that piecewise slope is greater than zero, but do not require
+ * that here.
+ */
+ prev_h = 0;
+ for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+ uint64_t h = smoothstep_tab[i];
+ assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i);
+ prev_h = h;
+ }
+ assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1],
+ (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1");
+}
+TEST_END
+
+TEST_BEGIN(test_smoothstep_slope) {
+ uint64_t prev_h, prev_delta;
+ unsigned i;
+
+ /*
+ * The smoothstep slope strictly increases until x=0.5, and then
+ * strictly decreases until x=1.0. Verify the slightly weaker
+ * requirement of monotonicity, so that inadequate table precision does
+ * not cause false test failures.
+ */
+ prev_h = 0;
+ prev_delta = 0;
+ for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) {
+ uint64_t h = smoothstep_tab[i];
+ uint64_t delta = h - prev_h;
+ assert_u64_ge(delta, prev_delta,
+ "Slope must monotonically increase in 0.0 <= x <= 0.5, "
+ "i=%u", i);
+ prev_h = h;
+ prev_delta = delta;
+ }
+
+ prev_h = KQU(1) << SMOOTHSTEP_BFP;
+ prev_delta = 0;
+ for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) {
+ uint64_t h = smoothstep_tab[i];
+ uint64_t delta = prev_h - h;
+ assert_u64_ge(delta, prev_delta,
+ "Slope must monotonically decrease in 0.5 <= x <= 1.0, "
+ "i=%u", i);
+ prev_h = h;
+ prev_delta = delta;
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_smoothstep_integral,
+ test_smoothstep_monotonic,
+ test_smoothstep_slope);
+}
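
The 1/2 value that test_smoothstep_integral targets is not an accident of one polynomial: every smoothstep variant h, including the smoother polynomial h(x) = 6x^5 - 15x^4 + 10x^3 that jemalloc generates by default, satisfies the point symmetry h(x) + h(1-x) = 1, and the exact integral follows in one line (a sketch assuming only that symmetry):

    % Substituting u = 1 - x gives \int_0^1 h(x)\,dx = \int_0^1 h(1-x)\,dx, hence
    \int_0^1 h(x)\,dx
      = \frac{1}{2}\int_0^1 \bigl(h(x) + h(1-x)\bigr)\,dx
      = \frac{1}{2}\int_0^1 1\,dx
      = \frac{1}{2}.

Because each table entry is rounded down to fixed point, the discrete sum can fall below this by at most one ulp per step, which is exactly the SMOOTHSTEP_NSTEPS slack the test allows.
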
diff --git a/deps/jemalloc/test/unit/spin.c b/deps/jemalloc/test/unit/spin.c
new file mode 100644
index 000000000..b965f7427
--- /dev/null
+++ b/deps/jemalloc/test/unit/spin.c
@@ -0,0 +1,18 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/spin.h"
+
+TEST_BEGIN(test_spin) {
+ spin_t spinner = SPIN_INITIALIZER;
+
+ for (unsigned i = 0; i < 100; i++) {
+ spin_adaptive(&spinner);
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_spin);
+}
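
test_spin only exercises spin_adaptive() for liveness. The idea behind adaptive spinning is to busy-wait for a geometrically growing number of iterations before falling back to yielding the CPU. The sketch below captures that shape with made-up constants and a GCC/Clang compiler barrier standing in for a PAUSE instruction; it is not jemalloc's implementation.

    #include <sched.h>

    typedef struct { unsigned iteration; } my_spin_t;
    #define MY_SPIN_INITIALIZER {0}

    static void
    my_spin_adaptive(my_spin_t *s) {
        if (s->iteration < 5) {
            /* Busy-wait 2^iteration times, growing geometrically. */
            for (unsigned i = 0; i < (1U << s->iteration); i++) {
                __asm__ volatile("" ::: "memory"); /* keep the loop alive */
            }
            s->iteration++;
        } else {
            /* Spinning has gone on long enough; yield the CPU. */
            sched_yield();
        }
    }

    int
    main(void) {
        my_spin_t s = MY_SPIN_INITIALIZER;
        for (unsigned i = 0; i < 100; i++) {
            my_spin_adaptive(&s);
        }
        return 0;
    }
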
diff --git a/deps/jemalloc/test/unit/stats.c b/deps/jemalloc/test/unit/stats.c
index 03a55c7fd..231010e43 100644
--- a/deps/jemalloc/test/unit/stats.c
+++ b/deps/jemalloc/test/unit/stats.c
@@ -1,162 +1,146 @@
#include "test/jemalloc_test.h"
-TEST_BEGIN(test_stats_summary)
-{
- size_t *cactive;
- size_t sz, allocated, active, mapped;
+TEST_BEGIN(test_stats_summary) {
+ size_t sz, allocated, active, resident, mapped;
int expected = config_stats ? 0 : ENOENT;
- sz = sizeof(cactive);
- assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected,
- "Unexpected mallctl() result");
-
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0),
+ assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL,
+ 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0),
expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected,
- "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected,
- "Unexpected mallctl() result");
-
- if (config_stats) {
- assert_zu_le(active, *cactive,
- "active should be no larger than cactive");
- assert_zu_le(allocated, active,
- "allocated should be no larger than active");
- assert_zu_le(active, mapped,
- "active should be no larger than mapped");
- }
-}
-TEST_END
-
-TEST_BEGIN(test_stats_chunks)
-{
- size_t current, high;
- uint64_t total;
- size_t sz;
- int expected = config_stats ? 0 : ENOENT;
-
- sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.chunks.current", &current, &sz, NULL, 0),
+ assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0),
expected, "Unexpected mallctl() result");
- sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.chunks.total", &total, &sz, NULL, 0),
+ assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0),
expected, "Unexpected mallctl() result");
- sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.chunks.high", &high, &sz, NULL, 0), expected,
- "Unexpected mallctl() result");
if (config_stats) {
- assert_zu_le(current, high,
- "current should be no larger than high");
- assert_u64_le((uint64_t)high, total,
- "high should be no larger than total");
+ assert_zu_le(allocated, active,
+ "allocated should be no larger than active");
+ assert_zu_lt(active, resident,
+ "active should be less than resident");
+ assert_zu_lt(active, mapped,
+ "active should be less than mapped");
}
}
TEST_END
-TEST_BEGIN(test_stats_huge)
-{
+TEST_BEGIN(test_stats_large) {
void *p;
uint64_t epoch;
size_t allocated;
- uint64_t nmalloc, ndalloc;
+ uint64_t nmalloc, ndalloc, nrequests;
size_t sz;
int expected = config_stats ? 0 : ENOENT;
- p = mallocx(arena_maxclass+1, 0);
+ p = mallocx(SMALL_MAXCLASS+1, MALLOCX_ARENA(0));
assert_ptr_not_null(p, "Unexpected mallocx() failure");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.huge.allocated", &allocated, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.allocated",
+ (void *)&allocated, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.huge.nmalloc", &nmalloc, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.huge.ndalloc", &ndalloc, &sz, NULL, 0),
- expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.nrequests",
+ (void *)&nrequests, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
if (config_stats) {
assert_zu_gt(allocated, 0,
"allocated should be greater than zero");
assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
+ assert_u64_le(nmalloc, nrequests,
+ "nmalloc should no larger than nrequests");
}
dallocx(p, 0);
}
TEST_END
-TEST_BEGIN(test_stats_arenas_summary)
-{
- unsigned arena;
- void *small, *large;
+TEST_BEGIN(test_stats_arenas_summary) {
+ void *little, *large;
uint64_t epoch;
size_t sz;
int expected = config_stats ? 0 : ENOENT;
size_t mapped;
- uint64_t npurge, nmadvise, purged;
+ uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
+ uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
- arena = 0;
- assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
-
- small = mallocx(SMALL_MAXCLASS, 0);
- assert_ptr_not_null(small, "Unexpected mallocx() failure");
- large = mallocx(arena_maxclass, 0);
+ little = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0));
+ assert_ptr_not_null(little, "Unexpected mallocx() failure");
+ large = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
assert_ptr_not_null(large, "Unexpected mallocx() failure");
+ dallocx(little, 0);
+ dallocx(large, 0);
+
+ assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
+ opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result");
assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0,
"Unexpected mallctl() failure");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0),
- expected, "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL,
+ 0), expected, "Unexepected mallctl() result");
+
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, NULL, 0),
- expected, "Unexepected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0),
- expected, "Unexepected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0),
- expected, "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.dirty_npurge",
+ (void *)&dirty_npurge, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.dirty_nmadvise",
+ (void *)&dirty_nmadvise, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.dirty_purged",
+ (void *)&dirty_purged, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.muzzy_npurge",
+ (void *)&muzzy_npurge, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise",
+ (void *)&muzzy_nmadvise, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.muzzy_purged",
+ (void *)&muzzy_purged, &sz, NULL, 0), expected,
+ "Unexepected mallctl() result");
if (config_stats) {
- assert_u64_gt(npurge, 0,
- "At least one purge should have occurred");
- assert_u64_le(nmadvise, purged,
- "nmadvise should be no greater than purged");
+ if (!background_thread_enabled()) {
+ assert_u64_gt(dirty_npurge + muzzy_npurge, 0,
+ "At least one purge should have occurred");
+ }
+ assert_u64_le(dirty_nmadvise, dirty_purged,
+ "dirty_nmadvise should be no greater than dirty_purged");
+ assert_u64_le(muzzy_nmadvise, muzzy_purged,
+ "muzzy_nmadvise should be no greater than muzzy_purged");
}
-
- dallocx(small, 0);
- dallocx(large, 0);
}
TEST_END
void *
-thd_start(void *arg)
-{
-
- return (NULL);
+thd_start(void *arg) {
+ return NULL;
}
static void
-no_lazy_lock(void)
-{
+no_lazy_lock(void) {
thd_t thd;
thd_create(&thd, thd_start, NULL);
thd_join(thd, NULL);
}
-TEST_BEGIN(test_stats_arenas_small)
-{
- unsigned arena;
+TEST_BEGIN(test_stats_arenas_small) {
void *p;
size_t sz, allocated;
uint64_t epoch, nmalloc, ndalloc, nrequests;
@@ -164,29 +148,27 @@ TEST_BEGIN(test_stats_arenas_small)
no_lazy_lock(); /* Lazy locking would dodge tcache testing. */
- arena = 0;
- assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
-
- p = mallocx(SMALL_MAXCLASS, 0);
+ p = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0));
assert_ptr_not_null(p, "Unexpected mallocx() failure");
assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
- config_tcache ? 0 : ENOENT, "Unexpected mallctl() result");
+ opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.small.allocated",
+ (void *)&allocated, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.small.nrequests",
+ (void *)&nrequests, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
if (config_stats) {
assert_zu_gt(allocated, 0,
@@ -203,161 +185,170 @@ TEST_BEGIN(test_stats_arenas_small)
}
TEST_END
-TEST_BEGIN(test_stats_arenas_large)
-{
- unsigned arena;
+TEST_BEGIN(test_stats_arenas_large) {
void *p;
size_t sz, allocated;
- uint64_t epoch, nmalloc, ndalloc, nrequests;
+ uint64_t epoch, nmalloc, ndalloc;
int expected = config_stats ? 0 : ENOENT;
- arena = 0;
- assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
-
- p = mallocx(arena_maxclass, 0);
+ p = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0));
assert_ptr_not_null(p, "Unexpected mallocx() failure");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.allocated",
+ (void *)&allocated, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc,
+ &sz, NULL, 0), expected, "Unexpected mallctl() result");
if (config_stats) {
assert_zu_gt(allocated, 0,
"allocated should be greater than zero");
- assert_zu_gt(nmalloc, 0,
+ assert_u64_gt(nmalloc, 0,
"nmalloc should be greater than zero");
- assert_zu_ge(nmalloc, ndalloc,
+ assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
- assert_zu_gt(nrequests, 0,
- "nrequests should be greater than zero");
}
dallocx(p, 0);
}
TEST_END
-TEST_BEGIN(test_stats_arenas_bins)
-{
- unsigned arena;
+static void
+gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) {
+ sprintf(cmd, "stats.arenas.%u.bins.0.%s", arena_ind, name);
+}
+
+TEST_BEGIN(test_stats_arenas_bins) {
void *p;
- size_t sz, allocated, curruns;
+ size_t sz, curslabs, curregs;
uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes;
- uint64_t nruns, nreruns;
+ uint64_t nslabs, nreslabs;
int expected = config_stats ? 0 : ENOENT;
- arena = 0;
- assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
+ /* Make sure allocation below isn't satisfied by tcache. */
+ assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
+ opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result");
+
+ unsigned arena_ind, old_arena_ind;
+ sz = sizeof(unsigned);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0),
+ 0, "Arena creation failure");
+ sz = sizeof(arena_ind);
+ assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz,
+ (void *)&arena_ind, sizeof(arena_ind)), 0,
+ "Unexpected mallctl() failure");
- p = mallocx(arena_bin_info[0].reg_size, 0);
- assert_ptr_not_null(p, "Unexpected mallocx() failure");
+ p = malloc(bin_infos[0].reg_size);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0),
- config_tcache ? 0 : ENOENT, "Unexpected mallctl() result");
+ opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
+ char cmd[128];
+ sz = sizeof(uint64_t);
+ gen_mallctl_str(cmd, "nmalloc", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "ndalloc", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "nrequests", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.bins.0.allocated", &allocated, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "curregs", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
+
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
-
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz,
- NULL, 0), config_tcache ? expected : ENOENT,
+ gen_mallctl_str(cmd, "nfills", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected,
"Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz,
- NULL, 0), config_tcache ? expected : ENOENT,
+ gen_mallctl_str(cmd, "nflushes", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected,
"Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "nslabs", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "nreslabs", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ gen_mallctl_str(cmd, "curslabs", arena_ind);
+ assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
if (config_stats) {
- assert_zu_gt(allocated, 0,
- "allocated should be greater than zero");
assert_u64_gt(nmalloc, 0,
"nmalloc should be greater than zero");
assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
assert_u64_gt(nrequests, 0,
"nrequests should be greater than zero");
- if (config_tcache) {
+ assert_zu_gt(curregs, 0,
+ "allocated should be greater than zero");
+ if (opt_tcache) {
assert_u64_gt(nfills, 0,
"At least one fill should have occurred");
assert_u64_gt(nflushes, 0,
"At least one flush should have occurred");
}
- assert_u64_gt(nruns, 0,
- "At least one run should have been allocated");
- assert_zu_gt(curruns, 0,
- "At least one run should be currently allocated");
+ assert_u64_gt(nslabs, 0,
+ "At least one slab should have been allocated");
+ assert_zu_gt(curslabs, 0,
+ "At least one slab should be currently allocated");
}
dallocx(p, 0);
}
TEST_END
-TEST_BEGIN(test_stats_arenas_lruns)
-{
- unsigned arena;
+TEST_BEGIN(test_stats_arenas_lextents) {
void *p;
- uint64_t epoch, nmalloc, ndalloc, nrequests;
- size_t curruns, sz;
+ uint64_t epoch, nmalloc, ndalloc;
+ size_t curlextents, sz, hsize;
int expected = config_stats ? 0 : ENOENT;
- arena = 0;
- assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)),
- 0, "Unexpected mallctl() failure");
+ sz = sizeof(size_t);
+ assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL,
+ 0), 0, "Unexpected mallctl() failure");
- p = mallocx(SMALL_MAXCLASS+1, 0);
+ p = mallocx(hsize, MALLOCX_ARENA(0));
assert_ptr_not_null(p, "Unexpected mallocx() failure");
- assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0,
- "Unexpected mallctl() failure");
+ assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)),
+ 0, "Unexpected mallctl() failure");
sz = sizeof(uint64_t);
- assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
- assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc",
+ (void *)&nmalloc, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc",
+ (void *)&ndalloc, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
sz = sizeof(size_t);
- assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz,
- NULL, 0), expected, "Unexpected mallctl() result");
+ assert_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents",
+ (void *)&curlextents, &sz, NULL, 0), expected,
+ "Unexpected mallctl() result");
if (config_stats) {
assert_u64_gt(nmalloc, 0,
"nmalloc should be greater than zero");
assert_u64_ge(nmalloc, ndalloc,
"nmalloc should be at least as large as ndalloc");
- assert_u64_gt(nrequests, 0,
- "nrequests should be greater than zero");
- assert_u64_gt(curruns, 0,
- "At least one run should be currently allocated");
+ assert_u64_gt(curlextents, 0,
+ "At least one extent should be currently allocated");
}
dallocx(p, 0);
@@ -365,16 +356,13 @@ TEST_BEGIN(test_stats_arenas_lruns)
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test_no_reentrancy(
test_stats_summary,
- test_stats_chunks,
- test_stats_huge,
+ test_stats_large,
test_stats_arenas_summary,
test_stats_arenas_small,
test_stats_arenas_large,
test_stats_arenas_bins,
- test_stats_arenas_lruns));
+ test_stats_arenas_lextents);
}
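
All of the stats tests above follow the same read protocol: write any value to the "epoch" mallctl to snapshot the statistics, then read the counters of interest (each read fails with ENOENT when jemalloc was built without statistics support, which is what the config_stats/expected dance checks). A minimal standalone sketch, assuming jemalloc is installed and linked with -ljemalloc; depending on the build, the symbols may carry a je_ prefix.

    #include <stdint.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        void *p = malloc(4096); /* generate some allocator activity */

        /* Writing any value to "epoch" refreshes the stats snapshot. */
        uint64_t epoch = 1;
        size_t sz = sizeof(epoch);
        if (mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch)) != 0) {
            return 1;
        }

        /* These reads return ENOENT if built without --enable-stats. */
        size_t allocated = 0, active = 0, mapped = 0;
        sz = sizeof(size_t);
        mallctl("stats.allocated", &allocated, &sz, NULL, 0);
        mallctl("stats.active", &active, &sz, NULL, 0);
        mallctl("stats.mapped", &mapped, &sz, NULL, 0);
        printf("allocated=%zu active=%zu mapped=%zu\n",
            allocated, active, mapped);

        free(p);
        return 0;
    }
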
diff --git a/deps/jemalloc/test/unit/stats_print.c b/deps/jemalloc/test/unit/stats_print.c
new file mode 100644
index 000000000..014d002fd
--- /dev/null
+++ b/deps/jemalloc/test/unit/stats_print.c
@@ -0,0 +1,999 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/util.h"
+
+typedef enum {
+ TOKEN_TYPE_NONE,
+ TOKEN_TYPE_ERROR,
+ TOKEN_TYPE_EOI,
+ TOKEN_TYPE_NULL,
+ TOKEN_TYPE_FALSE,
+ TOKEN_TYPE_TRUE,
+ TOKEN_TYPE_LBRACKET,
+ TOKEN_TYPE_RBRACKET,
+ TOKEN_TYPE_LBRACE,
+ TOKEN_TYPE_RBRACE,
+ TOKEN_TYPE_COLON,
+ TOKEN_TYPE_COMMA,
+ TOKEN_TYPE_STRING,
+ TOKEN_TYPE_NUMBER
+} token_type_t;
+
+typedef struct parser_s parser_t;
+typedef struct {
+ parser_t *parser;
+ token_type_t token_type;
+ size_t pos;
+ size_t len;
+ size_t line;
+ size_t col;
+} token_t;
+
+struct parser_s {
+ bool verbose;
+ char *buf; /* '\0'-terminated. */
+ size_t len; /* Number of characters preceding '\0' in buf. */
+ size_t pos;
+ size_t line;
+ size_t col;
+ token_t token;
+};
+
+static void
+token_init(token_t *token, parser_t *parser, token_type_t token_type,
+ size_t pos, size_t len, size_t line, size_t col) {
+ token->parser = parser;
+ token->token_type = token_type;
+ token->pos = pos;
+ token->len = len;
+ token->line = line;
+ token->col = col;
+}
+
+static void
+token_error(token_t *token) {
+ if (!token->parser->verbose) {
+ return;
+ }
+ switch (token->token_type) {
+ case TOKEN_TYPE_NONE:
+ not_reached();
+ case TOKEN_TYPE_ERROR:
+ malloc_printf("%zu:%zu: Unexpected character in token: ",
+ token->line, token->col);
+ break;
+ default:
+ malloc_printf("%zu:%zu: Unexpected token: ", token->line,
+ token->col);
+ break;
+ }
+ UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO,
+ &token->parser->buf[token->pos], token->len);
+ malloc_printf("\n");
+}
+
+static void
+parser_init(parser_t *parser, bool verbose) {
+ parser->verbose = verbose;
+ parser->buf = NULL;
+ parser->len = 0;
+ parser->pos = 0;
+ parser->line = 1;
+ parser->col = 0;
+}
+
+static void
+parser_fini(parser_t *parser) {
+ if (parser->buf != NULL) {
+ dallocx(parser->buf, MALLOCX_TCACHE_NONE);
+ }
+}
+
+static bool
+parser_append(parser_t *parser, const char *str) {
+ size_t len = strlen(str);
+ char *buf = (parser->buf == NULL) ? mallocx(len + 1,
+ MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1,
+ MALLOCX_TCACHE_NONE);
+ if (buf == NULL) {
+ return true;
+ }
+ memcpy(&buf[parser->len], str, len + 1);
+ parser->buf = buf;
+ parser->len += len;
+ return false;
+}
+
+static bool
+parser_tokenize(parser_t *parser) {
+ enum {
+ STATE_START,
+ STATE_EOI,
+ STATE_N, STATE_NU, STATE_NUL, STATE_NULL,
+ STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE,
+ STATE_T, STATE_TR, STATE_TRU, STATE_TRUE,
+ STATE_LBRACKET,
+ STATE_RBRACKET,
+ STATE_LBRACE,
+ STATE_RBRACE,
+ STATE_COLON,
+ STATE_COMMA,
+ STATE_CHARS,
+ STATE_CHAR_ESCAPE,
+ STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD,
+ STATE_STRING,
+ STATE_MINUS,
+ STATE_LEADING_ZERO,
+ STATE_DIGITS,
+ STATE_DECIMAL,
+ STATE_FRAC_DIGITS,
+ STATE_EXP,
+ STATE_EXP_SIGN,
+ STATE_EXP_DIGITS,
+ STATE_ACCEPT
+ } state = STATE_START;
+ size_t token_pos JEMALLOC_CC_SILENCE_INIT(0);
+ size_t token_line JEMALLOC_CC_SILENCE_INIT(1);
+ size_t token_col JEMALLOC_CC_SILENCE_INIT(0);
+
+ assert_zu_le(parser->pos, parser->len,
+ "Position is past end of buffer");
+
+ while (state != STATE_ACCEPT) {
+ char c = parser->buf[parser->pos];
+
+ switch (state) {
+ case STATE_START:
+ token_pos = parser->pos;
+ token_line = parser->line;
+ token_col = parser->col;
+ switch (c) {
+ case ' ': case '\b': case '\n': case '\r': case '\t':
+ break;
+ case '\0':
+ state = STATE_EOI;
+ break;
+ case 'n':
+ state = STATE_N;
+ break;
+ case 'f':
+ state = STATE_F;
+ break;
+ case 't':
+ state = STATE_T;
+ break;
+ case '[':
+ state = STATE_LBRACKET;
+ break;
+ case ']':
+ state = STATE_RBRACKET;
+ break;
+ case '{':
+ state = STATE_LBRACE;
+ break;
+ case '}':
+ state = STATE_RBRACE;
+ break;
+ case ':':
+ state = STATE_COLON;
+ break;
+ case ',':
+ state = STATE_COMMA;
+ break;
+ case '"':
+ state = STATE_CHARS;
+ break;
+ case '-':
+ state = STATE_MINUS;
+ break;
+ case '0':
+ state = STATE_LEADING_ZERO;
+ break;
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ state = STATE_DIGITS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_EOI:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_EOI, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_N:
+ switch (c) {
+ case 'u':
+ state = STATE_NU;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_NU:
+ switch (c) {
+ case 'l':
+ state = STATE_NUL;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_NUL:
+ switch (c) {
+ case 'l':
+ state = STATE_NULL;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_NULL:
+ switch (c) {
+ case ' ': case '\b': case '\n': case '\r': case '\t':
+ case '\0':
+ case '[': case ']': case '{': case '}': case ':':
+ case ',':
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ token_init(&parser->token, parser, TOKEN_TYPE_NULL,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_F:
+ switch (c) {
+ case 'a':
+ state = STATE_FA;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_FA:
+ switch (c) {
+ case 'l':
+ state = STATE_FAL;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_FAL:
+ switch (c) {
+ case 's':
+ state = STATE_FALS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_FALS:
+ switch (c) {
+ case 'e':
+ state = STATE_FALSE;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_FALSE:
+ switch (c) {
+ case ' ': case '\b': case '\n': case '\r': case '\t':
+ case '\0':
+ case '[': case ']': case '{': case '}': case ':':
+ case ',':
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_FALSE, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_T:
+ switch (c) {
+ case 'r':
+ state = STATE_TR;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_TR:
+ switch (c) {
+ case 'u':
+ state = STATE_TRU;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_TRU:
+ switch (c) {
+ case 'e':
+ state = STATE_TRUE;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_TRUE:
+ switch (c) {
+ case ' ': case '\b': case '\n': case '\r': case '\t':
+ case '\0':
+ case '[': case ']': case '{': case '}': case ':':
+ case ',':
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ token_init(&parser->token, parser, TOKEN_TYPE_TRUE,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_LBRACKET:
+ token_init(&parser->token, parser, TOKEN_TYPE_LBRACKET,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_RBRACKET:
+ token_init(&parser->token, parser, TOKEN_TYPE_RBRACKET,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_LBRACE:
+ token_init(&parser->token, parser, TOKEN_TYPE_LBRACE,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_RBRACE:
+ token_init(&parser->token, parser, TOKEN_TYPE_RBRACE,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_COLON:
+ token_init(&parser->token, parser, TOKEN_TYPE_COLON,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_COMMA:
+ token_init(&parser->token, parser, TOKEN_TYPE_COMMA,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_CHARS:
+ switch (c) {
+ case '\\':
+ state = STATE_CHAR_ESCAPE;
+ break;
+ case '"':
+ state = STATE_STRING;
+ break;
+ case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
+ case 0x05: case 0x06: case 0x07: case 0x08: case 0x09:
+ case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e:
+ case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13:
+ case 0x14: case 0x15: case 0x16: case 0x17: case 0x18:
+ case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d:
+ case 0x1e: case 0x1f:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ default:
+ break;
+ }
+ break;
+ case STATE_CHAR_ESCAPE:
+ switch (c) {
+ case '"': case '\\': case '/': case 'b': case 'n':
+ case 'r': case 't':
+ state = STATE_CHARS;
+ break;
+ case 'u':
+ state = STATE_CHAR_U;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_CHAR_U:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ state = STATE_CHAR_UD;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_CHAR_UD:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ state = STATE_CHAR_UDD;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_CHAR_UDD:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ state = STATE_CHAR_UDDD;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_CHAR_UDDD:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e':
+ case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E':
+ case 'F':
+ state = STATE_CHARS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_STRING:
+ token_init(&parser->token, parser, TOKEN_TYPE_STRING,
+ token_pos, parser->pos - token_pos, token_line,
+ token_col);
+ state = STATE_ACCEPT;
+ break;
+ case STATE_MINUS:
+ switch (c) {
+ case '0':
+ state = STATE_LEADING_ZERO;
+ break;
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ state = STATE_DIGITS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_LEADING_ZERO:
+ switch (c) {
+ case '.':
+ state = STATE_DECIMAL;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_NUMBER, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ }
+ break;
+ case STATE_DIGITS:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ break;
+ case '.':
+ state = STATE_DECIMAL;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_NUMBER, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ }
+ break;
+ case STATE_DECIMAL:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ state = STATE_FRAC_DIGITS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_FRAC_DIGITS:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ break;
+ case 'e': case 'E':
+ state = STATE_EXP;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_NUMBER, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ }
+ break;
+ case STATE_EXP:
+ switch (c) {
+ case '-': case '+':
+ state = STATE_EXP_SIGN;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ state = STATE_EXP_DIGITS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_EXP_SIGN:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ state = STATE_EXP_DIGITS;
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_ERROR, token_pos, parser->pos + 1
+ - token_pos, token_line, token_col);
+ return true;
+ }
+ break;
+ case STATE_EXP_DIGITS:
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ break;
+ default:
+ token_init(&parser->token, parser,
+ TOKEN_TYPE_NUMBER, token_pos, parser->pos -
+ token_pos, token_line, token_col);
+ state = STATE_ACCEPT;
+ break;
+ }
+ break;
+ default:
+ not_reached();
+ }
+
+ if (state != STATE_ACCEPT) {
+ if (c == '\n') {
+ parser->line++;
+ parser->col = 0;
+ } else {
+ parser->col++;
+ }
+ parser->pos++;
+ }
+ }
+ return false;
+}
+
+static bool parser_parse_array(parser_t *parser);
+static bool parser_parse_object(parser_t *parser);
+
+static bool
+parser_parse_value(parser_t *parser) {
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_NULL:
+ case TOKEN_TYPE_FALSE:
+ case TOKEN_TYPE_TRUE:
+ case TOKEN_TYPE_STRING:
+ case TOKEN_TYPE_NUMBER:
+ return false;
+ case TOKEN_TYPE_LBRACE:
+ return parser_parse_object(parser);
+ case TOKEN_TYPE_LBRACKET:
+ return parser_parse_array(parser);
+ default:
+ return true;
+ }
+ not_reached();
+}
+
+static bool
+parser_parse_pair(parser_t *parser) {
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING,
+ "Pair should start with string");
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_COLON:
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ return parser_parse_value(parser);
+ default:
+ return true;
+ }
+}
+
+static bool
+parser_parse_values(parser_t *parser) {
+ if (parser_parse_value(parser)) {
+ return true;
+ }
+
+ while (true) {
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_COMMA:
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ if (parser_parse_value(parser)) {
+ return true;
+ }
+ break;
+ case TOKEN_TYPE_RBRACKET:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+static bool
+parser_parse_array(parser_t *parser) {
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET,
+ "Array should start with [");
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_RBRACKET:
+ return false;
+ default:
+ return parser_parse_values(parser);
+ }
+ not_reached();
+}
+
+static bool
+parser_parse_pairs(parser_t *parser) {
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING,
+ "Object should start with string");
+ if (parser_parse_pair(parser)) {
+ return true;
+ }
+
+ while (true) {
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_COMMA:
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_STRING:
+ if (parser_parse_pair(parser)) {
+ return true;
+ }
+ break;
+ default:
+ return true;
+ }
+ break;
+ case TOKEN_TYPE_RBRACE:
+ return false;
+ default:
+ return true;
+ }
+ }
+}
+
+static bool
+parser_parse_object(parser_t *parser) {
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE,
+ "Object should start with {");
+ if (parser_tokenize(parser)) {
+ return true;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_STRING:
+ return parser_parse_pairs(parser);
+ case TOKEN_TYPE_RBRACE:
+ return false;
+ default:
+ return true;
+ }
+ not_reached();
+}
+
+static bool
+parser_parse(parser_t *parser) {
+ if (parser_tokenize(parser)) {
+ goto label_error;
+ }
+ if (parser_parse_value(parser)) {
+ goto label_error;
+ }
+
+ if (parser_tokenize(parser)) {
+ goto label_error;
+ }
+ switch (parser->token.token_type) {
+ case TOKEN_TYPE_EOI:
+ return false;
+ default:
+ goto label_error;
+ }
+ not_reached();
+
+label_error:
+ token_error(&parser->token);
+ return true;
+}
+
+TEST_BEGIN(test_json_parser) {
+ size_t i;
+ const char *invalid_inputs[] = {
+ /* Tokenizer error case tests. */
+ "{ \"string\": X }",
+ "{ \"string\": nXll }",
+ "{ \"string\": nuXl }",
+ "{ \"string\": nulX }",
+ "{ \"string\": nullX }",
+ "{ \"string\": fXlse }",
+ "{ \"string\": faXse }",
+ "{ \"string\": falXe }",
+ "{ \"string\": falsX }",
+ "{ \"string\": falseX }",
+ "{ \"string\": tXue }",
+ "{ \"string\": trXe }",
+ "{ \"string\": truX }",
+ "{ \"string\": trueX }",
+ "{ \"string\": \"\n\" }",
+ "{ \"string\": \"\\z\" }",
+ "{ \"string\": \"\\uX000\" }",
+ "{ \"string\": \"\\u0X00\" }",
+ "{ \"string\": \"\\u00X0\" }",
+ "{ \"string\": \"\\u000X\" }",
+ "{ \"string\": -X }",
+ "{ \"string\": 0.X }",
+ "{ \"string\": 0.0eX }",
+ "{ \"string\": 0.0e+X }",
+
+ /* Parser error test cases. */
+ "{\"string\": }",
+ "{\"string\" }",
+ "{\"string\": [ 0 }",
+ "{\"string\": {\"a\":0, 1 } }",
+ "{\"string\": {\"a\":0: } }",
+ "{",
+ "{}{",
+ };
+ const char *valid_inputs[] = {
+ /* Token tests. */
+ "null",
+ "false",
+ "true",
+ "{}",
+ "{\"a\": 0}",
+ "[]",
+ "[0, 1]",
+ "0",
+ "1",
+ "10",
+ "-10",
+ "10.23",
+ "10.23e4",
+ "10.23e-4",
+ "10.23e+4",
+ "10.23E4",
+ "10.23E-4",
+ "10.23E+4",
+ "-10.23",
+ "-10.23e4",
+ "-10.23e-4",
+ "-10.23e+4",
+ "-10.23E4",
+ "-10.23E-4",
+ "-10.23E+4",
+ "\"value\"",
+ "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"",
+
+ /* Parser test with various nesting. */
+ "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}",
+ };
+
+ for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) {
+ const char *input = invalid_inputs[i];
+ parser_t parser;
+ parser_init(&parser, false);
+ assert_false(parser_append(&parser, input),
+ "Unexpected input appending failure");
+ assert_true(parser_parse(&parser),
+ "Unexpected parse success for input: %s", input);
+ parser_fini(&parser);
+ }
+
+ for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) {
+ const char *input = valid_inputs[i];
+ parser_t parser;
+ parser_init(&parser, true);
+ assert_false(parser_append(&parser, input),
+ "Unexpected input appending failure");
+ assert_false(parser_parse(&parser),
+ "Unexpected parse error for input: %s", input);
+ parser_fini(&parser);
+ }
+}
+TEST_END
+
+void
+write_cb(void *opaque, const char *str) {
+ parser_t *parser = (parser_t *)opaque;
+ if (parser_append(parser, str)) {
+ test_fail("Unexpected input appending failure");
+ }
+}
+
+TEST_BEGIN(test_stats_print_json) {
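+	/*
+	 * Each string below is passed as the opts argument to
+	 * malloc_stats_print().  'J' selects JSON output; the remaining
+	 * characters ('g', 'm', 'd', 'a', 'b', 'l', 'x') omit individual
+	 * sections of the statistics.
+	 */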
+ const char *opts[] = {
+ "J",
+ "Jg",
+ "Jm",
+ "Jd",
+ "Jmd",
+ "Jgd",
+ "Jgm",
+ "Jgmd",
+ "Ja",
+ "Jb",
+ "Jl",
+ "Jx",
+ "Jbl",
+ "Jal",
+ "Jab",
+ "Jabl",
+ "Jax",
+ "Jbx",
+ "Jlx",
+ "Jablx",
+ "Jgmdablx",
+ };
+ unsigned arena_ind, i;
+
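+	/*
+	 * Three passes: with the default arenas only, after creating an extra
+	 * arena, and after destroying that arena again, so the JSON output is
+	 * validated in each state.
+	 */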
+ for (i = 0; i < 3; i++) {
+ unsigned j;
+
+ switch (i) {
+ case 0:
+ break;
+ case 1: {
+ size_t sz = sizeof(arena_ind);
+ assert_d_eq(mallctl("arenas.create", (void *)&arena_ind,
+ &sz, NULL, 0), 0, "Unexpected mallctl failure");
+ break;
+ } case 2: {
+ size_t mib[3];
+ size_t miblen = sizeof(mib)/sizeof(size_t);
+ assert_d_eq(mallctlnametomib("arena.0.destroy",
+ mib, &miblen), 0,
+ "Unexpected mallctlnametomib failure");
+ mib[1] = arena_ind;
+ assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL,
+ 0), 0, "Unexpected mallctlbymib failure");
+ break;
+ } default:
+ not_reached();
+ }
+
+ for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) {
+ parser_t parser;
+
+ parser_init(&parser, true);
+ malloc_stats_print(write_cb, (void *)&parser, opts[j]);
+ assert_false(parser_parse(&parser),
+ "Unexpected parse error, opts=\"%s\"", opts[j]);
+ parser_fini(&parser);
+ }
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_json_parser,
+ test_stats_print_json);
+}
diff --git a/deps/jemalloc/test/unit/ticker.c b/deps/jemalloc/test/unit/ticker.c
new file mode 100644
index 000000000..e5790a316
--- /dev/null
+++ b/deps/jemalloc/test/unit/ticker.c
@@ -0,0 +1,73 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/ticker.h"
+
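+/*
+ * A ticker counts down from its initialized value; ticker_tick() returns
+ * true, and the counter resets, once the count is exhausted.
+ */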
+TEST_BEGIN(test_ticker_tick) {
+#define NREPS 2
+#define NTICKS 3
+ ticker_t ticker;
+ int32_t i, j;
+
+ ticker_init(&ticker, NTICKS);
+ for (i = 0; i < NREPS; i++) {
+ for (j = 0; j < NTICKS; j++) {
+ assert_u_eq(ticker_read(&ticker), NTICKS - j,
+ "Unexpected ticker value (i=%d, j=%d)", i, j);
+ assert_false(ticker_tick(&ticker),
+ "Unexpected ticker fire (i=%d, j=%d)", i, j);
+ }
+ assert_u32_eq(ticker_read(&ticker), 0,
+ "Expected ticker depletion");
+ assert_true(ticker_tick(&ticker),
+ "Expected ticker fire (i=%d)", i);
+ assert_u32_eq(ticker_read(&ticker), NTICKS,
+ "Expected ticker reset");
+ }
+#undef NREPS
+#undef NTICKS
+}
+TEST_END
+
+TEST_BEGIN(test_ticker_ticks) {
+#define NTICKS 3
+ ticker_t ticker;
+
+ ticker_init(&ticker, NTICKS);
+
+ assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
+ assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire");
+ assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value");
+ assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire");
+ assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
+
+ assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire");
+ assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value");
+#undef NTICKS
+}
+TEST_END
+
+TEST_BEGIN(test_ticker_copy) {
+#define NTICKS 3
+ ticker_t ta, tb;
+
+ ticker_init(&ta, NTICKS);
+ ticker_copy(&tb, &ta);
+ assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
+ assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire");
+ assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
+
+ ticker_tick(&ta);
+ ticker_copy(&tb, &ta);
+ assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value");
+ assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire");
+ assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value");
+#undef NTICKS
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_ticker_tick,
+ test_ticker_ticks,
+ test_ticker_copy);
+}
diff --git a/deps/jemalloc/test/unit/tsd.c b/deps/jemalloc/test/unit/tsd.c
index f421c1a3c..6c479139b 100644
--- a/deps/jemalloc/test/unit/tsd.c
+++ b/deps/jemalloc/test/unit/tsd.c
@@ -1,71 +1,139 @@
#include "test/jemalloc_test.h"
-#define THREAD_DATA 0x72b65c10
-
-typedef unsigned int data_t;
-
-static bool data_cleanup_executed;
+static int data_cleanup_count;
void
-data_cleanup(void *arg)
-{
- data_t *data = (data_t *)arg;
-
- assert_x_eq(*data, THREAD_DATA,
- "Argument passed into cleanup function should match tsd value");
- data_cleanup_executed = true;
+data_cleanup(int *data) {
+ if (data_cleanup_count == 0) {
+ assert_x_eq(*data, MALLOC_TSD_TEST_DATA_INIT,
+ "Argument passed into cleanup function should match tsd "
+ "value");
+ }
+ ++data_cleanup_count;
+
+ /*
+	 * Allocate during cleanup for two rounds, in order to ensure that
+ * jemalloc's internal tsd reinitialization happens.
+ */
+ bool reincarnate = false;
+ switch (*data) {
+ case MALLOC_TSD_TEST_DATA_INIT:
+ *data = 1;
+ reincarnate = true;
+ break;
+ case 1:
+ *data = 2;
+ reincarnate = true;
+ break;
+ case 2:
+ return;
+ default:
+ not_reached();
+ }
+
+ if (reincarnate) {
+ void *p = mallocx(1, 0);
+ assert_ptr_not_null(p, "Unexpeced mallocx() failure");
+ dallocx(p, 0);
+ }
}
-malloc_tsd_protos(, data, data_t)
-malloc_tsd_externs(data, data_t)
-#define DATA_INIT 0x12345678
-malloc_tsd_data(, data, data_t, DATA_INIT)
-malloc_tsd_funcs(, data, data_t, DATA_INIT, data_cleanup)
-
static void *
-thd_start(void *arg)
-{
- data_t d = (data_t)(uintptr_t)arg;
- assert_x_eq(*data_tsd_get(), DATA_INIT,
+thd_start(void *arg) {
+ int d = (int)(uintptr_t)arg;
+ void *p;
+
+ tsd_t *tsd = tsd_fetch();
+ assert_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT,
"Initial tsd get should return initialization value");
- data_tsd_set(&d);
- assert_x_eq(*data_tsd_get(), d,
+ p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+
+ tsd_test_data_set(tsd, d);
+ assert_x_eq(tsd_test_data_get(tsd), d,
"After tsd set, tsd get should return value that was set");
d = 0;
- assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg,
+ assert_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg,
"Resetting local data should have no effect on tsd");
- return (NULL);
-}
+ tsd_test_callback_set(tsd, &data_cleanup);
-TEST_BEGIN(test_tsd_main_thread)
-{
+ free(p);
+ return NULL;
+}
- thd_start((void *) 0xa5f3e329);
+TEST_BEGIN(test_tsd_main_thread) {
+ thd_start((void *)(uintptr_t)0xa5f3e329);
}
TEST_END
-TEST_BEGIN(test_tsd_sub_thread)
-{
+TEST_BEGIN(test_tsd_sub_thread) {
thd_t thd;
- data_cleanup_executed = false;
- thd_create(&thd, thd_start, (void *)THREAD_DATA);
+ data_cleanup_count = 0;
+ thd_create(&thd, thd_start, (void *)MALLOC_TSD_TEST_DATA_INIT);
thd_join(thd, NULL);
- assert_true(data_cleanup_executed,
- "Cleanup function should have executed");
+ /*
+ * We reincarnate twice in the data cleanup, so it should execute at
+ * least 3 times.
+ */
+ assert_x_ge(data_cleanup_count, 3,
+ "Cleanup function should have executed multiple times.");
}
TEST_END
-int
-main(void)
-{
+static void *
+thd_start_reincarnated(void *arg) {
+ tsd_t *tsd = tsd_fetch();
+ assert(tsd);
+
+ void *p = malloc(1);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+
+ /* Manually trigger reincarnation. */
+ assert_ptr_not_null(tsd_arena_get(tsd),
+ "Should have tsd arena set.");
+ tsd_cleanup((void *)tsd);
+ assert_ptr_null(*tsd_arenap_get_unsafe(tsd),
+ "TSD arena should have been cleared.");
+ assert_u_eq(tsd->state, tsd_state_purgatory,
+ "TSD state should be purgatory\n");
+
+ free(p);
+ assert_u_eq(tsd->state, tsd_state_reincarnated,
+ "TSD state should be reincarnated\n");
+ p = mallocx(1, MALLOCX_TCACHE_NONE);
+ assert_ptr_not_null(p, "Unexpected malloc() failure");
+ assert_ptr_null(*tsd_arenap_get_unsafe(tsd),
+ "Should not have tsd arena set after reincarnation.");
+
+ free(p);
+ tsd_cleanup((void *)tsd);
+ assert_ptr_null(*tsd_arenap_get_unsafe(tsd),
+ "TSD arena should have been cleared after 2nd cleanup.");
+
+ return NULL;
+}
- data_tsd_boot();
+TEST_BEGIN(test_tsd_reincarnation) {
+ thd_t thd;
+ thd_create(&thd, thd_start_reincarnated, NULL);
+ thd_join(thd, NULL);
+}
+TEST_END
- return (test(
+int
+main(void) {
+ /* Ensure tsd bootstrapped. */
+ if (nallocx(1, 0) == 0) {
+ malloc_printf("Initialization error");
+ return test_status_fail;
+ }
+
+ return test_no_reentrancy(
test_tsd_main_thread,
- test_tsd_sub_thread));
+ test_tsd_sub_thread,
+ test_tsd_reincarnation);
}
diff --git a/deps/jemalloc/test/unit/witness.c b/deps/jemalloc/test/unit/witness.c
new file mode 100644
index 000000000..5986da400
--- /dev/null
+++ b/deps/jemalloc/test/unit/witness.c
@@ -0,0 +1,280 @@
+#include "test/jemalloc_test.h"
+
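+/*
+ * The witness machinery sanity-checks lock acquisition order in debug
+ * builds.  These tests replace its error hooks with interceptors that
+ * record the violation instead of aborting.
+ */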
+static witness_lock_error_t *witness_lock_error_orig;
+static witness_owner_error_t *witness_owner_error_orig;
+static witness_not_owner_error_t *witness_not_owner_error_orig;
+static witness_depth_error_t *witness_depth_error_orig;
+
+static bool saw_lock_error;
+static bool saw_owner_error;
+static bool saw_not_owner_error;
+static bool saw_depth_error;
+
+static void
+witness_lock_error_intercept(const witness_list_t *witnesses,
+ const witness_t *witness) {
+ saw_lock_error = true;
+}
+
+static void
+witness_owner_error_intercept(const witness_t *witness) {
+ saw_owner_error = true;
+}
+
+static void
+witness_not_owner_error_intercept(const witness_t *witness) {
+ saw_not_owner_error = true;
+}
+
+static void
+witness_depth_error_intercept(const witness_list_t *witnesses,
+ witness_rank_t rank_inclusive, unsigned depth) {
+ saw_depth_error = true;
+}
+
+static int
+witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) {
+ assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+ assert(oa == (void *)a);
+ assert(ob == (void *)b);
+
+ return strcmp(a->name, b->name);
+}
+
+static int
+witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b,
+ void *ob) {
+ assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank");
+
+ assert(oa == (void *)a);
+ assert(ob == (void *)b);
+
+ return -strcmp(a->name, b->name);
+}
+
+TEST_BEGIN(test_witness) {
+ witness_t a, b;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 0);
+
+ witness_init(&a, "a", 1, NULL, NULL);
+ witness_assert_not_owner(&witness_tsdn, &a);
+ witness_lock(&witness_tsdn, &a);
+ witness_assert_owner(&witness_tsdn, &a);
+ witness_assert_depth(&witness_tsdn, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 0);
+
+ witness_init(&b, "b", 2, NULL, NULL);
+ witness_assert_not_owner(&witness_tsdn, &b);
+ witness_lock(&witness_tsdn, &b);
+ witness_assert_owner(&witness_tsdn, &b);
+ witness_assert_depth(&witness_tsdn, 2);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 2);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)3U, 0);
+
+ witness_unlock(&witness_tsdn, &a);
+ witness_assert_depth(&witness_tsdn, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 1);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)3U, 0);
+ witness_unlock(&witness_tsdn, &b);
+
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+ witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 0);
+}
+TEST_END
+
+TEST_BEGIN(test_witness_comp) {
+ witness_t a, b, c, d;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_init(&a, "a", 1, witness_comp, &a);
+ witness_assert_not_owner(&witness_tsdn, &a);
+ witness_lock(&witness_tsdn, &a);
+ witness_assert_owner(&witness_tsdn, &a);
+ witness_assert_depth(&witness_tsdn, 1);
+
+ witness_init(&b, "b", 1, witness_comp, &b);
+ witness_assert_not_owner(&witness_tsdn, &b);
+ witness_lock(&witness_tsdn, &b);
+ witness_assert_owner(&witness_tsdn, &b);
+ witness_assert_depth(&witness_tsdn, 2);
+ witness_unlock(&witness_tsdn, &b);
+ witness_assert_depth(&witness_tsdn, 1);
+
+ witness_lock_error_orig = witness_lock_error;
+ witness_lock_error = witness_lock_error_intercept;
+ saw_lock_error = false;
+
+ witness_init(&c, "c", 1, witness_comp_reverse, &c);
+ witness_assert_not_owner(&witness_tsdn, &c);
+ assert_false(saw_lock_error, "Unexpected witness lock error");
+ witness_lock(&witness_tsdn, &c);
+ assert_true(saw_lock_error, "Expected witness lock error");
+ witness_unlock(&witness_tsdn, &c);
+ witness_assert_depth(&witness_tsdn, 1);
+
+ saw_lock_error = false;
+
+ witness_init(&d, "d", 1, NULL, NULL);
+ witness_assert_not_owner(&witness_tsdn, &d);
+ assert_false(saw_lock_error, "Unexpected witness lock error");
+ witness_lock(&witness_tsdn, &d);
+ assert_true(saw_lock_error, "Expected witness lock error");
+ witness_unlock(&witness_tsdn, &d);
+ witness_assert_depth(&witness_tsdn, 1);
+
+ witness_unlock(&witness_tsdn, &a);
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_reversal) {
+ witness_t a, b;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_lock_error_orig = witness_lock_error;
+ witness_lock_error = witness_lock_error_intercept;
+ saw_lock_error = false;
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_init(&a, "a", 1, NULL, NULL);
+ witness_init(&b, "b", 2, NULL, NULL);
+
+ witness_lock(&witness_tsdn, &b);
+ witness_assert_depth(&witness_tsdn, 1);
+ assert_false(saw_lock_error, "Unexpected witness lock error");
+ witness_lock(&witness_tsdn, &a);
+ assert_true(saw_lock_error, "Expected witness lock error");
+
+ witness_unlock(&witness_tsdn, &a);
+ witness_assert_depth(&witness_tsdn, 1);
+ witness_unlock(&witness_tsdn, &b);
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_recursive) {
+ witness_t a;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_not_owner_error_orig = witness_not_owner_error;
+ witness_not_owner_error = witness_not_owner_error_intercept;
+ saw_not_owner_error = false;
+
+ witness_lock_error_orig = witness_lock_error;
+ witness_lock_error = witness_lock_error_intercept;
+ saw_lock_error = false;
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_init(&a, "a", 1, NULL, NULL);
+
+ witness_lock(&witness_tsdn, &a);
+ assert_false(saw_lock_error, "Unexpected witness lock error");
+ assert_false(saw_not_owner_error, "Unexpected witness not owner error");
+ witness_lock(&witness_tsdn, &a);
+ assert_true(saw_lock_error, "Expected witness lock error");
+ assert_true(saw_not_owner_error, "Expected witness not owner error");
+
+ witness_unlock(&witness_tsdn, &a);
+
+ witness_assert_lockless(&witness_tsdn);
+
+	witness_not_owner_error = witness_not_owner_error_orig;
+	witness_lock_error = witness_lock_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_unlock_not_owned) {
+ witness_t a;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_owner_error_orig = witness_owner_error;
+ witness_owner_error = witness_owner_error_intercept;
+ saw_owner_error = false;
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_init(&a, "a", 1, NULL, NULL);
+
+ assert_false(saw_owner_error, "Unexpected owner error");
+ witness_unlock(&witness_tsdn, &a);
+ assert_true(saw_owner_error, "Expected owner error");
+
+ witness_assert_lockless(&witness_tsdn);
+
+ witness_owner_error = witness_owner_error_orig;
+}
+TEST_END
+
+TEST_BEGIN(test_witness_depth) {
+ witness_t a;
+ witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER };
+
+ test_skip_if(!config_debug);
+
+ witness_depth_error_orig = witness_depth_error;
+ witness_depth_error = witness_depth_error_intercept;
+ saw_depth_error = false;
+
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+
+ witness_init(&a, "a", 1, NULL, NULL);
+
+ assert_false(saw_depth_error, "Unexpected depth error");
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+
+ witness_lock(&witness_tsdn, &a);
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+ assert_true(saw_depth_error, "Expected depth error");
+
+ witness_unlock(&witness_tsdn, &a);
+
+ witness_assert_lockless(&witness_tsdn);
+ witness_assert_depth(&witness_tsdn, 0);
+
+ witness_depth_error = witness_depth_error_orig;
+}
+TEST_END
+
+int
+main(void) {
+ return test(
+ test_witness,
+ test_witness_comp,
+ test_witness_reversal,
+ test_witness_recursive,
+ test_witness_unlock_not_owned,
+ test_witness_depth);
+}
diff --git a/deps/jemalloc/test/unit/zero.c b/deps/jemalloc/test/unit/zero.c
index 65a8f0c9c..553692ba7 100644
--- a/deps/jemalloc/test/unit/zero.c
+++ b/deps/jemalloc/test/unit/zero.c
@@ -1,78 +1,59 @@
#include "test/jemalloc_test.h"
-#ifdef JEMALLOC_FILL
-const char *malloc_conf =
- "abort:false,junk:false,zero:true,redzone:false,quarantine:0";
-#endif
-
static void
-test_zero(size_t sz_min, size_t sz_max)
-{
- char *s;
+test_zero(size_t sz_min, size_t sz_max) {
+ uint8_t *s;
size_t sz_prev, sz, i;
+#define MAGIC ((uint8_t)0x61)
sz_prev = 0;
- s = (char *)mallocx(sz_min, 0);
+ s = (uint8_t *)mallocx(sz_min, 0);
assert_ptr_not_null((void *)s, "Unexpected mallocx() failure");
for (sz = sallocx(s, 0); sz <= sz_max;
sz_prev = sz, sz = sallocx(s, 0)) {
if (sz_prev > 0) {
- assert_c_eq(s[0], 'a',
+ assert_u_eq(s[0], MAGIC,
"Previously allocated byte %zu/%zu is corrupted",
ZU(0), sz_prev);
- assert_c_eq(s[sz_prev-1], 'a',
+ assert_u_eq(s[sz_prev-1], MAGIC,
"Previously allocated byte %zu/%zu is corrupted",
sz_prev-1, sz_prev);
}
for (i = sz_prev; i < sz; i++) {
- assert_c_eq(s[i], 0x0,
+ assert_u_eq(s[i], 0x0,
"Newly allocated byte %zu/%zu isn't zero-filled",
i, sz);
- s[i] = 'a';
+ s[i] = MAGIC;
}
if (xallocx(s, sz+1, 0, 0) == sz) {
- s = (char *)rallocx(s, sz+1, 0);
+ s = (uint8_t *)rallocx(s, sz+1, 0);
assert_ptr_not_null((void *)s,
"Unexpected rallocx() failure");
}
}
dallocx(s, 0);
+#undef MAGIC
}
-TEST_BEGIN(test_zero_small)
-{
-
+TEST_BEGIN(test_zero_small) {
test_skip_if(!config_fill);
test_zero(1, SMALL_MAXCLASS-1);
}
TEST_END
-TEST_BEGIN(test_zero_large)
-{
-
+TEST_BEGIN(test_zero_large) {
test_skip_if(!config_fill);
- test_zero(SMALL_MAXCLASS+1, arena_maxclass);
-}
-TEST_END
-
-TEST_BEGIN(test_zero_huge)
-{
-
- test_skip_if(!config_fill);
- test_zero(arena_maxclass+1, chunksize*2);
+ test_zero(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1)));
}
TEST_END
int
-main(void)
-{
-
- return (test(
+main(void) {
+ return test(
test_zero_small,
- test_zero_large,
- test_zero_huge));
+ test_zero_large);
}
diff --git a/deps/jemalloc/test/unit/zero.sh b/deps/jemalloc/test/unit/zero.sh
new file mode 100644
index 000000000..b4540b27e
--- /dev/null
+++ b/deps/jemalloc/test/unit/zero.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+if [ "x${enable_fill}" = "x1" ] ; then
+ export MALLOC_CONF="abort:false,junk:false,zero:true"
+fi
diff --git a/deps/linenoise/README.markdown b/deps/linenoise/README.markdown
index c845673cd..e01642cf8 100644
--- a/deps/linenoise/README.markdown
+++ b/deps/linenoise/README.markdown
@@ -6,6 +6,7 @@ MongoDB, and Android.
* Single and multi line editing mode with the usual key bindings implemented.
* History handling.
* Completion.
+* Hints (suggestions at the right of the prompt as you type).
* About 1,100 lines of BSD license source code.
* Only uses a subset of VT100 escapes (ANSI.SYS compatible).
@@ -20,7 +21,7 @@ So what usually happens is either:
The result is a pollution of binaries without line editing support.
-So I spent more or less two hours doing a reality check resulting in this little library: is it *really* needed for a line editing library to be 20k lines of code? Apparently not, it is possibe to get a very small, zero configuration, trivial to embed library, that solves the problem. Smaller programs will just include this, supporing line editing out of the box. Larger programs may use this little library or just checking with configure if readline/libedit is available and resorting to linenoise if not.
+So I spent more or less two hours doing a reality check resulting in this little library: is it *really* needed for a line editing library to be 20k lines of code? Apparently not, it is possible to get a very small, zero configuration, trivial to embed library, that solves the problem. Smaller programs will just include this, supporting line editing out of the box. Larger programs may use this little library or just check with configure if readline/libedit is available, resorting to Linenoise if not.
## Terminals, in 2010.
@@ -41,12 +42,183 @@ The library is currently about 1100 lines of code. In order to use it in your pr
* IBM AIX 6.1
* FreeBSD xterm ($TERM = xterm)
* ANSI.SYS
+ * Emacs comint mode ($TERM = dumb)
Please test it everywhere you can and report back!
## Let's push this forward!
-Patches should be provided in the respect of linenoise sensibility for small
+Patches should be provided in keeping with the Linenoise sensibility for small,
easy to understand code.
Send feedback to antirez at gmail
+
+# The API
+
+Linenoise is very easy to use, and reading the example shipped with the
+library should get you up to speed ASAP. Here is a list of API calls
+and how to use them.
+
+ char *linenoise(const char *prompt);
+
+This is the main Linenoise call: it shows the user a prompt with line editing
+and history capabilities. The string you specify is used as the prompt, that
+is, it will be printed to the left of the cursor. The library returns a buffer
+with the line composed by the user, or NULL on end of file or when there
+is an out of memory condition.
+
+When a tty is detected (the user is actually typing into a terminal session)
+the maximum editable line length is `LINENOISE_MAX_LINE`. When instead the
+standard input is not a tty, which happens every time you redirect a file
+to a program, or use it in a Unix pipeline, there are no limits to the
+length of the line that can be returned.
+
+The returned line should be freed with the standard `free()` library call.
+However, your program may use a different dynamic allocation library, so
+you may also use `linenoiseFree` to make sure the line is freed with the
+same allocator it was created with.
+
+The canonical loop used by a program using Linenoise will be something like
+this:
+
+ while((line = linenoise("hello> ")) != NULL) {
+ printf("You wrote: %s\n", line);
+ linenoiseFree(line); /* Or just free(line) if you use libc malloc. */
+ }
+
+## Single line VS multi line editing
+
+By default, Linenoise uses single line editing, that is, a single row on the
+screen will be used, and as the user types more, the text will scroll towards
+the left to make room. This works if your program is one where the user is
+unlikely to write a lot of text; otherwise multi line editing, where multiple
+screen rows are used, can be a lot more comfortable.
+
+In order to enable multi line editing use the following API call:
+
+ linenoiseSetMultiLine(1);
+
+You can disable it using `0` as argument.
+
+## History
+
+Linenoise supports history, so that the user does not have to retype
+the same things again and again, but can use the up and down arrows in order
+to search and re-edit already inserted lines of text.
+
+The following are the history API calls:
+
+ int linenoiseHistoryAdd(const char *line);
+ int linenoiseHistorySetMaxLen(int len);
+ int linenoiseHistorySave(const char *filename);
+ int linenoiseHistoryLoad(const char *filename);
+
+Use `linenoiseHistoryAdd` every time you want to add a new element
+to the top of the history (it will be the first the user will see when
+using the up arrow).
+
+Note that for history to work, you have to set a length for the history
+(which is zero by default, so history will be disabled if you don't set
+a proper one). This is accomplished using the `linenoiseHistorySetMaxLen`
+function.
+
+Linenoise has direct support for persisting the history into a history
+file. The functions `linenoiseHistorySave` and `linenoiseHistoryLoad` do
+just that. Both functions return -1 on error and 0 on success.
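+
+For example, a program that wants persistent history could combine the
+calls above like this (a minimal sketch, reusing the `line` variable from
+the canonical loop earlier; the file name is just an example):
+
+    linenoiseHistorySetMaxLen(100);
+    linenoiseHistoryLoad("history.txt"); /* May fail on the first run. */
+    while((line = linenoise("hello> ")) != NULL) {
+        if (line[0] != '\0') {
+            linenoiseHistoryAdd(line);
+            linenoiseHistorySave("history.txt");
+        }
+        linenoiseFree(line);
+    }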
+
+## Completion
+
+Linenoise supports completion, which is the ability to complete the user
+input when she or he presses the `<TAB>` key.
+
+In order to use completion, you need to register a completion callback, which
+is called every time the user presses `<TAB>`. Your callback supplies the
+items that are completions for the current string.
+
+The following is an example of registering a completion callback:
+
+ linenoiseSetCompletionCallback(completion);
+
+The completion callback must be a function returning `void` that takes as input
+a `const char` pointer, which is the line the user has typed so far, and
+a `linenoiseCompletions` object pointer, which is used as argument of
+`linenoiseAddCompletion` in order to add completions inside the callback.
+An example will make it more clear:
+
+ void completion(const char *buf, linenoiseCompletions *lc) {
+ if (buf[0] == 'h') {
+ linenoiseAddCompletion(lc,"hello");
+ linenoiseAddCompletion(lc,"hello there");
+ }
+ }
+
+Basically in your completion callback, you inspect the input and register
+the items that are good completions by calling `linenoiseAddCompletion`.
+
+If you want to test the completion feature, compile the example program
+with `make`, run it, type `h` and press `<TAB>`.
+
+## Hints
+
+Linenoise has a feature called *hints* which is very useful when you
+use Linenoise in order to implement a REPL (Read Eval Print Loop) for
+a program that accepts commands and arguments, but may also be useful in
+other contexts.
+
+As the user types, the feature shows, to the right of the cursor, hints
+that may be useful. The hints can be displayed in a different color than
+the text the user is typing, and can also be bold.
+
+For example, as the user starts to type `"git remote add"`, with hints it's
+possible to show on the right of the prompt a string `<name> <url>`.
+
+The feature works similarly to the completion feature, using a callback.
+To register the callback we use:
+
+ linenoiseSetHintsCallback(hints);
+
+The callback itself is implemented like this:
+
+ char *hints(const char *buf, int *color, int *bold) {
+ if (!strcasecmp(buf,"git remote add")) {
+ *color = 35;
+ *bold = 0;
+ return " <name> <url>";
+ }
+ return NULL;
+ }
+
+The callback function returns the string that should be displayed or NULL
+if no hint is available for the text the user has typed so far. The returned
+string will be trimmed as needed depending on the number of columns available
+on the screen.
+
+It is possible to return a dynamically allocated string, by also registering
+a function to deallocate the hint string once used (see the sketch after the
+color codes below):
+
+ void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
+
+The free hint callback will just receive the pointer and free the string
+as needed (depending on how the hints callback allocated it).
+
+As you can see in the example above, a `color` (in xterm color terminal codes)
+can be provided together with a `bold` attribute. If no color is set, the
+current terminal foreground color is used. If no bold attribute is set,
+non-bold text is printed.
+
+Color codes are:
+
+ red = 31
+ green = 32
+ yellow = 33
+ blue = 34
+ magenta = 35
+ cyan = 36
+    white = 37
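+
+Finally, if the hints callback returns dynamically allocated strings, a
+matching free callback can be as simple as the following sketch (`freeHint`
+is only an example name):
+
+    void freeHint(void *hint) {
+        free(hint);
+    }
+
+    linenoiseSetFreeHintsCallback(freeHint);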
+
+## Screen handling
+
+Sometimes you may want to clear the screen as a result of something the
+user typed. You can do this by calling the following function:
+
+ void linenoiseClearScreen(void);
diff --git a/deps/linenoise/example.c b/deps/linenoise/example.c
index a2f0936ed..3a544d3c6 100644
--- a/deps/linenoise/example.c
+++ b/deps/linenoise/example.c
@@ -11,6 +11,15 @@ void completion(const char *buf, linenoiseCompletions *lc) {
}
}
+char *hints(const char *buf, int *color, int *bold) {
+ if (!strcasecmp(buf,"hello")) {
+ *color = 35;
+ *bold = 0;
+ return " World";
+ }
+ return NULL;
+}
+
int main(int argc, char **argv) {
char *line;
char *prgname = argv[0];
@@ -34,6 +43,7 @@ int main(int argc, char **argv) {
/* Set the completion callback. This will be called every time the
* user uses the <tab> key. */
linenoiseSetCompletionCallback(completion);
+ linenoiseSetHintsCallback(hints);
/* Load history from file. The history file is just a plain text file
* where entries are separated by newlines. */
diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c
index 36c0c5f6d..fce14a7c5 100644
--- a/deps/linenoise/linenoise.c
+++ b/deps/linenoise/linenoise.c
@@ -10,7 +10,7 @@
*
* ------------------------------------------------------------------------
*
- * Copyright (c) 2010-2013, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
*
* All rights reserved.
@@ -111,6 +111,7 @@
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <unistd.h>
@@ -120,6 +121,8 @@
#define LINENOISE_MAX_LINE 4096
static char *unsupported_term[] = {"dumb","cons25","emacs",NULL};
static linenoiseCompletionCallback *completionCallback = NULL;
+static linenoiseHintsCallback *hintsCallback = NULL;
+static linenoiseFreeHintsCallback *freeHintsCallback = NULL;
static struct termios orig_termios; /* In order to restore at exit.*/
static int rawmode = 0; /* For atexit() function to check if restore is needed*/
@@ -407,6 +410,18 @@ void linenoiseSetCompletionCallback(linenoiseCompletionCallback *fn) {
completionCallback = fn;
}
+/* Register a hints function to be called to show hints to the user at the
+ * right of the prompt. */
+void linenoiseSetHintsCallback(linenoiseHintsCallback *fn) {
+ hintsCallback = fn;
+}
+
+/* Register a function to free the hints returned by the hints callback
+ * registered with linenoiseSetHintsCallback(). */
+void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *fn) {
+ freeHintsCallback = fn;
+}
+
/* This function is used by the callback function registered by the user
* in order to add completion options given the input string when the
* user typed <tab>. See the example.c source code for a very easy to
@@ -456,6 +471,30 @@ static void abFree(struct abuf *ab) {
free(ab->b);
}
+/* Helper of refreshSingleLine() and refreshMultiLine() to show hints
+ * to the right of the prompt. */
+void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int plen) {
+ char seq[64];
+ if (hintsCallback && plen+l->len < l->cols) {
+ int color = -1, bold = 0;
+ char *hint = hintsCallback(l->buf,&color,&bold);
+ if (hint) {
+ int hintlen = strlen(hint);
+ int hintmaxlen = l->cols-(plen+l->len);
+ if (hintlen > hintmaxlen) hintlen = hintmaxlen;
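+            /* Bold with no explicit color defaults to white (37); the SGR
+             * escape below then sets the bold flag and the foreground
+             * color, with 49 preserving the default background. */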
+ if (bold == 1 && color == -1) color = 37;
+            if (color != -1 || bold != 0)
+                snprintf(seq,64,"\033[%d;%d;49m",bold,color);
+            else
+                seq[0] = '\0';
+            abAppend(ab,seq,strlen(seq));
+ abAppend(ab,hint,hintlen);
+ if (color != -1 || bold != 0)
+ abAppend(ab,"\033[0m",4);
+ /* Call the function to free the hint returned. */
+ if (freeHintsCallback) freeHintsCallback(hint);
+ }
+ }
+}
+
/* Single line low level line refresh.
*
* Rewrite the currently edited line accordingly to the buffer content,
@@ -485,6 +524,8 @@ static void refreshSingleLine(struct linenoiseState *l) {
/* Write the prompt and the current buffer content */
abAppend(&ab,l->prompt,strlen(l->prompt));
abAppend(&ab,buf,len);
+    /* Show hints if any. */
+ refreshShowHints(&ab,l,plen);
/* Erase to right */
snprintf(seq,64,"\x1b[0K");
abAppend(&ab,seq,strlen(seq));
@@ -538,6 +579,9 @@ static void refreshMultiLine(struct linenoiseState *l) {
abAppend(&ab,l->prompt,strlen(l->prompt));
abAppend(&ab,l->buf,l->len);
+    /* Show hints if any. */
+ refreshShowHints(&ab,l,plen);
+
/* If we are at the very end of the screen with our prompt, we need to
* emit a newline and move the prompt to the first column. */
if (l->pos &&
@@ -598,7 +642,7 @@ int linenoiseEditInsert(struct linenoiseState *l, char c) {
l->pos++;
l->len++;
l->buf[l->len] = '\0';
- if ((!mlmode && l->plen+l->len < l->cols) /* || mlmode */) {
+ if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) {
/* Avoid a full update of the line in the
* trivial case. */
if (write(l->ofd,&c,1) == -1) return -1;
@@ -772,6 +816,14 @@ static int linenoiseEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen,
history_len--;
free(history[history_len]);
if (mlmode) linenoiseEditMoveEnd(&l);
+ if (hintsCallback) {
+ /* Force a refresh without hints to leave the previous
+ * line as the user typed it after a newline. */
+ linenoiseHintsCallback *hc = hintsCallback;
+ hintsCallback = NULL;
+ refreshLine(&l);
+ hintsCallback = hc;
+ }
return (int)l.len;
case CTRL_C: /* ctrl-c */
errno = EAGAIN;
@@ -934,22 +986,48 @@ static int linenoiseRaw(char *buf, size_t buflen, const char *prompt) {
errno = EINVAL;
return -1;
}
- if (!isatty(STDIN_FILENO)) {
- /* Not a tty: read from file / pipe. */
- if (fgets(buf, buflen, stdin) == NULL) return -1;
- count = strlen(buf);
- if (count && buf[count-1] == '\n') {
- count--;
- buf[count] = '\0';
+
+ if (enableRawMode(STDIN_FILENO) == -1) return -1;
+ count = linenoiseEdit(STDIN_FILENO, STDOUT_FILENO, buf, buflen, prompt);
+ disableRawMode(STDIN_FILENO);
+ printf("\n");
+ return count;
+}
+
+/* This function is called when linenoise() is called with the standard
+ * input file descriptor not attached to a TTY. So for example when the
+ * program using linenoise is used in a pipe or with a file redirected
+ * to its standard input. In this case, we want to be able to return the
+ * line regardless of its length (by default we are limited to 4k). */
+static char *linenoiseNoTTY(void) {
+ char *line = NULL;
+ size_t len = 0, maxlen = 0;
+
+ while(1) {
+ if (len == maxlen) {
+ if (maxlen == 0) maxlen = 16;
+ maxlen *= 2;
+ char *oldval = line;
+ line = realloc(line,maxlen);
+ if (line == NULL) {
+ if (oldval) free(oldval);
+ return NULL;
+ }
+ }
+ int c = fgetc(stdin);
+ if (c == EOF || c == '\n') {
+ if (c == EOF && len == 0) {
+ free(line);
+ return NULL;
+ } else {
+ line[len] = '\0';
+ return line;
+ }
+ } else {
+ line[len] = c;
+ len++;
}
- } else {
- /* Interactive editing. */
- if (enableRawMode(STDIN_FILENO) == -1) return -1;
- count = linenoiseEdit(STDIN_FILENO, STDOUT_FILENO, buf, buflen, prompt);
- disableRawMode(STDIN_FILENO);
- printf("\n");
}
- return count;
}
/* The high level function that is the main API of the linenoise library.
@@ -961,7 +1039,11 @@ char *linenoise(const char *prompt) {
char buf[LINENOISE_MAX_LINE];
int count;
- if (isUnsupportedTerm()) {
+ if (!isatty(STDIN_FILENO)) {
+ /* Not a tty: read from file / pipe. In this mode we don't want any
+ * limit to the line size, so we call a function to handle that. */
+ return linenoiseNoTTY();
+ } else if (isUnsupportedTerm()) {
size_t len;
printf("%s",prompt);
@@ -980,6 +1062,14 @@ char *linenoise(const char *prompt) {
}
}
+/* This is just a wrapper the user may want to call in order to make sure
+ * the linenoise returned buffer is freed with the same allocator it was
+ * created with. Useful when the main program is using an alternative
+ * allocator. */
+void linenoiseFree(void *ptr) {
+ free(ptr);
+}
+
/* ================================ History ================================= */
/* Free the history, but does not reset it. Only used when we have to
@@ -1071,10 +1161,14 @@ int linenoiseHistorySetMaxLen(int len) {
/* Save the history in the specified file. On success 0 is returned
* otherwise -1 is returned. */
int linenoiseHistorySave(const char *filename) {
- FILE *fp = fopen(filename,"w");
+ mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO);
+ FILE *fp;
int j;
+ fp = fopen(filename,"w");
+ umask(old_umask);
if (fp == NULL) return -1;
+ chmod(filename,S_IRUSR|S_IWUSR);
for (j = 0; j < history_len; j++)
fprintf(fp,"%s\n",history[j]);
fclose(fp);
diff --git a/deps/linenoise/linenoise.h b/deps/linenoise/linenoise.h
index 36394eb99..ed20232c5 100644
--- a/deps/linenoise/linenoise.h
+++ b/deps/linenoise/linenoise.h
@@ -1,12 +1,14 @@
-/* linenoise.h -- guerrilla line editing library against the idea that a
- * line editing lib needs to be 20,000 lines of C code.
+/* linenoise.h -- VERSION 1.0
+ *
+ * Guerrilla line editing library against the idea that a line editing lib
+ * needs to be 20,000 lines of C code.
*
* See linenoise.c for more information.
*
* ------------------------------------------------------------------------
*
- * Copyright (c) 2010, Salvatore Sanfilippo <antirez at gmail dot com>
- * Copyright (c) 2010, Pieter Noordhuis <pcnoordhuis at gmail dot com>
+ * Copyright (c) 2010-2014, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2010-2013, Pieter Noordhuis <pcnoordhuis at gmail dot com>
*
* All rights reserved.
*
@@ -47,10 +49,15 @@ typedef struct linenoiseCompletions {
} linenoiseCompletions;
typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *);
+typedef char*(linenoiseHintsCallback)(const char *, int *color, int *bold);
+typedef void(linenoiseFreeHintsCallback)(void *);
void linenoiseSetCompletionCallback(linenoiseCompletionCallback *);
+void linenoiseSetHintsCallback(linenoiseHintsCallback *);
+void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *);
void linenoiseAddCompletion(linenoiseCompletions *, const char *);
char *linenoise(const char *prompt);
+void linenoiseFree(void *ptr);
int linenoiseHistoryAdd(const char *line);
int linenoiseHistorySetMaxLen(int len);
int linenoiseHistorySave(const char *filename);
diff --git a/deps/lua/src/ldo.c b/deps/lua/src/ldo.c
index d1bf786cb..514f7a2a3 100644
--- a/deps/lua/src/ldo.c
+++ b/deps/lua/src/ldo.c
@@ -495,7 +495,7 @@ static void f_parser (lua_State *L, void *ud) {
struct SParser *p = cast(struct SParser *, ud);
int c = luaZ_lookahead(p->z);
luaC_checkGC(L);
- tf = ((c == LUA_SIGNATURE[0]) ? luaU_undump : luaY_parser)(L, p->z,
+ tf = (luaY_parser)(L, p->z,
&p->buff, p->name);
cl = luaF_newLclosure(L, tf->nups, hvalue(gt(L)));
cl->l.p = tf;
diff --git a/deps/lua/src/lua_cmsgpack.c b/deps/lua/src/lua_cmsgpack.c
index e13f053d2..892154793 100644
--- a/deps/lua/src/lua_cmsgpack.c
+++ b/deps/lua/src/lua_cmsgpack.c
@@ -66,7 +66,7 @@
/* Reverse memory bytes if arch is little endian. Given the conceptual
* simplicity of the Lua build system we prefer to check for endianness at runtime.
* The performance difference should be acceptable. */
-static void memrevifle(void *ptr, size_t len) {
+void memrevifle(void *ptr, size_t len) {
unsigned char *p = (unsigned char *)ptr,
*e = (unsigned char *)p+len-1,
aux;
@@ -91,12 +91,11 @@ static void memrevifle(void *ptr, size_t len) {
* behavior. */
typedef struct mp_buf {
- lua_State *L;
unsigned char *b;
size_t len, free;
} mp_buf;
-static void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
+void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
void *(*local_realloc) (void *, void *, size_t osize, size_t nsize) = NULL;
void *ud;
@@ -105,33 +104,32 @@ static void *mp_realloc(lua_State *L, void *target, size_t osize,size_t nsize) {
return local_realloc(ud, target, osize, nsize);
}
-static mp_buf *mp_buf_new(lua_State *L) {
+mp_buf *mp_buf_new(lua_State *L) {
mp_buf *buf = NULL;
/* Old size = 0; new size = sizeof(*buf) */
buf = (mp_buf*)mp_realloc(L, NULL, 0, sizeof(*buf));
- buf->L = L;
buf->b = NULL;
buf->len = buf->free = 0;
return buf;
}
-static void mp_buf_append(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_buf_append(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {
if (buf->free < len) {
- size_t newlen = buf->len+len;
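+        /* Grow geometrically (twice the required size) so that repeated
+         * appends trigger only a logarithmic number of reallocations. */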
+ size_t newsize = (buf->len+len)*2;
- buf->b = (unsigned char*)mp_realloc(buf->L, buf->b, buf->len, newlen*2);
- buf->free = newlen;
+ buf->b = (unsigned char*)mp_realloc(L, buf->b, buf->len + buf->free, newsize);
+ buf->free = newsize - buf->len;
}
memcpy(buf->b+buf->len,s,len);
buf->len += len;
buf->free -= len;
}
-void mp_buf_free(mp_buf *buf) {
- mp_realloc(buf->L, buf->b, buf->len, 0); /* realloc to 0 = free */
- mp_realloc(buf->L, buf, sizeof(*buf), 0);
+void mp_buf_free(lua_State *L, mp_buf *buf) {
+ mp_realloc(L, buf->b, buf->len + buf->free, 0); /* realloc to 0 = free */
+ mp_realloc(L, buf, sizeof(*buf), 0);
}
/* ---------------------------- String cursor ----------------------------------
@@ -153,7 +151,7 @@ typedef struct mp_cur {
int err;
} mp_cur;
-static void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
+void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
cursor->p = s;
cursor->left = len;
cursor->err = MP_CUR_ERROR_NONE;
@@ -173,13 +171,17 @@ static void mp_cur_init(mp_cur *cursor, const unsigned char *s, size_t len) {
/* ------------------------- Low level MP encoding -------------------------- */
-static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
+void mp_encode_bytes(lua_State *L, mp_buf *buf, const unsigned char *s, size_t len) {
unsigned char hdr[5];
int hdrlen;
if (len < 32) {
hdr[0] = 0xa0 | (len&0xff); /* fix raw */
hdrlen = 1;
+ } else if (len <= 0xff) {
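+        /* raw 8: one-byte length header (matching the 0xd9 decoder case). */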
+ hdr[0] = 0xd9;
+ hdr[1] = len;
+ hdrlen = 2;
} else if (len <= 0xffff) {
hdr[0] = 0xda;
hdr[1] = (len&0xff00)>>8;
@@ -193,12 +195,12 @@ static void mp_encode_bytes(mp_buf *buf, const unsigned char *s, size_t len) {
hdr[4] = len&0xff;
hdrlen = 5;
}
- mp_buf_append(buf,hdr,hdrlen);
- mp_buf_append(buf,s,len);
+ mp_buf_append(L,buf,hdr,hdrlen);
+ mp_buf_append(L,buf,s,len);
}
/* we assume IEEE 754 internal format for single and double precision floats. */
-static void mp_encode_double(mp_buf *buf, double d) {
+void mp_encode_double(lua_State *L, mp_buf *buf, double d) {
unsigned char b[9];
float f = d;
@@ -207,16 +209,16 @@ static void mp_encode_double(mp_buf *buf, double d) {
b[0] = 0xca; /* float IEEE 754 */
memcpy(b+1,&f,4);
memrevifle(b+1,4);
- mp_buf_append(buf,b,5);
+ mp_buf_append(L,buf,b,5);
} else if (sizeof(d) == 8) {
b[0] = 0xcb; /* double IEEE 754 */
memcpy(b+1,&d,8);
memrevifle(b+1,8);
- mp_buf_append(buf,b,9);
+ mp_buf_append(L,buf,b,9);
}
}
-static void mp_encode_int(mp_buf *buf, int64_t n) {
+void mp_encode_int(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[9];
int enclen;
@@ -285,10 +287,10 @@ static void mp_encode_int(mp_buf *buf, int64_t n) {
enclen = 9;
}
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
-static void mp_encode_array(mp_buf *buf, int64_t n) {
+void mp_encode_array(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -308,10 +310,10 @@ static void mp_encode_array(mp_buf *buf, int64_t n) {
b[4] = n & 0xff;
enclen = 5;
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
-static void mp_encode_map(mp_buf *buf, int64_t n) {
+void mp_encode_map(lua_State *L, mp_buf *buf, int64_t n) {
unsigned char b[5];
int enclen;
@@ -331,58 +333,59 @@ static void mp_encode_map(mp_buf *buf, int64_t n) {
b[4] = n & 0xff;
enclen = 5;
}
- mp_buf_append(buf,b,enclen);
+ mp_buf_append(L,buf,b,enclen);
}
/* --------------------------- Lua types encoding --------------------------- */
-static void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_string(lua_State *L, mp_buf *buf) {
size_t len;
const char *s;
s = lua_tolstring(L,-1,&len);
- mp_encode_bytes(buf,(const unsigned char*)s,len);
+ mp_encode_bytes(L,buf,(const unsigned char*)s,len);
}
-static void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_bool(lua_State *L, mp_buf *buf) {
unsigned char b = lua_toboolean(L,-1) ? 0xc3 : 0xc2;
- mp_buf_append(buf,&b,1);
+ mp_buf_append(L,buf,&b,1);
}
/* Lua 5.3 has a built in 64-bit integer type */
-static void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_integer(lua_State *L, mp_buf *buf) {
#if (LUA_VERSION_NUM < 503) && BITS_32
lua_Number i = lua_tonumber(L,-1);
#else
lua_Integer i = lua_tointeger(L,-1);
#endif
- mp_encode_int(buf, (int64_t)i);
+ mp_encode_int(L, buf, (int64_t)i);
}
/* Lua 5.2 and lower only has 64-bit doubles, so we need to
* detect if the double may be representable as an int
* for Lua < 5.3 */
-static void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_number(lua_State *L, mp_buf *buf) {
lua_Number n = lua_tonumber(L,-1);
if (IS_INT64_EQUIVALENT(n)) {
mp_encode_lua_integer(L, buf);
} else {
- mp_encode_double(buf,(double)n);
+ mp_encode_double(L,buf,(double)n);
}
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level);
/* Convert a lua table into a message pack list. */
-static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
#if LUA_VERSION_NUM < 502
size_t len = lua_objlen(L,-1), j;
#else
size_t len = lua_rawlen(L,-1), j;
#endif
- mp_encode_array(buf,len);
+ mp_encode_array(L,buf,len);
+ luaL_checkstack(L, 1, "in function mp_encode_lua_table_as_array");
for (j = 1; j <= len; j++) {
lua_pushnumber(L,j);
lua_gettable(L,-2);
@@ -391,13 +394,14 @@ static void mp_encode_lua_table_as_array(lua_State *L, mp_buf *buf, int level) {
}
/* Convert a lua table into a message pack key-value map. */
-static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
size_t len = 0;
/* First step: count keys into table. No other way to do it with the
* Lua API, we need to iterate a first time. Note that an alternative
* would be to do a single run, and then hack the buffer to insert the
* map opcodes for message pack. Too hackish for this lib. */
+ luaL_checkstack(L, 3, "in function mp_encode_lua_table_as_map");
lua_pushnil(L);
while(lua_next(L,-2)) {
lua_pop(L,1); /* remove value, keep key for next iteration. */
@@ -405,7 +409,7 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
}
/* Step two: actually encoding of the map. */
- mp_encode_map(buf,len);
+ mp_encode_map(L,buf,len);
lua_pushnil(L);
while(lua_next(L,-2)) {
/* Stack: ... key value */
@@ -418,7 +422,7 @@ static void mp_encode_lua_table_as_map(lua_State *L, mp_buf *buf, int level) {
/* Returns true if the Lua table on top of the stack is exclusively composed
* of keys from numerical keys from 1 up to N, with N being the total number
* of elements, without any hole in the middle. */
-static int table_is_an_array(lua_State *L) {
+int table_is_an_array(lua_State *L) {
int count = 0, max = 0;
#if LUA_VERSION_NUM < 503
lua_Number n;
@@ -461,22 +465,21 @@ static int table_is_an_array(lua_State *L) {
/* If the length operator returns non-zero, that is, there is at least
* an object at key '1', we serialize to message pack list. Otherwise
* we use a map. */
-static void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_table(lua_State *L, mp_buf *buf, int level) {
if (table_is_an_array(L))
mp_encode_lua_table_as_array(L,buf,level);
else
mp_encode_lua_table_as_map(L,buf,level);
}
-static void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
+void mp_encode_lua_null(lua_State *L, mp_buf *buf) {
unsigned char b[1];
- (void)L;
b[0] = 0xc0;
- mp_buf_append(buf,b,1);
+ mp_buf_append(L,buf,b,1);
}
-static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
+void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
int t = lua_type(L,-1);
/* Limit the encoding of nested tables to a specified maximum depth, so that
@@ -506,7 +509,7 @@ static void mp_encode_lua_type(lua_State *L, mp_buf *buf, int level) {
* Packs all arguments as a stream for multiple upacking later.
* Returns error if no arguments provided.
*/
-static int mp_pack(lua_State *L) {
+int mp_pack(lua_State *L) {
int nargs = lua_gettop(L);
int i;
mp_buf *buf;
@@ -514,10 +517,14 @@ static int mp_pack(lua_State *L) {
if (nargs == 0)
return luaL_argerror(L, 0, "MessagePack pack needs input.");
+ if (!lua_checkstack(L, nargs))
+ return luaL_argerror(L, 0, "Too many arguments for MessagePack pack.");
+
buf = mp_buf_new(L);
for(i = 1; i <= nargs; i++) {
/* Copy argument i to top of stack for _encode processing;
* the encode function pops it from the stack when complete. */
+ luaL_checkstack(L, 1, "in function mp_check");
lua_pushvalue(L, i);
mp_encode_lua_type(L,buf,0);
@@ -530,7 +537,7 @@ static int mp_pack(lua_State *L) {
buf->free += buf->len;
buf->len = 0;
}
- mp_buf_free(buf);
+ mp_buf_free(L, buf);
/* Concatenate all nargs buffers together */
lua_concat(L, nargs);
@@ -546,6 +553,7 @@ void mp_decode_to_lua_array(lua_State *L, mp_cur *c, size_t len) {
int index = 1;
lua_newtable(L);
+ luaL_checkstack(L, 1, "in function mp_decode_to_lua_array");
while(len--) {
lua_pushnumber(L,index++);
mp_decode_to_lua_type(L,c);
@@ -687,6 +695,15 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
mp_cur_consume(c,9);
}
break;
+ case 0xd9: /* raw 8 */
+ mp_cur_need(c,2);
+ {
+ size_t l = c->p[1];
+ mp_cur_need(c,2+l);
+ lua_pushlstring(L,(char*)c->p+2,l);
+ mp_cur_consume(c,2+l);
+ }
+ break;
case 0xda: /* raw 16 */
mp_cur_need(c,3);
{
@@ -773,7 +790,7 @@ void mp_decode_to_lua_type(lua_State *L, mp_cur *c) {
}
}
-static int mp_unpack_full(lua_State *L, int limit, int offset) {
+int mp_unpack_full(lua_State *L, int limit, int offset) {
size_t len;
const char *s;
mp_cur c;
@@ -811,6 +828,9 @@ static int mp_unpack_full(lua_State *L, int limit, int offset) {
* subtract the entire buffer size from the unprocessed size
* to get our next start offset */
int offset = len - c.left;
+
+ luaL_checkstack(L, 1, "in function mp_unpack_full");
+
/* Return offset -1 when we have processed the entire buffer. */
lua_pushinteger(L, c.left == 0 ? -1 : offset);
/* Results are returned with the arg elements still
@@ -826,18 +846,18 @@ static int mp_unpack_full(lua_State *L, int limit, int offset) {
return cnt;
}
-static int mp_unpack(lua_State *L) {
+int mp_unpack(lua_State *L) {
return mp_unpack_full(L, 0, 0);
}
-static int mp_unpack_one(lua_State *L) {
+int mp_unpack_one(lua_State *L) {
int offset = luaL_optinteger(L, 2, 0);
/* Variable pop because offset may not exist */
lua_pop(L, lua_gettop(L)-1);
return mp_unpack_full(L, 1, offset);
}
-static int mp_unpack_limit(lua_State *L) {
+int mp_unpack_limit(lua_State *L) {
int limit = luaL_checkinteger(L, 2);
int offset = luaL_optinteger(L, 3, 0);
/* Variable pop because offset may not exist */
@@ -846,7 +866,7 @@ static int mp_unpack_limit(lua_State *L) {
return mp_unpack_full(L, limit, offset);
}
-static int mp_safe(lua_State *L) {
+int mp_safe(lua_State *L) {
int argc, err, total_results;
argc = lua_gettop(L);
@@ -869,7 +889,7 @@ static int mp_safe(lua_State *L) {
}
/* -------------------------------------------------------------------------- */
-static const struct luaL_Reg cmds[] = {
+const struct luaL_Reg cmds[] = {
{"pack", mp_pack},
{"unpack", mp_unpack},
{"unpack_one", mp_unpack_one},
@@ -877,7 +897,7 @@ static const struct luaL_Reg cmds[] = {
{0}
};
-static int luaopen_create(lua_State *L) {
+int luaopen_create(lua_State *L) {
int i;
/* Manually construct our module table instead of
* relying on _register or _newlib */
diff --git a/deps/lua/src/lua_struct.c b/deps/lua/src/lua_struct.c
index ec78bcbc0..4d5f027b8 100644
--- a/deps/lua/src/lua_struct.c
+++ b/deps/lua/src/lua_struct.c
@@ -1,7 +1,7 @@
/*
** {======================================================
** Library for packing/unpacking structures.
-** $Id: struct.c,v 1.4 2012/07/04 18:54:29 roberto Exp $
+** $Id: struct.c,v 1.7 2018/05/11 22:04:31 roberto Exp $
** See Copyright Notice at the end of this file
** =======================================================
*/
@@ -15,8 +15,8 @@
** h/H - signed/unsigned short
** l/L - signed/unsigned long
** T - size_t
-** i/In - signed/unsigned integer with size `n' (default is size of int)
-** cn - sequence of `n' chars (from/to a string); when packing, n==0 means
+** i/In - signed/unsigned integer with size 'n' (default is size of int)
+** cn - sequence of 'n' chars (from/to a string); when packing, n==0 means
the whole string; when unpacking, n==0 means use the previous
read number as the string length
** s - zero-terminated string
@@ -293,21 +293,26 @@ static int b_unpack (lua_State *L) {
const char *fmt = luaL_checkstring(L, 1);
size_t ld;
const char *data = luaL_checklstring(L, 2, &ld);
- size_t pos = luaL_optinteger(L, 3, 1) - 1;
+ size_t pos = luaL_optinteger(L, 3, 1);
+ luaL_argcheck(L, pos > 0, 3, "offset must be 1 or greater");
+ pos--; /* Lua indexes are 1-based, but here we want 0-based for C
+ * pointer math. */
+ int n = 0; /* number of results */
defaultoptions(&h);
- lua_settop(L, 2);
while (*fmt) {
int opt = *fmt++;
size_t size = optsize(L, opt, &fmt);
pos += gettoalign(pos, &h, opt, size);
- luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
- luaL_checkstack(L, 1, "too many results");
+ luaL_argcheck(L, size <= ld && pos <= ld - size,
+ 2, "data string too short");
+ /* stack space for item + next position */
+ luaL_checkstack(L, 2, "too many results");
switch (opt) {
case 'b': case 'B': case 'h': case 'H':
case 'l': case 'L': case 'T': case 'i': case 'I': { /* integer types */
int issigned = islower(opt);
lua_Number res = getinteger(data+pos, h.endian, issigned, size);
- lua_pushnumber(L, res);
+ lua_pushnumber(L, res); n++;
break;
}
case 'x': {
@@ -317,25 +322,26 @@ static int b_unpack (lua_State *L) {
float f;
memcpy(&f, data+pos, size);
correctbytes((char *)&f, sizeof(f), h.endian);
- lua_pushnumber(L, f);
+ lua_pushnumber(L, f); n++;
break;
}
case 'd': {
double d;
memcpy(&d, data+pos, size);
correctbytes((char *)&d, sizeof(d), h.endian);
- lua_pushnumber(L, d);
+ lua_pushnumber(L, d); n++;
break;
}
case 'c': {
if (size == 0) {
- if (!lua_isnumber(L, -1))
- luaL_error(L, "format `c0' needs a previous size");
+ if (n == 0 || !lua_isnumber(L, -1))
+ luaL_error(L, "format 'c0' needs a previous size");
size = lua_tonumber(L, -1);
- lua_pop(L, 1);
- luaL_argcheck(L, pos+size <= ld, 2, "data string too short");
+ lua_pop(L, 1); n--;
+ luaL_argcheck(L, size <= ld && pos <= ld - size,
+ 2, "data string too short");
}
- lua_pushlstring(L, data+pos, size);
+ lua_pushlstring(L, data+pos, size); n++;
break;
}
case 's': {
@@ -343,15 +349,15 @@ static int b_unpack (lua_State *L) {
if (e == NULL)
luaL_error(L, "unfinished string in data");
size = (e - (data+pos)) + 1;
- lua_pushlstring(L, data+pos, size - 1);
+ lua_pushlstring(L, data+pos, size - 1); n++;
break;
}
default: controloptions(L, opt, &fmt, &h);
}
pos += size;
}
- lua_pushinteger(L, pos + 1);
- return lua_gettop(L) - 2;
+ lua_pushinteger(L, pos + 1); /* next position */
+ return n + 1;
}
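
The rewritten bounds check above guards against unsigned wrap-around: pos and size are size_t, so pos+size can overflow and compare smaller than ld even for a wildly out-of-range read. A minimal standalone sketch of the difference (illustrative values, not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    int main(void) {
        size_t ld = 100;           /* length of the data string */
        size_t pos = SIZE_MAX - 4; /* hostile/corrupt offset */
        size_t size = 8;           /* bytes the format wants to read */

        /* Old check: pos + size wraps around to 3, so 3 <= 100 passes
         * and the out-of-bounds read would go ahead. */
        printf("old check passes: %d\n", pos + size <= ld);

        /* New check: no addition on the untrusted side, cannot wrap. */
        printf("new check passes: %d\n", size <= ld && pos <= ld - size);
        return 0;
    }
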
@@ -397,7 +403,7 @@ LUALIB_API int luaopen_struct (lua_State *L) {
/******************************************************************************
-* Copyright (C) 2010-2012 Lua.org, PUC-Rio. All rights reserved.
+* Copyright (C) 2010-2018 Lua.org, PUC-Rio. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
diff --git a/redis.conf b/redis.conf
index 38e258698..5cbc74bbd 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1,4 +1,9 @@
-# Redis configuration file example
+# Redis configuration file example.
+#
+# Note that in order to read the configuration file, Redis must be
+# started with the file path as its first argument:
+#
+# ./redis-server /path/to/redis.conf
# Note on units: when memory size is needed, it is possible to specify
# it in the usual form of 1k 5GB 4M and so forth:
@@ -30,28 +35,59 @@
# include /path/to/local.conf
# include /path/to/other.conf
-################################ GENERAL #####################################
+################################## MODULES #####################################
-# By default Redis does not run as a daemon. Use 'yes' if you need it.
-# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
-daemonize no
+# Load modules at startup. If the server is not able to load modules
+# it will abort. It is possible to use multiple loadmodule directives.
+#
+# loadmodule /path/to/my_module.so
+# loadmodule /path/to/other_module.so
-# If you run Redis from upstart or systemd, Redis can interact with your
-# supervision tree. Options:
-# supervised no - no supervision interaction
-# supervised upstart - signal upstart by putting Redis into SIGSTOP mode
-# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
-# supervised auto - detect upstart or systemd method based on
-# UPSTART_JOB or NOTIFY_SOCKET environment variables
-# Note: these supervision methods only signal "process is ready."
-# They do not enable continuous liveness pings back to your supervisor.
-supervised no
+################################## NETWORK #####################################
-# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
-# default. You can specify a custom pid file location here.
-pidfile /var/run/redis.pid
+# By default, if no "bind" configuration directive is specified, Redis listens
+# for connections from all the network interfaces available on the server.
+# It is possible to listen to just one or multiple selected interfaces using
+# the "bind" configuration directive, followed by one or more IP addresses.
+#
+# Examples:
+#
+# bind 192.168.1.100 10.0.0.1
+# bind 127.0.0.1 ::1
+#
+# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the
+# internet, binding to all the interfaces is dangerous and will expose the
+# instance to everybody on the internet. So by default we uncomment the
+# following bind directive, which forces Redis to listen only on the
+# IPv4 loopback interface address (this means Redis will be able to
+# accept connections only from clients running on the same computer it
+# is running on).
+#
+# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES
+# JUST COMMENT THE FOLLOWING LINE.
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+bind 127.0.0.1
-# Accept connections on the specified port, default is 6379.
+# Protected mode is a layer of security protection that prevents Redis
+# instances left open on the internet from being accessed and exploited.
+#
+# When protected mode is on and if:
+#
+# 1) The server is not binding explicitly to a set of addresses using the
+# "bind" directive.
+# 2) No password is configured.
+#
+# The server only accepts connections from clients connecting from the
+# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain
+# sockets.
+#
+# By default protected mode is enabled. You should disable it only if
+# you are sure you want clients from other hosts to connect to Redis
+# even if no authentication is configured and no specific set of
+# interfaces is explicitly listed using the "bind" directive.
+protected-mode yes
+
+# Accept connections on the specified port, default is 6379 (IANA #815344).
# If port 0 is specified Redis will not listen on a TCP socket.
port 6379
@@ -64,16 +100,8 @@ port 6379
# in order to get the desired effect.
tcp-backlog 511
-# By default Redis listens for connections from all the network interfaces
-# available on the server. It is possible to listen to just one or multiple
-# interfaces using the "bind" configuration directive, followed by one or
-# more IP addresses.
+# Unix socket.
#
-# Examples:
-#
-# bind 192.168.1.100 10.0.0.1
-# bind 127.0.0.1
-
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen
# on a unix socket when not specified.
@@ -97,8 +125,37 @@ timeout 0
# Note that to close the connection, twice the time is needed.
# On other kernels the period depends on the kernel configuration.
#
-# A reasonable value for this option is 60 seconds.
-tcp-keepalive 0
+# A reasonable value for this option is 300 seconds, which is the new
+# Redis default starting with Redis 3.2.1.
+tcp-keepalive 300
+
+################################# GENERAL #####################################
+
+# By default Redis does not run as a daemon. Use 'yes' if you need it.
+# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
+daemonize no
+
+# If you run Redis from upstart or systemd, Redis can interact with your
+# supervision tree. Options:
+# supervised no - no supervision interaction
+# supervised upstart - signal upstart by putting Redis into SIGSTOP mode
+# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
+# supervised auto - detect upstart or systemd method based on
+# UPSTART_JOB or NOTIFY_SOCKET environment variables
+# Note: these supervision methods only signal "process is ready."
+# They do not enable continuous liveness pings back to your supervisor.
+supervised no
+
+# If a pid file is specified, Redis writes it where specified at startup
+# and removes it at exit.
+#
+# When the server runs non-daemonized, no pid file is created if none is
+# specified in the configuration. When the server is daemonized, the pid file
+# is used even if not specified, defaulting to "/var/run/redis.pid".
+#
+# Creating a pid file is best effort: if Redis is not able to create it
+# nothing bad happens; the server will start and run normally.
+pidfile /var/run/redis_6379.pid
# Specify the server verbosity level.
# This can be one of:
@@ -128,6 +185,14 @@ logfile ""
# dbid is a number between 0 and 'databases'-1
databases 16
+# By default Redis shows an ASCII art logo only when started to log to the
+# standard output and if the standard output is a TTY. Basically this means
+# that normally a logo is displayed only in interactive sessions.
+#
+# However it is possible to force the pre-4.0 behavior and always show an
+# ASCII art logo in startup logs by setting the following option to yes.
+always-show-logo yes
+
################################ SNAPSHOTTING ################################
#
# Save the DB on disk:
@@ -231,7 +296,9 @@ dir ./
#
# 2) if slave-serve-stale-data is set to 'no' the slave will reply with
# an error "SYNC with master in progress" to all the kind of commands
-# but to INFO and SLAVEOF.
+# but to INFO, SLAVEOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG,
+# SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB,
+# COMMAND, POST, HOST: and LATENCY.
#
slave-serve-stale-data yes
@@ -345,6 +412,10 @@ repl-disable-tcp-nodelay no
# need to elapse, starting from the time the last slave disconnected, for
# the backlog buffer to be freed.
#
+# Note that slaves never free the backlog for timeout, since they may be
+# promoted to masters later, and should be able to correctly "partially
+# resynchronize" with the slaves: hence they should always accumulate backlog.
+#
# A value of 0 means to never release the backlog.
#
# repl-backlog-ttl 3600
@@ -386,6 +457,35 @@ slave-priority 100
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.
+# A Redis master is able to list the address and port of the attached
+# slaves in different ways. For example the "INFO replication" section
+# offers this information, which is used, among other tools, by
+# Redis Sentinel in order to discover slave instances.
+# Another place where this info is available is in the output of the
+# "ROLE" command of a master.
+#
+# The listed IP address and port normally reported by a slave are obtained
+# in the following way:
+#
+# IP: The address is auto detected by checking the peer address
+# of the socket used by the slave to connect with the master.
+#
+# Port: The port is communicated by the slave during the replication
+# handshake, and is normally the port that the slave is using to
+# listen for connections.
+#
+# However when port forwarding or Network Address Translation (NAT) is
+# used, the slave may actually be reachable via different IP and port
+# pairs. The following two options can be used by a slave in order to
+# report to its master a specific set of IP and port, so that both INFO
+# and ROLE will report those values.
+#
+# There is no need to use both options if you need to override just
+# the port or the IP address.
+#
+# slave-announce-ip 5.5.5.5
+# slave-announce-port 1234
+
################################## SECURITY ###################################
# Require clients to issue AUTH <PASSWORD> before processing any other
@@ -420,7 +520,7 @@ slave-priority 100
# Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems.
-################################### LIMITS ####################################
+################################### CLIENTS ####################################
# Set the max number of connected clients at the same time. By default
# this limit is set to 10000 clients, however if the Redis server is not
@@ -433,7 +533,9 @@ slave-priority 100
#
# maxclients 10000
-# Don't use more memory than the specified amount of bytes.
+############################## MEMORY MANAGEMENT ################################
+
+# Set a memory usage limit to the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
@@ -442,8 +544,8 @@ slave-priority 100
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
-# This option is usually useful when using Redis as an LRU cache, or to set
-# a hard memory limit for an instance (using the 'noeviction' policy).
+# This option is usually useful when using Redis as an LRU or LFU cache, or to
+# set a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves is subtracted
@@ -461,12 +563,20 @@ slave-priority 100
# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
# is reached. You can select among five behaviors:
#
-# volatile-lru -> remove the key with an expire set using an LRU algorithm
-# allkeys-lru -> remove any key according to the LRU algorithm
-# volatile-random -> remove a random key with an expire set
-# allkeys-random -> remove a random key, any key
-# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
-# noeviction -> don't expire at all, just return an error on write operations
+# volatile-lru -> Evict using approximated LRU among the keys with an expire set.
+# allkeys-lru -> Evict any key using approximated LRU.
+# volatile-lfu -> Evict using approximated LFU among the keys with an expire set.
+# allkeys-lfu -> Evict any key using approximated LFU.
+# volatile-random -> Remove a random key among the ones with an expire set.
+# allkeys-random -> Remove a random key, any key.
+# volatile-ttl -> Remove the key with the nearest expire time (minor TTL)
+# noeviction -> Don't evict anything, just return an error on write operations.
+#
+# LRU means Least Recently Used
+# LFU means Least Frequently Used
+#
+# LRU, LFU and volatile-ttl are all implemented using approximated
+# randomized algorithms.
#
# Note: with any of the above policies, Redis will return an error on write
# operations, when there are no suitable keys for eviction.
@@ -481,17 +591,66 @@ slave-priority 100
#
# maxmemory-policy noeviction
-# LRU and minimal TTL algorithms are not precise algorithms but approximated
+# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can tune it for speed or
# accuracy. By default Redis will check five keys and pick the one that was
# used least recently; you can change the sample size using the following
# configuration directive.
#
# The default of 5 produces good enough results. 10 approximates very closely
-# true LRU but costs a bit more CPU. 3 is very fast but not very accurate.
+# true LRU but costs more CPU. 3 is faster but not very accurate.
#
# maxmemory-samples 5
+############################# LAZY FREEING ####################################
+
+# Redis has two primitives to delete keys. One is called DEL and is a blocking
+# deletion of the object. It means that the server stops processing new commands
+# in order to reclaim all the memory associated with an object in a synchronous
+# way. If the key deleted is associated with a small object, the time needed
+# in order to execute the DEL command is very small and comparable to most other
+# O(1) or O(log_N) commands in Redis. However if the key is associated with an
+# aggregated value containing millions of elements, the server can block for
+# a long time (even seconds) in order to complete the operation.
+#
+# For the above reasons Redis also offers non blocking deletion primitives
+# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and
+# FLUSHDB commands, in order to reclaim memory in background. Those commands
+# are executed in constant time. Another thread will incrementally free the
+# object in the background as fast as possible.
+#
+# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled.
+# It's up to the design of the application to understand when it is a good
+# idea to use one or the other. However the Redis server sometimes has to
+# delete keys or flush the whole database as a side effect of other operations.
+# Specifically Redis deletes objects independently of a user call in the
+# following scenarios:
+#
+# 1) On eviction, because of the maxmemory and maxmemory policy configurations,
+# in order to make room for new data, without going over the specified
+# memory limit.
+# 2) Because of expire: when a key with an associated time to live (see the
+# EXPIRE command) must be deleted from memory.
+# 3) Because of a side effect of a command that stores data on a key that may
+# already exist. For example the RENAME command may delete the old key
+# content when it is replaced with another one. Similarly SUNIONSTORE
+# or SORT with STORE option may delete existing keys. The SET command
+# itself removes any old content of the specified key in order to replace
+# it with the specified string.
+# 4) During replication, when a slave performs a full resynchronization with
+# its master, the content of the whole database is removed in order to
+# load the RDB file just transferred.
+#
+# In all the above cases the default is to delete objects in a blocking way,
+# as if DEL was called. However you can configure each case specifically
+# in order to instead release memory in a non-blocking way, as if UNLINK
+# was called, using the following configuration directives:
+
+lazyfree-lazy-eviction no
+lazyfree-lazy-expire no
+lazyfree-lazy-server-del no
+slave-lazy-flush no
+
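
The four directives above only choose between the two reclamation strategies; the mechanism itself is a background thread that frees objects handed to it. A simplified sketch of the idea (hypothetical free_object callback and job queue, not the actual lazyfree.c/bio.c code):

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct job { void *obj; struct job *next; } job;

    static job *queue;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    void free_object(void *obj); /* provided elsewhere; may take seconds */

    /* Main thread: O(1), only unlinks and enqueues, never blocks on the
     * actual free. This is what makes UNLINK constant time. */
    void lazy_free(void *obj) {
        job *j = malloc(sizeof(*j));
        j->obj = obj;
        pthread_mutex_lock(&lock);
        j->next = queue; queue = j;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
    }

    /* Background thread: drains the queue and frees incrementally. */
    void *lazy_free_thread(void *arg) {
        (void)arg;
        for (;;) {
            pthread_mutex_lock(&lock);
            while (!queue) pthread_cond_wait(&cond, &lock);
            job *j = queue; queue = j->next;
            pthread_mutex_unlock(&lock);
            free_object(j->obj);
            free(j);
        }
        return NULL;
    }
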
############################## APPEND ONLY MODE ###############################
# By default Redis asynchronously dumps the dataset on disk. This mode is
@@ -610,6 +769,17 @@ auto-aof-rewrite-min-size 64mb
# will be found.
aof-load-truncated yes
+# When rewriting the AOF file, Redis is able to use an RDB preamble in the
+# AOF file for faster rewrites and recoveries. When this option is turned
+# on, the rewritten AOF file is composed of two different stanzas:
+#
+# [RDB file][AOF tail]
+#
+# When loading, Redis recognizes that the AOF file starts with the "REDIS"
+# string and loads the prefixed RDB file, and continues loading the AOF
+# tail.
+aof-use-rdb-preamble yes
+
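
A sketch of the load-time detection the comment describes: peek at the first bytes of the file and branch on the "REDIS" magic (hypothetical helper, not the actual aof.c loader, which also validates the RDB version following the magic):

    #include <stdio.h>
    #include <string.h>

    /* Returns 1 if the AOF file begins with an RDB preamble, 0 if it
     * is a plain AOF, -1 on a short read. */
    int aof_has_rdb_preamble(FILE *fp) {
        char magic[5];
        if (fread(magic, 1, sizeof(magic), fp) != sizeof(magic)) return -1;
        rewind(fp);
        return memcmp(magic, "REDIS", sizeof(magic)) == 0;
    }
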
################################ LUA SCRIPTING ###############################
# Max execution time of a Lua script in milliseconds.
@@ -659,7 +829,7 @@ lua-time-limit 5000
# A slave of a failing master will avoid to start a failover if its data
# looks too old.
#
-# There is no simple way for a slave to actually have a exact measure of
+# There is no simple way for a slave to actually have an exact measure of
# its "data age", so the following two checks are performed:
#
# 1) If there are multiple slaves able to failover, they exchange messages
@@ -733,9 +903,52 @@ lua-time-limit 5000
#
# cluster-require-full-coverage yes
+# This option, when set to yes, prevents slaves from trying to fail over
+# their master during master failures. However the master can still perform a
+# manual failover, if forced to do so.
+#
+# This is useful in different scenarios, especially in the case of multiple
+# data center operations, where we want one side to never be promoted except
+# in the case of a total DC failure.
+#
+# cluster-slave-no-failover no
+
# In order to set up your cluster make sure to read the documentation
# available at http://redis.io web site.
+########################## CLUSTER DOCKER/NAT support ########################
+
+# In certain deployments, Redis Cluster node address discovery fails, because
+# addresses are NAT-ted or because ports are forwarded (the typical case is
+# Docker and other containers).
+#
+# In order to make Redis Cluster work in such environments, a static
+# configuration where each node knows its public address is needed. The
+# following three options are used for this purpose:
+#
+# * cluster-announce-ip
+# * cluster-announce-port
+# * cluster-announce-bus-port
+#
+# Each instructs the node about its address, client port, and cluster message
+# bus port. The information is then published in the header of the bus packets
+# so that other nodes will be able to correctly map the address of the node
+# publishing the information.
+#
+# If the above options are not used, the normal Redis Cluster auto-detection
+# will be used instead.
+#
+# Note that when remapped, the bus port may not be at the fixed offset of
+# client port + 10000, so you can specify any port and bus-port depending
+# on how they get remapped. If the bus-port is not set, a fixed offset of
+# 10000 will be used as usual.
+#
+# Example:
+#
+# cluster-announce-ip 10.1.1.5
+# cluster-announce-port 6379
+# cluster-announce-bus-port 6380
+
################################## SLOW LOG ###################################
# The Redis Slow Log is a system to log queries that exceeded a specified
@@ -781,7 +994,7 @@ slowlog-max-len 128
# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
latency-monitor-threshold 0
-############################# Event notification ##############################
+############################# EVENT NOTIFICATION ##############################
# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/notifications
@@ -893,6 +1106,17 @@ zset-max-ziplist-value 64
# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
hll-sparse-max-bytes 3000
+# Streams macro node max size / items. The stream data structure is a radix
+# tree of big nodes that encode multiple items inside. Using these settings
+# it is possible to control how big a single node can be in bytes, and the
+# maximum number of items it may contain before switching to a new node when
+# appending new stream entries. If any of the following settings are set to
+# zero, the limit is ignored, so for instance it is possible to set just a
+# max entries limit by setting max-bytes to 0 and max-entries to the desired
+# value.
+stream-node-max-bytes 4096
+stream-node-max-entries 100
+
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehashing the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
@@ -950,6 +1174,20 @@ client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
+# Client query buffers accumulate new commands. They are limited to a fixed
+# amount by default in order to prevent a protocol desynchronization (for
+# instance due to a bug in the client) from leading to unbounded memory usage
+# in the query buffer. However you can configure it here if you have very
+# special needs, such as huge multi/exec requests or the like.
+#
+# client-query-buffer-limit 1gb
+
+# In the Redis protocol, bulk requests, that is, elements representing single
+# strings, are normally limited to 512 mb. However you can change this limit
+# here.
+#
+# proto-max-bulk-len 512mb
+
# Redis calls an internal function to perform many background tasks, like
# closing connections of clients in timeout, purging expired keys that are
# never requested, and so forth.
@@ -967,8 +1205,149 @@ client-output-buffer-limit pubsub 32mb 8mb 60
# 100 only in environments where very low latency is required.
hz 10
+# Normally it is useful to have an HZ value which is proportional to the
+# number of clients connected. This helps, for instance, to avoid processing
+# too many clients for each background task invocation, preventing latency
+# spikes.
+#
+# Since the default HZ value is conservatively set to 10, Redis
+# offers, and enables by default, the ability to use an adaptive HZ value
+# which will temporarily rise when there are many connected clients.
+#
+# When dynamic HZ is enabled, the actual configured HZ will be used
+# as a baseline, but multiples of the configured HZ value will actually be
+# used as needed once more clients are connected. In this way an idle
+# instance will use very little CPU time while a busy instance will be
+# more responsive.
+dynamic-hz yes
+
# When a child rewrites the AOF file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
aof-rewrite-incremental-fsync yes
+
+# When Redis saves an RDB file, if the following option is enabled
+# the file will be fsync-ed every 32 MB of data generated. This is useful
+# in order to commit the file to the disk more incrementally and avoid
+# big latency spikes.
+rdb-save-incremental-fsync yes
+
+# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good
+# idea to start with the default settings and only change them after investigating
+# how to improve performance and how the keys' LFU values change over time,
+# which can be inspected via the OBJECT FREQ command.
+#
+# There are two tunable parameters in the Redis LFU implementation: the
+# counter logarithm factor and the counter decay time. It is important to
+# understand what the two parameters mean before changing them.
+#
+# The LFU counter is just 8 bits per key; its maximum value is 255, so Redis
+# uses a probabilistic increment with logarithmic behavior. Given the value
+# of the old counter, when a key is accessed, the counter is incremented in
+# this way:
+#
+# 1. A random number R between 0 and 1 is extracted.
+# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1).
+# 3. The counter is incremented only if R < P.
+#
+# The default lfu-log-factor is 10. This is a table of how the frequency
+# counter changes with a different number of accesses with different
+# logarithmic factors:
+#
+# +--------+------------+------------+------------+------------+------------+
+# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits |
+# +--------+------------+------------+------------+------------+------------+
+# | 0 | 104 | 255 | 255 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 1 | 18 | 49 | 255 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 10 | 10 | 18 | 142 | 255 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+# | 100 | 8 | 11 | 49 | 143 | 255 |
+# +--------+------------+------------+------------+------------+------------+
+#
+# NOTE: The above table was obtained by running the following commands:
+#
+# redis-benchmark -n 1000000 incr foo
+# redis-cli object freq foo
+#
+# NOTE 2: The counter initial value is 5 in order to give new objects a chance
+# to accumulate hits.
+#
+# The counter decay time is the time, in minutes, that must elapse in order
+# for the key counter to be divided by two (or decremented if it has a value
+# <= 10).
+#
+# The default value for the lfu-decay-time is 1. A special value of 0 means to
+# decay the counter every time it happens to be scanned.
+#
+# lfu-log-factor 10
+# lfu-decay-time 1
+
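
The three increment steps above translate into a few lines of C. The sketch below follows the description literally; it is not the exact code in src/evict.c (which also accounts for the initial value of 5 mentioned in NOTE 2):

    #include <stdlib.h>

    unsigned char lfu_log_incr(unsigned char counter, int lfu_log_factor) {
        if (counter == 255) return counter;      /* 8-bit counter saturates */
        double r = (double)rand() / RAND_MAX;    /* 1. random R in [0,1] */
        double p = 1.0 / ((double)counter * lfu_log_factor + 1); /* 2. P */
        if (r < p) counter++;                    /* 3. increment if R < P */
        return counter;
    }
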
+########################### ACTIVE DEFRAGMENTATION #######################
+#
+# WARNING: THIS FEATURE IS EXPERIMENTAL. However it was stress tested
+# even in production and manually tested by multiple engineers for some
+# time.
+#
+# What is active defragmentation?
+# -------------------------------
+#
+# Active (online) defragmentation allows a Redis server to compact the
+# spaces left between small allocations and deallocations of data in memory,
+# thus allowing it to reclaim memory.
+#
+# Fragmentation is a natural process that happens with every allocator (but
+# less so with Jemalloc, fortunately) and certain workloads. Normally a server
+# restart is needed in order to lower the fragmentation, or at least to flush
+# away all the data and create it again. However thanks to this feature
+# implemented by Oran Agra for Redis 4.0 this process can happen at runtime
+# in an "hot" way, while the server is running.
+#
+# Basically when the fragmentation is over a certain level (see the
+# configuration options below) Redis will start to create new copies of the
+# values in contiguous memory regions by exploiting certain specific Jemalloc
+# features (in order to understand if an allocation is causing fragmentation
+# and to allocate it in a better place), and at the same time, will release the
+# old copies of the data. This process, repeated incrementally for all the
+# keys, will cause the fragmentation to drop back to normal values.
+#
+# Important things to understand:
+#
+# 1. This feature is disabled by default, and only works if you compiled Redis
+# to use the copy of Jemalloc we ship with the source code of Redis.
+# This is the default with Linux builds.
+#
+# 2. You never need to enable this feature if you don't have fragmentation
+# issues.
+#
+# 3. Once you experience fragmentation, you can enable this feature when
+# needed with the command "CONFIG SET activedefrag yes".
+#
+# The configuration parameters are able to fine-tune the behavior of the
+# defragmentation process. If you are not sure about what they mean it is
+# a good idea to leave the defaults untouched.
+
+# Enable active defragmentation
+# activedefrag yes
+
+# Minimum amount of fragmentation waste to start active defrag
+# active-defrag-ignore-bytes 100mb
+
+# Minimum percentage of fragmentation to start active defrag
+# active-defrag-threshold-lower 10
+
+# Maximum percentage of fragmentation at which we use maximum effort
+# active-defrag-threshold-upper 100
+
+# Minimal effort for defrag in CPU percentage
+# active-defrag-cycle-min 5
+
+# Maximal effort for defrag in CPU percentage
+# active-defrag-cycle-max 75
+
+# Maximum number of set/hash/zset/list fields that will be processed from
+# the main dictionary scan
+# active-defrag-max-scan-fields 1000
+
diff --git a/runtest b/runtest
index d8451df57..ade1bd09a 100755
--- a/runtest
+++ b/runtest
@@ -11,4 +11,4 @@ then
echo "You need tcl 8.5 or newer in order to run the Redis test"
exit 1
fi
-$TCLSH tests/test_helper.tcl $*
+$TCLSH tests/test_helper.tcl "${@}"
diff --git a/sentinel.conf b/sentinel.conf
index d627b8536..551defef9 100644
--- a/sentinel.conf
+++ b/sentinel.conf
@@ -1,5 +1,21 @@
# Example sentinel.conf
+# *** IMPORTANT ***
+#
+# By default Sentinel will not be reachable from interfaces other than
+# localhost. Either use the 'bind' directive to bind to a list of network
+# interfaces, or disable protected mode with "protected-mode no" by
+# adding it to this configuration file.
+#
+# Before doing that MAKE SURE the instance is protected from the outside
+# world via firewalling or other means.
+#
+# For example you may use one of the following:
+#
+# bind 127.0.0.1 192.168.1.1
+#
+# protected-mode no
+
# port <sentinel-port>
# The port that this sentinel instance will run on
port 26379
@@ -203,4 +219,31 @@ sentinel failover-timeout mymaster 180000
#
# sentinel client-reconfig-script mymaster /var/redis/reconfig.sh
+# SECURITY
+#
+# By default SENTINEL SET will not be able to change the notification-script
+# and client-reconfig-script at runtime. This avoids a trivial security issue
+# where clients can set the script to anything and trigger a failover in order
+# to get the program executed.
+sentinel deny-scripts-reconfig yes
+
+# REDIS COMMANDS RENAMING
+#
+# Sometimes the Redis server has certain commands, that are needed for Sentinel
+# to work correctly, renamed to unguessable strings. This is often the case
+# with CONFIG and SLAVEOF in the context of providers that offer Redis as
+# a service and don't want the customers to reconfigure the instances outside
+# of the administration console.
+#
+# In such case it is possible to tell Sentinel to use different command names
+# instead of the normal ones. For example if the master "mymaster", and the
+# associated slaves, have "CONFIG" all renamed to "GUESSME", you could use:
+#
+# sentinel rename-command mymaster CONFIG GUESSME
+#
+# After such a configuration is set, every time Sentinel would use CONFIG it
+# will use GUESSME instead. Note that there is no actual need to respect the
+# command case, so writing "config guessme" works the same in the example above.
+#
+# SENTINEL SET can also be used in order to perform this configuration at runtime.
diff --git a/src/Makefile b/src/Makefile
index 295600c4e..f5525bd6d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -14,23 +14,33 @@
release_hdr := $(shell sh -c './mkreleasehdr.sh')
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
OPTIMIZATION?=-O2
DEPENDENCY_TARGETS=hiredis linenoise lua
+NODEPS:=clean distclean
# Default settings
STD=-std=c99 -pedantic -DREDIS_STATIC=''
-WARN=-Wall -W
+WARN=-Wall -W -Wno-missing-field-initializers
OPT=$(OPTIMIZATION)
PREFIX?=/usr/local
INSTALL_BIN=$(PREFIX)/bin
INSTALL=install
-# Default allocator
+# Default allocator: use jemalloc on Linux if the target is not ARM
+MALLOC=libc
+ifneq ($(uname_M),armv6l)
+ifneq ($(uname_M),armv7l)
ifeq ($(uname_S),Linux)
MALLOC=jemalloc
-else
- MALLOC=libc
+endif
+endif
+endif
+
+# To get ARM stack traces if Redis crashes we need a special C flag.
+ifneq (,$(filter aarch64 armv%,$(uname_M)))
+ CFLAGS+=-funwind-tables
endif
# Backwards compatibility for selecting an allocator
@@ -60,22 +70,39 @@ DEBUG=-g -ggdb
ifeq ($(uname_S),SunOS)
# SunOS
+ ifneq ($(@@),32bit)
+ CFLAGS+= -m64
+ LDFLAGS+= -m64
+ endif
+ DEBUG=-g
+ DEBUG_FLAGS=-g
+ export CFLAGS LDFLAGS DEBUG DEBUG_FLAGS
INSTALL=cp -pf
FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6
FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt
else
ifeq ($(uname_S),Darwin)
- # Darwin (nothing to do)
+ # Darwin
+ FINAL_LIBS+= -ldl
else
ifeq ($(uname_S),AIX)
# AIX
FINAL_LDFLAGS+= -Wl,-bexpall
- FINAL_LIBS+= -pthread -lcrypt -lbsd
-
+ FINAL_LIBS+=-ldl -pthread -lcrypt -lbsd
+else
+ifeq ($(uname_S),OpenBSD)
+ # OpenBSD
+ FINAL_LIBS+= -lpthread
+else
+ifeq ($(uname_S),FreeBSD)
+ # FreeBSD
+ FINAL_LIBS+= -lpthread
else
# All the other OSes (notably Linux)
FINAL_LDFLAGS+= -rdynamic
- FINAL_LIBS+= -pthread
+ FINAL_LIBS+=-ldl -pthread -lrt
+endif
+endif
endif
endif
endif
@@ -95,7 +122,7 @@ endif
ifeq ($(MALLOC),jemalloc)
DEPENDENCY_TARGETS+= jemalloc
FINAL_CFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include
- FINAL_LIBS+= ../deps/jemalloc/lib/libjemalloc.a -ldl
+ FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS)
endif
REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS)
@@ -117,30 +144,27 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o redis.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o
REDIS_CLI_NAME=redis-cli
-REDIS_CLI_OBJ=anet.o sds.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
+REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o
REDIS_BENCHMARK_NAME=redis-benchmark
-REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o sds.o adlist.o zmalloc.o redis-benchmark.o
-REDIS_CHECK_DUMP_NAME=redis-check-dump
-REDIS_CHECK_DUMP_OBJ=redis-check-dump.o lzf_c.o lzf_d.o crc64.o
+REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o
+REDIS_CHECK_RDB_NAME=redis-check-rdb
REDIS_CHECK_AOF_NAME=redis-check-aof
-REDIS_CHECK_AOF_OBJ=redis-check-aof.o
-all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME)
+all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME)
@echo ""
@echo "Hint: It's a good idea to run 'make test' ;)"
@echo ""
-.PHONY: all
-
-# Deps (use make dep to generate this)
-include Makefile.dep
+Makefile.dep:
+ -$(REDIS_CC) -MM *.c > Makefile.dep 2> /dev/null || true
-dep:
- $(REDIS_CC) -MM *.c > Makefile.dep
+ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS))))
+-include Makefile.dep
+endif
-.PHONY: dep
+.PHONY: all
persist-settings: distclean
echo STD=$(STD) >> .make-settings
@@ -178,6 +202,14 @@ $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME)
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME)
+# redis-check-rdb
+$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME)
+
+# redis-check-aof
+$(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME)
+ $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME)
+
# redis-cli
$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o $(FINAL_LIBS)
@@ -186,13 +218,8 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a $(FINAL_LIBS)
-# redis-check-dump
-$(REDIS_CHECK_DUMP_NAME): $(REDIS_CHECK_DUMP_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
-
-# redis-check-aof
-$(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
- $(REDIS_LD) -o $@ $^ $(FINAL_LIBS)
+dict-benchmark: dict.c zmalloc.c sds.c siphash.c
+ $(REDIS_CC) $(FINAL_CFLAGS) $^ -D DICT_BENCHMARK_MAIN -o $@ $(FINAL_LIBS)
# Because the jemalloc.h header is generated as a part of the jemalloc build,
# building it should complete before building any other object. Instead of
@@ -201,7 +228,7 @@ $(REDIS_CHECK_AOF_NAME): $(REDIS_CHECK_AOF_OBJ)
$(REDIS_CC) -c $<
clean:
- rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_DUMP_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html
+ rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep dict-benchmark
.PHONY: clean
@@ -226,7 +253,7 @@ lcov:
@genhtml --legend -o lcov-html redis.info
test-sds: sds.c sds.h
- $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN -o /tmp/sds_test
+ $(REDIS_CC) sds.c zmalloc.c -DSDS_TEST_MAIN $(FINAL_LIBS) -o /tmp/sds_test
/tmp/sds_test
.PHONY: lcov
@@ -249,6 +276,9 @@ noopt:
valgrind:
$(MAKE) OPTIMIZATION="-O0" MALLOC="libc"
+helgrind:
+ $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" CFLAGS="-D__ATOMIC_VAR_FORCE_SYNC_MACROS"
+
src/help.h:
@../utils/generate-command-help.rb > help.h
@@ -257,6 +287,6 @@ install: all
$(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_BENCHMARK_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CLI_NAME) $(INSTALL_BIN)
- $(REDIS_INSTALL) $(REDIS_CHECK_DUMP_NAME) $(INSTALL_BIN)
+ $(REDIS_INSTALL) $(REDIS_CHECK_RDB_NAME) $(INSTALL_BIN)
$(REDIS_INSTALL) $(REDIS_CHECK_AOF_NAME) $(INSTALL_BIN)
@ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME)
diff --git a/src/Makefile.dep b/src/Makefile.dep
deleted file mode 100644
index 33e89137d..000000000
--- a/src/Makefile.dep
+++ /dev/null
@@ -1,142 +0,0 @@
-adlist.o: adlist.c adlist.h zmalloc.h
-ae.o: ae.c ae.h zmalloc.h config.h ae_kqueue.c ae_epoll.c ae_select.c ae_evport.c
-ae_epoll.o: ae_epoll.c
-ae_evport.o: ae_evport.c
-ae_kqueue.o: ae_kqueue.c
-ae_select.o: ae_select.c
-anet.o: anet.c fmacros.h anet.h
-aof.o: aof.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- bio.h
-bio.o: bio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- bio.h
-bitops.o: bitops.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-blocked.o: blocked.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-cluster.o: cluster.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h endianconv.h
-config.o: config.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h
-crc16.o: crc16.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-crc64.o: crc64.c
-db.o: db.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h
-debug.o: debug.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- sha1.h crc64.h bio.h
-dict.o: dict.c fmacros.h dict.h zmalloc.h redisassert.h
-endianconv.o: endianconv.c
-hyperloglog.o: hyperloglog.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-intset.o: intset.c intset.h zmalloc.h endianconv.h config.h
-latency.o: latency.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-lzf_c.o: lzf_c.c lzfP.h
-lzf_d.o: lzf_d.c lzfP.h
-memtest.o: memtest.c config.h
-multi.o: multi.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-networking.o: networking.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-notify.o: notify.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-object.o: object.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-pqsort.o: pqsort.c
-pubsub.o: pubsub.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-rand.o: rand.c
-rdb.o: rdb.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- lzf.h zipmap.h endianconv.h
-redis-benchmark.o: redis-benchmark.c fmacros.h ae.h \
- ../deps/hiredis/hiredis.h sds.h adlist.h zmalloc.h
-redis-check-aof.o: redis-check-aof.c fmacros.h config.h
-redis-check-dump.o: redis-check-dump.c lzf.h crc64.h
-redis-cli.o: redis-cli.c fmacros.h version.h ../deps/hiredis/hiredis.h \
- sds.h zmalloc.h ../deps/linenoise/linenoise.h help.h anet.h ae.h
-redis.o: redis.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- cluster.h slowlog.h bio.h asciilogo.h
-release.o: release.c release.h version.h crc64.h
-replication.o: replication.c redis.h fmacros.h config.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h sds.h dict.h \
- adlist.h zmalloc.h anet.h ziplist.h intset.h version.h util.h latency.h \
- sparkline.h rdb.h rio.h
-rio.o: rio.c fmacros.h rio.h sds.h util.h crc64.h config.h redis.h \
- ../deps/lua/src/lua.h ../deps/lua/src/luaconf.h ae.h dict.h adlist.h \
- zmalloc.h anet.h ziplist.h intset.h version.h latency.h sparkline.h \
- rdb.h
-scripting.o: scripting.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- sha1.h rand.h ../deps/lua/src/lauxlib.h ../deps/lua/src/lua.h \
- ../deps/lua/src/lualib.h
-sds.o: sds.c sds.h zmalloc.h
-sentinel.o: sentinel.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- ../deps/hiredis/hiredis.h ../deps/hiredis/async.h \
- ../deps/hiredis/hiredis.h
-setproctitle.o: setproctitle.c
-sha1.o: sha1.c sha1.h config.h
-slowlog.o: slowlog.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- slowlog.h
-sort.o: sort.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h \
- pqsort.h
-sparkline.o: sparkline.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-syncio.o: syncio.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_hash.o: t_hash.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_list.o: t_list.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_set.o: t_set.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_string.o: t_string.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-t_zset.o: t_zset.c redis.h fmacros.h config.h ../deps/lua/src/lua.h \
- ../deps/lua/src/luaconf.h ae.h sds.h dict.h adlist.h zmalloc.h anet.h \
- ziplist.h intset.h version.h util.h latency.h sparkline.h rdb.h rio.h
-util.o: util.c fmacros.h util.h sds.h
-ziplist.o: ziplist.c zmalloc.h util.h sds.h ziplist.h endianconv.h \
- config.h redisassert.h
-zipmap.o: zipmap.c zmalloc.h endianconv.h config.h
-zmalloc.o: zmalloc.c config.h zmalloc.h
diff --git a/src/adlist.c b/src/adlist.c
index b4cc785be..ec5f8bbf4 100644
--- a/src/adlist.c
+++ b/src/adlist.c
@@ -52,10 +52,8 @@ list *listCreate(void)
return list;
}
-/* Free the whole list.
- *
- * This function can't fail. */
-void listRelease(list *list)
+/* Remove all the elements from the list without destroying the list itself. */
+void listEmpty(list *list)
{
unsigned long len;
listNode *current, *next;
@@ -68,6 +66,16 @@ void listRelease(list *list)
zfree(current);
current = next;
}
+ list->head = list->tail = NULL;
+ list->len = 0;
+}
+
+/* Free the whole list.
+ *
+ * This function can't fail. */
+void listRelease(list *list)
+{
+ listEmpty(list);
zfree(list);
}
@@ -242,7 +250,7 @@ listNode *listNext(listIter *iter)
list *listDup(list *orig)
{
list *copy;
- listIter *iter;
+ listIter iter;
listNode *node;
if ((copy = listCreate()) == NULL)
@@ -250,26 +258,23 @@ list *listDup(list *orig)
copy->dup = orig->dup;
copy->free = orig->free;
copy->match = orig->match;
- iter = listGetIterator(orig, AL_START_HEAD);
- while((node = listNext(iter)) != NULL) {
+ listRewind(orig, &iter);
+ while((node = listNext(&iter)) != NULL) {
void *value;
if (copy->dup) {
value = copy->dup(node->value);
if (value == NULL) {
listRelease(copy);
- listReleaseIterator(iter);
return NULL;
}
} else
value = node->value;
if (listAddNodeTail(copy, value) == NULL) {
listRelease(copy);
- listReleaseIterator(iter);
return NULL;
}
}
- listReleaseIterator(iter);
return copy;
}
@@ -284,24 +289,21 @@ list *listDup(list *orig)
* NULL is returned. */
listNode *listSearchKey(list *list, void *key)
{
- listIter *iter;
+ listIter iter;
listNode *node;
- iter = listGetIterator(list, AL_START_HEAD);
- while((node = listNext(iter)) != NULL) {
+ listRewind(list, &iter);
+ while((node = listNext(&iter)) != NULL) {
if (list->match) {
if (list->match(node->value, key)) {
- listReleaseIterator(iter);
return node;
}
} else {
if (key == node->value) {
- listReleaseIterator(iter);
return node;
}
}
}
- listReleaseIterator(iter);
return NULL;
}
@@ -339,3 +341,22 @@ void listRotate(list *list) {
tail->next = list->head;
list->head = tail;
}
+
+/* Add all the elements of the list 'o' at the end of the
+ * list 'l'. The list 'o' remains empty but otherwise valid. */
+void listJoin(list *l, list *o) {
+ if (o->head)
+ o->head->prev = l->tail;
+
+ if (l->tail)
+ l->tail->next = o->head;
+ else
+ l->head = o->head;
+
+ if (o->tail) l->tail = o->tail;
+ l->len += o->len;
+
+ /* Set up 'o' as an empty list. */
+ o->head = o->tail = NULL;
+ o->len = 0;
+}
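
A small usage sketch of the new listJoin() (assuming only the adlist API shown in this patch):

    #include "adlist.h"

    void join_example(void) {
        list *a = listCreate(), *b = listCreate();
        listAddNodeTail(a, "one");
        listAddNodeTail(b, "two");
        listAddNodeTail(b, "three");
        listJoin(a, b);     /* a: one, two, three; b: empty but valid */
        listRelease(b);     /* safe: b was left in a consistent state */
        listRelease(a);
    }
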
diff --git a/src/adlist.h b/src/adlist.h
index be322552f..c954fac87 100644
--- a/src/adlist.h
+++ b/src/adlist.h
@@ -72,6 +72,7 @@ typedef struct list {
/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
+void listEmpty(list *list);
list *listAddNodeHead(list *list, void *value);
list *listAddNodeTail(list *list, void *value);
list *listInsertNode(list *list, listNode *old_node, void *value, int after);
@@ -85,6 +86,7 @@ listNode *listIndex(list *list, long index);
void listRewind(list *list, listIter *li);
void listRewindTail(list *list, listIter *li);
void listRotate(list *list);
+void listJoin(list *l, list *o);
/* Directions for iterators */
#define AL_START_HEAD 0
diff --git a/src/ae.c b/src/ae.c
index 63a1ab4eb..1ea671569 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -75,6 +75,7 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
eventLoop->stop = 0;
eventLoop->maxfd = -1;
eventLoop->beforesleep = NULL;
+ eventLoop->aftersleep = NULL;
if (aeApiCreate(eventLoop) == -1) goto err;
/* Events with mask == AE_NONE are not set. So let's initialize the
* vector with it. */
@@ -158,6 +159,10 @@ void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
aeFileEvent *fe = &eventLoop->events[fd];
if (fe->mask == AE_NONE) return;
+ /* We want to always remove AE_BARRIER if set when AE_WRITABLE
+ * is removed. */
+ if (mask & AE_WRITABLE) mask |= AE_BARRIER;
+
aeApiDelEvent(eventLoop, fd, mask);
fe->mask = fe->mask & (~mask);
if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
@@ -214,28 +219,22 @@ long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
te->timeProc = proc;
te->finalizerProc = finalizerProc;
te->clientData = clientData;
+ te->prev = NULL;
te->next = eventLoop->timeEventHead;
+ if (te->next)
+ te->next->prev = te;
eventLoop->timeEventHead = te;
return id;
}
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
{
- aeTimeEvent *te, *prev = NULL;
-
- te = eventLoop->timeEventHead;
+ aeTimeEvent *te = eventLoop->timeEventHead;
while(te) {
if (te->id == id) {
- if (prev == NULL)
- eventLoop->timeEventHead = te->next;
- else
- prev->next = te->next;
- if (te->finalizerProc)
- te->finalizerProc(eventLoop, te->clientData);
- zfree(te);
+ te->id = AE_DELETED_EVENT_ID;
return AE_OK;
}
- prev = te;
te = te->next;
}
return AE_ERR; /* NO event with the specified ID found */
@@ -297,6 +296,27 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
long now_sec, now_ms;
long long id;
+ /* Remove events scheduled for deletion. */
+ if (te->id == AE_DELETED_EVENT_ID) {
+ aeTimeEvent *next = te->next;
+ if (te->prev)
+ te->prev->next = te->next;
+ else
+ eventLoop->timeEventHead = te->next;
+ if (te->next)
+ te->next->prev = te->prev;
+ if (te->finalizerProc)
+ te->finalizerProc(eventLoop, te->clientData);
+ zfree(te);
+ te = next;
+ continue;
+ }
+
+ /* Make sure we don't process time events created by time events in
+ * this iteration. Note that this check is currently useless: we always
+ * add new timers on the head; however, if we change this implementation
+ * detail, the check may become useful again, so we keep it here as a
+ * future defense. */
if (te->id > maxId) {
te = te->next;
continue;
@@ -310,28 +330,13 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
id = te->id;
retval = te->timeProc(eventLoop, id, te->clientData);
processed++;
- /* After an event is processed our time event list may
- * no longer be the same, so we restart from head.
- * Still we make sure to don't process events registered
- * by event handlers itself in order to don't loop forever.
- * To do so we saved the max ID we want to handle.
- *
- * FUTURE OPTIMIZATIONS:
- * Note that this is NOT great algorithmically. Redis uses
- * a single time event so it's not a problem but the right
- * way to do this is to add the new elements on head, and
- * to flag deleted elements in a special way for later
- * deletion (putting references to the nodes to delete into
- * another linked list). */
if (retval != AE_NOMORE) {
aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
} else {
- aeDeleteTimeEvent(eventLoop, id);
+ te->id = AE_DELETED_EVENT_ID;
}
- te = eventLoop->timeEventHead;
- } else {
- te = te->next;
}
+ te = te->next;
}
return processed;
}
@@ -346,6 +351,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
* if flags has AE_FILE_EVENTS set, file events are processed.
* if flags has AE_TIME_EVENTS set, time events are processed.
* if flags has AE_DONT_WAIT set the function returns ASAP until all
* the events that can be processed without waiting are processed.
+ * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
*
* The function returns the number of events processed. */
@@ -371,19 +377,22 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
if (shortest) {
long now_sec, now_ms;
- /* Calculate the time missing for the nearest
- * timer to fire. */
aeGetTime(&now_sec, &now_ms);
tvp = &tv;
- tvp->tv_sec = shortest->when_sec - now_sec;
- if (shortest->when_ms < now_ms) {
- tvp->tv_usec = ((shortest->when_ms+1000) - now_ms)*1000;
- tvp->tv_sec --;
+
+ /* How many milliseconds we need to wait for the next
+ * time event to fire? */
+ long long ms =
+ (shortest->when_sec - now_sec)*1000 +
+ shortest->when_ms - now_ms;
+
+ if (ms > 0) {
+ tvp->tv_sec = ms/1000;
+ tvp->tv_usec = (ms % 1000)*1000;
} else {
- tvp->tv_usec = (shortest->when_ms - now_ms)*1000;
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
}
- if (tvp->tv_sec < 0) tvp->tv_sec = 0;
- if (tvp->tv_usec < 0) tvp->tv_usec = 0;
} else {
/* If we have to check for events but need to return
* ASAP because of AE_DONT_WAIT we need to set the timeout
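
A worked instance of the rewritten timeout computation above, with illustrative clock values: suppose the nearest timer fires at when_sec=100, when_ms=200 and the clock now reads now_sec=99, now_ms=800.

    #include <sys/time.h>

    struct timeval next_timeout(void) {
        long long ms = (100 - 99) * 1000 + 200 - 800;  /* = 400 */
        struct timeval tv;
        tv.tv_sec  = ms > 0 ? ms / 1000 : 0;           /* 0 */
        tv.tv_usec = ms > 0 ? (ms % 1000) * 1000 : 0;  /* 400000 */
        return tv;
    }

The old per-field code reached the same {0, 400000} by borrowing (when_ms+1000 with tv_sec--) and then clamping negative fields; collapsing everything into a single millisecond delta removes both special cases.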
@@ -397,24 +406,61 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
}
}
+ /* Call the multiplexing API, will return only on timeout or when
+ * some event fires. */
numevents = aeApiPoll(eventLoop, tvp);
+
+ /* After sleep callback. */
+ if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
+ eventLoop->aftersleep(eventLoop);
+
for (j = 0; j < numevents; j++) {
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
int mask = eventLoop->fired[j].mask;
int fd = eventLoop->fired[j].fd;
- int rfired = 0;
+ int fired = 0; /* Number of events fired for current fd. */
- /* note the fe->mask & mask & ... code: maybe an already processed
- * event removed an element that fired and we still didn't
- * processed, so we check if the event is still valid. */
- if (fe->mask & mask & AE_READABLE) {
- rfired = 1;
+ /* Normally we execute the readable event first, and the writable
+ * event later. This is useful as sometimes we may be able
+ * to serve the reply of a query immediately after processing the
+ * query.
+ *
+ * However if AE_BARRIER is set in the mask, our application is
+ * asking us to do the reverse: never fire the writable event
+ * after the readable. In such a case, we invert the calls.
+ * This is useful when, for instance, we want to do things
+ * in the beforeSleep() hook, like fsynching a file to disk,
+ * before replying to a client. */
+ int invert = fe->mask & AE_BARRIER;
+
+ /* Note the "fe->mask & mask & ..." code: maybe an already
+ * processed event removed an element that fired and that we still
+ * haven't processed, so we check if the event is still valid.
+ *
+ * Fire the readable event if the call sequence is not
+ * inverted. */
+ if (!invert && fe->mask & mask & AE_READABLE) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
}
+
+ /* Fire the writable event. */
if (fe->mask & mask & AE_WRITABLE) {
- if (!rfired || fe->wfileProc != fe->rfileProc)
+ if (!fired || fe->wfileProc != fe->rfileProc) {
fe->wfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
+ }
+ }
+
+ /* If we have to invert the call, fire the readable event now
+ * after the writable one. */
+ if (invert && fe->mask & mask & AE_READABLE) {
+ if (!fired || fe->wfileProc != fe->rfileProc) {
+ fe->rfileProc(eventLoop,fd,fe->clientData,mask);
+ fired++;
+ }
}
+
processed++;
}
}
@@ -439,7 +485,7 @@ int aeWait(int fd, int mask, long long milliseconds) {
if ((retval = poll(&pfd, 1, milliseconds))== 1) {
if (pfd.revents & POLLIN) retmask |= AE_READABLE;
if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
- if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
+ if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
return retmask;
} else {
@@ -452,7 +498,7 @@ void aeMain(aeEventLoop *eventLoop) {
while (!eventLoop->stop) {
if (eventLoop->beforesleep != NULL)
eventLoop->beforesleep(eventLoop);
- aeProcessEvents(eventLoop, AE_ALL_EVENTS);
+ aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
}
}
@@ -463,3 +509,7 @@ char *aeGetApiName(void) {
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
eventLoop->beforesleep = beforesleep;
}
+
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
+ eventLoop->aftersleep = aftersleep;
+}
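
A minimal usage sketch for the after-sleep hook added above; the handler names are illustrative, and note the hook deliberately reuses the aeBeforeSleepProc typedef (see the prototype added to ae.h below):

    #include "ae.h"

    static void myBeforeSleep(aeEventLoop *el) { (void)el; /* e.g. flush buffers */ }
    static void myAfterSleep(aeEventLoop *el)  { (void)el; /* e.g. retake locks */ }

    void setupHooks(aeEventLoop *el) {
        aeSetBeforeSleepProc(el, myBeforeSleep);
        aeSetAfterSleepProc(el, myAfterSleep);
        /* aeMain() now passes AE_CALL_AFTER_SLEEP, so myAfterSleep runs
         * right after aeApiPoll() returns on every loop iteration. */
    }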
diff --git a/src/ae.h b/src/ae.h
index 15ca1b5e7..184fe3d1b 100644
--- a/src/ae.h
+++ b/src/ae.h
@@ -33,19 +33,28 @@
#ifndef __AE_H__
#define __AE_H__
+#include <time.h>
+
#define AE_OK 0
#define AE_ERR -1
-#define AE_NONE 0
-#define AE_READABLE 1
-#define AE_WRITABLE 2
+#define AE_NONE 0 /* No events registered. */
+#define AE_READABLE 1 /* Fire when descriptor is readable. */
+#define AE_WRITABLE 2 /* Fire when descriptor is writable. */
+#define AE_BARRIER 4 /* With WRITABLE, never fire the event if the
+ READABLE event already fired in the same event
+ loop iteration. Useful when you want to persist
+ things to disk before sending replies, and want
+ to do that in a group fashion. */
#define AE_FILE_EVENTS 1
#define AE_TIME_EVENTS 2
#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS)
#define AE_DONT_WAIT 4
+#define AE_CALL_AFTER_SLEEP 8
#define AE_NOMORE -1
+#define AE_DELETED_EVENT_ID -1
/* Macros */
#define AE_NOTUSED(V) ((void) V)
@@ -60,7 +69,7 @@ typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
/* File event structure */
typedef struct aeFileEvent {
- int mask; /* one of AE_(READABLE|WRITABLE) */
+ int mask; /* one of AE_(READABLE|WRITABLE|BARRIER) */
aeFileProc *rfileProc;
aeFileProc *wfileProc;
void *clientData;
@@ -74,6 +83,7 @@ typedef struct aeTimeEvent {
aeTimeProc *timeProc;
aeEventFinalizerProc *finalizerProc;
void *clientData;
+ struct aeTimeEvent *prev;
struct aeTimeEvent *next;
} aeTimeEvent;
@@ -95,6 +105,7 @@ typedef struct aeEventLoop {
int stop;
void *apidata; /* This is used for polling API specific data */
aeBeforeSleepProc *beforesleep;
+ aeBeforeSleepProc *aftersleep;
} aeEventLoop;
/* Prototypes */
@@ -114,6 +125,7 @@ int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
+void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
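
A sketch of the intended use of the new AE_BARRIER flag, with illustrative handler names. Registering the write handler with the barrier makes aeProcessEvents() invert the per-descriptor order: the write handler fires before the read handler within the same iteration, so a reply produced by this iteration's read is only sent on a later iteration, after beforeSleep() (where, for example, the AOF can be fsynced) has run again:

    /* Replies generated here wait for the next iteration's write event. */
    aeCreateFileEvent(el, fd, AE_READABLE, readQueryFromClient, privdata);

    /* AE_BARRIER: within one iteration, fire this before the read handler. */
    aeCreateFileEvent(el, fd, AE_WRITABLE|AE_BARRIER, sendReplyToClient, privdata);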
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
index da9c7b906..410aac70d 100644
--- a/src/ae_epoll.c
+++ b/src/ae_epoll.c
@@ -72,7 +72,7 @@ static void aeApiFree(aeEventLoop *eventLoop) {
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
aeApiState *state = eventLoop->apidata;
- struct epoll_event ee;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
/* If the fd was already monitored for some event, we need a MOD
* operation. Otherwise we need an ADD operation. */
int op = eventLoop->events[fd].mask == AE_NONE ?
@@ -82,7 +82,6 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
mask |= eventLoop->events[fd].mask; /* Merge old events */
if (mask & AE_READABLE) ee.events |= EPOLLIN;
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
- ee.data.u64 = 0; /* avoid valgrind warning */
ee.data.fd = fd;
if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
return 0;
@@ -90,13 +89,12 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
aeApiState *state = eventLoop->apidata;
- struct epoll_event ee;
+ struct epoll_event ee = {0}; /* avoid valgrind warning */
int mask = eventLoop->events[fd].mask & (~delmask);
ee.events = 0;
if (mask & AE_READABLE) ee.events |= EPOLLIN;
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
- ee.data.u64 = 0; /* avoid valgrind warning */
ee.data.fd = fd;
if (mask != AE_NONE) {
epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
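
The two hunks above trade a field-by-field zeroing of the epoll_event data union for whole-struct initialization. On 64-bit builds ee.data is an 8-byte union, so assigning only ee.data.fd would leave its upper four bytes uninitialized when the struct is handed to the kernel, which is what valgrind flagged. A brief sketch of the resulting idiom:

    #include <sys/epoll.h>

    static void fill_event(struct epoll_event *out, int fd) {
        struct epoll_event ee = {0};    /* every member zeroed up front */
        ee.events = EPOLLIN | EPOLLOUT;
        ee.data.fd = fd;                /* unused union bytes stay zero */
        *out = ee;
    }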
diff --git a/src/ae_select.c b/src/ae_select.c
index e2b7a9e8a..c039a8ea3 100644
--- a/src/ae_select.c
+++ b/src/ae_select.c
@@ -29,6 +29,7 @@
*/
+#include <sys/select.h>
#include <string.h>
typedef struct aeApiState {
diff --git a/src/anet.c b/src/anet.c
index 76e9b67ae..2981fca13 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -237,7 +237,7 @@ int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len) {
static int anetSetReuseAddr(char *err, int fd) {
int yes = 1;
- /* Make sure connection-intensive things like the redis benckmark
+ /* Make sure connection-intensive things like the redis benchmark
* will be able to close/open sockets a zillion of times */
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) {
anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno));
@@ -264,6 +264,7 @@ static int anetCreateSocket(char *err, int domain) {
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
+#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
static int anetTcpGenericConnect(char *err, char *addr, int port,
char *source_addr, int flags)
{
@@ -295,7 +296,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0)
{
anetSetError(err, "%s", gai_strerror(rv));
- goto end;
+ goto error;
}
for (b = bservinfo; b != NULL; b = b->ai_next) {
if (bind(s,b->ai_addr,b->ai_addrlen) != -1) {
@@ -306,7 +307,7 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
freeaddrinfo(bservinfo);
if (!bound) {
anetSetError(err, "bind: %s", strerror(errno));
- goto end;
+ goto error;
}
}
if (connect(s,p->ai_addr,p->ai_addrlen) == -1) {
@@ -331,9 +332,17 @@ error:
close(s);
s = ANET_ERR;
}
+
end:
freeaddrinfo(servinfo);
- return s;
+
+ /* Handle best effort binding: if a binding address was used, but it is
+ * not possible to create a socket, try again without a binding address. */
+ if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) {
+ return anetTcpGenericConnect(err,addr,port,NULL,flags);
+ } else {
+ return s;
+ }
}
int anetTcpConnect(char *err, char *addr, int port)
@@ -346,9 +355,18 @@ int anetTcpNonBlockConnect(char *err, char *addr, int port)
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}
-int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr)
+int anetTcpNonBlockBindConnect(char *err, char *addr, int port,
+ char *source_addr)
+{
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK);
+}
+
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port,
+ char *source_addr)
{
- return anetTcpGenericConnect(err,addr,port,source_addr,ANET_CONNECT_NONBLOCK);
+ return anetTcpGenericConnect(err,addr,port,source_addr,
+ ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}
int anetUnixGenericConnect(char *err, char *path, int flags)
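
A usage sketch for the best-effort variant added above (host, port and source address are illustrative). With the plain bind-connect a failing bind aborts the whole attempt; the ANET_CONNECT_BE_BINDING flag makes anetTcpGenericConnect() retry once with a NULL source address:

    #include <stdio.h>
    #include "anet.h"

    void connect_example(void) {
        char err[ANET_ERR_LEN];
        int fd = anetTcpNonBlockBestEffortBindConnect(err, "10.0.0.5", 6379,
                                                      "192.168.1.9");
        if (fd == ANET_ERR) {
            /* Both the bound and the unbound attempts failed. */
            fprintf(stderr, "connect failed: %s\n", err);
        }
    }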
@@ -362,8 +380,10 @@ int anetUnixGenericConnect(char *err, char *path, int flags)
sa.sun_family = AF_LOCAL;
strncpy(sa.sun_path,path,sizeof(sa.sun_path)-1);
if (flags & ANET_CONNECT_NONBLOCK) {
- if (anetNonBlock(err,s) != ANET_OK)
+ if (anetNonBlock(err,s) != ANET_OK) {
+ close(s);
return ANET_ERR;
+ }
}
if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) {
if (errno == EINPROGRESS &&
@@ -391,7 +411,7 @@ int anetUnixNonBlockConnect(char *err, char *path)
* (unless error or EOF condition is encountered) */
int anetRead(int fd, char *buf, int count)
{
- int nread, totlen = 0;
+ ssize_t nread, totlen = 0;
while(totlen != count) {
nread = read(fd,buf,count-totlen);
if (nread == 0) return totlen;
@@ -402,11 +422,11 @@ int anetRead(int fd, char *buf, int count)
return totlen;
}
-/* Like write(2) but make sure 'count' is read before to return
+/* Like write(2) but make sure 'count' bytes are written before returning
* (unless error is encountered) */
int anetWrite(int fd, char *buf, int count)
{
- int nwritten, totlen = 0;
+ ssize_t nwritten, totlen = 0;
while(totlen != count) {
nwritten = write(fd,buf,count-totlen);
if (nwritten == 0) return totlen;
@@ -444,7 +464,7 @@ static int anetV6Only(char *err, int s) {
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
{
- int s, rv;
+ int s = -1, rv;
char _port[6]; /* strlen("65535") */
struct addrinfo hints, *servinfo, *p;
@@ -464,15 +484,16 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl
if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error;
if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
- if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog) == ANET_ERR) goto error;
+ if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog) == ANET_ERR) s = ANET_ERR;
goto end;
}
if (p == NULL) {
- anetSetError(err, "unable to bind socket");
+ anetSetError(err, "unable to bind socket, errno: %d", errno);
goto error;
}
error:
+ if (s != -1) close(s);
s = ANET_ERR;
end:
freeaddrinfo(servinfo);
diff --git a/src/anet.h b/src/anet.h
index ea9c77f2e..7142f78d2 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -31,6 +31,8 @@
#ifndef ANET_H
#define ANET_H
+#include <sys/types.h>
+
#define ANET_OK 0
#define ANET_ERR -1
#define ANET_ERR_LEN 256
@@ -50,6 +52,7 @@
int anetTcpConnect(char *err, char *addr, int port);
int anetTcpNonBlockConnect(char *err, char *addr, int port);
int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr);
+int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port, char *source_addr);
int anetUnixConnect(char *err, char *path);
int anetUnixNonBlockConnect(char *err, char *path);
int anetRead(int fd, char *buf, int count);
diff --git a/src/aof.c b/src/aof.c
index f5a90a12c..f8f26bdfe 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "bio.h"
#include "rio.h"
@@ -38,6 +38,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/wait.h>
+#include <sys/param.h>
void aofUpdateCurrentSize(void);
void aofClosePipes(void);
@@ -95,10 +96,10 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
listNode *ln;
aofrwblock *block;
ssize_t nwritten;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(fd);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(privdata);
+ UNUSED(mask);
while(1) {
ln = listFirst(server.aof_rewrite_buf_blocks);
@@ -114,6 +115,7 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
if (nwritten <= 0) return;
memmove(block->buf,block->buf+nwritten,block->used-nwritten);
block->used -= nwritten;
+ block->free += nwritten;
}
if (block->used == 0) listDelNode(server.aof_rewrite_buf_blocks,ln);
}
@@ -150,9 +152,9 @@ void aofRewriteBufferAppend(unsigned char *s, unsigned long len) {
* as a notice or warning. */
numblocks = listLength(server.aof_rewrite_buf_blocks);
if (((numblocks+1) % 10) == 0) {
- int level = ((numblocks+1) % 100) == 0 ? REDIS_WARNING :
- REDIS_NOTICE;
- redisLog(level,"Background AOF buffer size: %lu MB",
+ int level = ((numblocks+1) % 100) == 0 ? LL_WARNING :
+ LL_NOTICE;
+ serverLog(level,"Background AOF buffer size: %lu MB",
aofRewriteBufferSize()/(1024*1024));
}
}
@@ -198,57 +200,113 @@ ssize_t aofRewriteBufferWrite(int fd) {
/* Starts a background task that performs fsync() against the specified
* file descriptor (the one of the AOF file) in another thread. */
void aof_background_fsync(int fd) {
- bioCreateBackgroundJob(REDIS_BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);
+ bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);
+}
+
+/* Kills the AOFRW child process, if one exists. */
+static void killAppendOnlyChild(void) {
+ int statloc;
+ /* No AOFRW child? return. */
+ if (server.aof_child_pid == -1) return;
+ /* Kill AOFRW child, wait for child exit. */
+ serverLog(LL_NOTICE,"Killing running AOF rewrite child: %ld",
+ (long) server.aof_child_pid);
+ if (kill(server.aof_child_pid,SIGUSR1) != -1) {
+ while(wait3(&statloc,0,NULL) != server.aof_child_pid);
+ }
+ /* Reset the buffer accumulating changes while the child saves. */
+ aofRewriteBufferReset();
+ aofRemoveTempFile(server.aof_child_pid);
+ server.aof_child_pid = -1;
+ server.aof_rewrite_time_start = -1;
+ /* Close pipes used for IPC between the two processes. */
+ aofClosePipes();
}
/* Called when the user switches from "appendonly yes" to "appendonly no"
* at runtime using the CONFIG command. */
void stopAppendOnly(void) {
- redisAssert(server.aof_state != REDIS_AOF_OFF);
+ serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
- aof_fsync(server.aof_fd);
+ redis_fsync(server.aof_fd);
close(server.aof_fd);
server.aof_fd = -1;
server.aof_selected_db = -1;
- server.aof_state = REDIS_AOF_OFF;
- /* rewrite operation in progress? kill it, wait child exit */
- if (server.aof_child_pid != -1) {
- int statloc;
-
- redisLog(REDIS_NOTICE,"Killing running AOF rewrite child: %ld",
- (long) server.aof_child_pid);
- if (kill(server.aof_child_pid,SIGUSR1) != -1)
- wait3(&statloc,0,NULL);
- /* reset the buffer accumulating changes while the child saves */
- aofRewriteBufferReset();
- aofRemoveTempFile(server.aof_child_pid);
- server.aof_child_pid = -1;
- server.aof_rewrite_time_start = -1;
- /* close pipes used for IPC between the two processes. */
- aofClosePipes();
- }
+ server.aof_state = AOF_OFF;
+ killAppendOnlyChild();
}
/* Called when the user switches from "appendonly no" to "appendonly yes"
* at runtime using the CONFIG command. */
int startAppendOnly(void) {
- server.aof_last_fsync = server.unixtime;
- server.aof_fd = open(server.aof_filename,O_WRONLY|O_APPEND|O_CREAT,0644);
- redisAssert(server.aof_state == REDIS_AOF_OFF);
- if (server.aof_fd == -1) {
- redisLog(REDIS_WARNING,"Redis needs to enable the AOF but can't open the append only file: %s",strerror(errno));
- return REDIS_ERR;
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
+ int newfd;
+
+ newfd = open(server.aof_filename,O_WRONLY|O_APPEND|O_CREAT,0644);
+ serverAssert(server.aof_state == AOF_OFF);
+ if (newfd == -1) {
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+
+ serverLog(LL_WARNING,
+ "Redis needs to enable the AOF but can't open the "
+ "append only file %s (in server root dir %s): %s",
+ server.aof_filename,
+ cwdp ? cwdp : "unknown",
+ strerror(errno));
+ return C_ERR;
}
- if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
- close(server.aof_fd);
- redisLog(REDIS_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
- return REDIS_ERR;
+ if (server.rdb_child_pid != -1) {
+ server.aof_rewrite_scheduled = 1;
+ serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible.");
+ } else {
+ /* If there is a pending AOF rewrite, we need to switch it off and
+ * start a new one: the old one cannot be reused because it is not
+ * accumulating the AOF buffer. */
+ if (server.aof_child_pid != -1) {
+ serverLog(LL_WARNING,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now.");
+ killAppendOnlyChild();
+ }
+ if (rewriteAppendOnlyFileBackground() == C_ERR) {
+ close(newfd);
+ serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
+ return C_ERR;
+ }
}
/* We correctly switched on AOF, now wait for the rewrite to be complete
* in order to append data on disk. */
- server.aof_state = REDIS_AOF_WAIT_REWRITE;
- return REDIS_OK;
+ server.aof_state = AOF_WAIT_REWRITE;
+ server.aof_last_fsync = server.unixtime;
+ server.aof_fd = newfd;
+ return C_OK;
+}
+
+/* This is a wrapper to the write syscall in order to retry on short writes
+ * or if the syscall gets interrupted. It could look strange that we retry
+ * on short writes given that we are writing to a block device: normally if
+ * the first call is short, there is an end-of-space condition, so the next
+ * is likely to fail. However apparently in modern systems this is no longer
+ * true, and in general it looks just more resilient to retry the write. If
+ * there is an actual error condition we'll get it at the next try. */
+ssize_t aofWrite(int fd, const char *buf, size_t len) {
+ ssize_t nwritten = 0, totwritten = 0;
+
+ while(len) {
+ nwritten = write(fd, buf, len);
+
+ if (nwritten < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+ return totwritten ? totwritten : -1;
+ }
+
+ len -= nwritten;
+ buf += nwritten;
+ totwritten += nwritten;
+ }
+
+ return totwritten;
}
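
A sketch of how a caller observes aofWrite()'s contract (helper name and buffer are illustrative): EINTR never reaches the caller, short writes are retried, and a persistent failure surfaces either as -1 (nothing written) or as a count smaller than len (partial write, with errno set by the failing write):

    #include <unistd.h>

    static void write_aof_buffer(int fd, const char *buf, size_t len) {
        ssize_t n = aofWrite(fd, buf, len);
        if (n != (ssize_t)len) {
            /* n == -1: the very first write() failed; otherwise n bytes
             * reached the file before the error. flushAppendOnlyFile()
             * below uses this same comparison to decide between logging
             * the error and ftruncate()-ing away the partial tail. */
        }
    }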
/* Write the append only file buffer on disk.
@@ -278,7 +336,7 @@ void flushAppendOnlyFile(int force) {
if (sdslen(server.aof_buf) == 0) return;
if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
- sync_in_progress = bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC) != 0;
+ sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;
if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {
/* With this append fsync policy we do background fsyncing.
@@ -298,7 +356,7 @@ void flushAppendOnlyFile(int force) {
/* Otherwise fall through, and go write since we can't wait
* over two seconds. */
server.aof_delayed_fsync++;
- redisLog(REDIS_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
+ serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
}
}
/* We want to perform a single write. This should be guaranteed atomic
@@ -308,7 +366,7 @@ void flushAppendOnlyFile(int force) {
* or alike */
latencyStartMonitor(latency);
- nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
+ nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
latencyEndMonitor(latency);
/* We want to capture different events for delayed writes:
* when the delay happens with a pending fsync, or with a saving child
@@ -327,7 +385,7 @@ void flushAppendOnlyFile(int force) {
/* We performed the write so reset the postponed flush sentinel to zero. */
server.aof_flush_postponed_start = 0;
- if (nwritten != (signed)sdslen(server.aof_buf)) {
+ if (nwritten != (ssize_t)sdslen(server.aof_buf)) {
static time_t last_write_error_log = 0;
int can_log = 0;
@@ -340,13 +398,13 @@ void flushAppendOnlyFile(int force) {
/* Log the AOF write error and record the error code. */
if (nwritten == -1) {
if (can_log) {
- redisLog(REDIS_WARNING,"Error writing to the AOF file: %s",
+ serverLog(LL_WARNING,"Error writing to the AOF file: %s",
strerror(errno));
server.aof_last_write_errno = errno;
}
} else {
if (can_log) {
- redisLog(REDIS_WARNING,"Short write while writing to "
+ serverLog(LL_WARNING,"Short write while writing to "
"the AOF file: (nwritten=%lld, "
"expected=%lld)",
(long long)nwritten,
@@ -355,7 +413,7 @@ void flushAppendOnlyFile(int force) {
if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {
if (can_log) {
- redisLog(REDIS_WARNING, "Could not remove short write "
+ serverLog(LL_WARNING, "Could not remove short write "
"from the append-only file. Redis may refuse "
"to load the AOF the next time it starts. "
"ftruncate: %s", strerror(errno));
@@ -374,13 +432,13 @@ void flushAppendOnlyFile(int force) {
* reply for the client is already in the output buffers, and we
* have the contract with the user that on acknowledged write data
* is synced on disk. */
- redisLog(REDIS_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
+ serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
exit(1);
} else {
/* Recover from failed write leaving data into the buffer. However
* set an error to stop accepting writes as long as the error
* condition is not cleared. */
- server.aof_last_write_status = REDIS_ERR;
+ server.aof_last_write_status = C_ERR;
/* Trim the sds buffer if there was a partial write, and there
* was no way to undo it with ftruncate(2). */
@@ -393,10 +451,10 @@ void flushAppendOnlyFile(int force) {
} else {
/* Successful write(2). If AOF was in error state, restore the
* OK state and log the event. */
- if (server.aof_last_write_status == REDIS_ERR) {
- redisLog(REDIS_WARNING,
+ if (server.aof_last_write_status == C_ERR) {
+ serverLog(LL_WARNING,
"AOF write error looks solved, Redis can write again.");
- server.aof_last_write_status = REDIS_OK;
+ server.aof_last_write_status = C_OK;
}
}
server.aof_current_size += nwritten;
@@ -418,10 +476,10 @@ void flushAppendOnlyFile(int force) {
/* Perform the fsync if needed. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
- /* aof_fsync is defined as fdatasync() for Linux in order to avoid
+ /* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
- aof_fsync(server.aof_fd); /* Let's try to get this data on the disk */
+ redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_last_fsync = server.unixtime;
@@ -521,6 +579,22 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
decrRefCount(tmpargv[0]);
buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
+ } else if (cmd->proc == setCommand && argc > 3) {
+ int i;
+ robj *exarg = NULL, *pxarg = NULL;
+ /* Translate SET [EX seconds] [PX milliseconds] to SET and PEXPIREAT. */
+ buf = catAppendOnlyGenericCommand(buf,3,argv);
+ for (i = 3; i < argc; i ++) {
+ if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1];
+ if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1];
+ }
+ serverAssert(!(exarg && pxarg));
+ if (exarg)
+ buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],
+ exarg);
+ if (pxarg)
+ buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],
+ pxarg);
} else {
/* All the other commands don't need translation or need the
* same translation already operated in the command vector
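
A worked example of the translation performed by the new setCommand branch above, with illustrative values. A command received at Unix time 1500000000 (seconds):

    SET foo bar EX 10

is fed to the AOF as a plain three-argument SET followed by an absolute PEXPIREAT, so a later replay does not re-arm a fresh ten-second window:

    *3\r\n$3\r\nSET\r\n$3\r\nfoo\r\n$3\r\nbar\r\n
    *3\r\n$9\r\nPEXPIREAT\r\n$3\r\nfoo\r\n$13\r\n1500000010000\r\n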
@@ -531,7 +605,7 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
/* Append to the AOF buffer. This will be flushed on disk just before
* of re-entering the event loop, so before the client will get a
* positive reply about the operation performed. */
- if (server.aof_state == REDIS_AOF_ON)
+ if (server.aof_state == AOF_ON)
server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));
/* If a background append only file rewriting is in progress we want to
@@ -550,8 +624,8 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
/* In Redis commands are always executed in the context of a client, so in
* order to load the append only file we need to create a fake client. */
-struct redisClient *createFakeClient(void) {
- struct redisClient *c = zmalloc(sizeof(*c));
+struct client *createFakeClient(void) {
+ struct client *c = zmalloc(sizeof(*c));
selectDb(c,0);
c->fd = -1;
@@ -562,22 +636,22 @@ struct redisClient *createFakeClient(void) {
c->argv = NULL;
c->bufpos = 0;
c->flags = 0;
- c->btype = REDIS_BLOCKED_NONE;
+ c->btype = BLOCKED_NONE;
/* We set the fake client as a slave waiting for the synchronization
* so that Redis will not try to send replies to this client. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
+ c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
c->reply = listCreate();
c->reply_bytes = 0;
c->obuf_soft_limit_reached_time = 0;
c->watched_keys = listCreate();
c->peerid = NULL;
- listSetFreeMethod(c->reply,decrRefCountVoid);
+ listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
initClientMultiState(c);
return c;
}
-void freeFakeClientArgv(struct redisClient *c) {
+void freeFakeClientArgv(struct client *c) {
int j;
for (j = 0; j < c->argc; j++)
@@ -585,7 +659,7 @@ void freeFakeClientArgv(struct redisClient *c) {
zfree(c->argv);
}
-void freeFakeClient(struct redisClient *c) {
+void freeFakeClient(struct client *c) {
sdsfree(c->querybuf);
listRelease(c->reply);
listRelease(c->watched_keys);
@@ -593,35 +667,61 @@ void freeFakeClient(struct redisClient *c) {
zfree(c);
}
-/* Replay the append log file. On success REDIS_OK is returned. On non fatal
- * error (the append only file is zero-length) REDIS_ERR is returned. On
+/* Replay the append log file. On success C_OK is returned. On non fatal
+ * error (the append only file is zero-length) C_ERR is returned. On
* fatal error an error message is logged and the program exits. */
int loadAppendOnlyFile(char *filename) {
- struct redisClient *fakeClient;
+ struct client *fakeClient;
FILE *fp = fopen(filename,"r");
struct redis_stat sb;
int old_aof_state = server.aof_state;
long loops = 0;
- off_t valid_up_to = 0; /* Offset of the latest well-formed command loaded. */
+ off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */
+
+ if (fp == NULL) {
+ serverLog(LL_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
+ exit(1);
+ }
+ /* Handle a zero-length AOF file as a special case. An empty AOF file
+ * is a valid AOF because an empty server with AOF enabled will create
+ * a zero-length file at startup, which will remain that way if no write
+ * operation is ever received. */
if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
server.aof_current_size = 0;
fclose(fp);
- return REDIS_ERR;
- }
-
- if (fp == NULL) {
- redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
- exit(1);
+ return C_ERR;
}
/* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
* to the same file we're about to read. */
- server.aof_state = REDIS_AOF_OFF;
+ server.aof_state = AOF_OFF;
fakeClient = createFakeClient();
startLoading(fp);
+ /* Check if this AOF file has an RDB preamble. In that case we need to
+ * load the RDB file and later continue loading the AOF tail. */
+ char sig[5]; /* "REDIS" */
+ if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) {
+ /* No RDB preamble, seek back at 0 offset. */
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ } else {
+ /* RDB preamble. Load it using the RDB loading functions. */
+ rio rdb;
+
+ serverLog(LL_NOTICE,"Reading RDB preamble from AOF file...");
+ if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
+ rioInitWithFile(&rdb,fp);
+ if (rdbLoadRio(&rdb,NULL,1) != C_OK) {
+ serverLog(LL_WARNING,"Error reading the RDB preamble of the AOF file, AOF loading aborted");
+ goto readerr;
+ } else {
+ serverLog(LL_NOTICE,"Reading the remaining AOF tail...");
+ }
+ }
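
A minimal sketch of the preamble sniff used above: an AOF rewritten with the RDB preamble starts with the same five-byte magic as an .rdb file, so the loader peeks at five bytes and seeks back to offset 0 on either path:

    #include <stdio.h>
    #include <string.h>

    static int aof_has_rdb_preamble(FILE *fp) {
        char sig[5];
        int preamble = fread(sig, 1, 5, fp) == 5 &&
                       memcmp(sig, "REDIS", 5) == 0;
        rewind(fp);  /* both paths reread from the start */
        return preamble;
    }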
+
+ /* Read the actual AOF file, in REPL format, command by command. */
while(1) {
int argc, j;
unsigned long len;
@@ -659,14 +759,14 @@ int loadAppendOnlyFile(char *filename) {
}
if (buf[0] != '$') goto fmterr;
len = strtol(buf+1,NULL,10);
- argsds = sdsnewlen(NULL,len);
+ argsds = sdsnewlen(SDS_NOINIT,len);
if (len && fread(argsds,len,1,fp) == 0) {
sdsfree(argsds);
fakeClient->argc = j; /* Free up to j-1. */
freeFakeClientArgv(fakeClient);
goto readerr;
}
- argv[j] = createObject(REDIS_STRING,argsds);
+ argv[j] = createObject(OBJ_STRING,argsds);
if (fread(buf,2,1,fp) == 0) {
fakeClient->argc = j+1; /* Free up to j. */
freeFakeClientArgv(fakeClient);
@@ -677,73 +777,80 @@ int loadAppendOnlyFile(char *filename) {
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
if (!cmd) {
- redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", (char*)argv[0]->ptr);
+ serverLog(LL_WARNING,"Unknown command '%s' reading the append only file", (char*)argv[0]->ptr);
exit(1);
}
/* Run the command in the context of a fake client */
+ fakeClient->cmd = cmd;
cmd->proc(fakeClient);
/* The fake client should not have a reply */
- redisAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
+ serverAssert(fakeClient->bufpos == 0 && listLength(fakeClient->reply) == 0);
/* The fake client should never get blocked */
- redisAssert((fakeClient->flags & REDIS_BLOCKED) == 0);
+ serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0);
/* Clean up. Command code may have changed argv/argc so we use the
* argv/argc of the client instead of the local variables. */
freeFakeClientArgv(fakeClient);
+ fakeClient->cmd = NULL;
if (server.aof_load_truncated) valid_up_to = ftello(fp);
}
/* This point can only be reached when EOF is reached without errors.
- * If the client is in the middle of a MULTI/EXEC, log error and quit. */
- if (fakeClient->flags & REDIS_MULTI) goto uxeof;
+ * If the client is in the middle of a MULTI/EXEC, handle it as if it were
+ * a short read, even if technically the protocol is correct: we want
+ * to remove the unprocessed tail and continue. */
+ if (fakeClient->flags & CLIENT_MULTI) goto uxeof;
-loaded_ok: /* DB loaded, cleanup and return REDIS_OK to the caller. */
+loaded_ok: /* DB loaded, cleanup and return C_OK to the caller. */
fclose(fp);
freeFakeClient(fakeClient);
server.aof_state = old_aof_state;
stopLoading();
aofUpdateCurrentSize();
server.aof_rewrite_base_size = server.aof_current_size;
- return REDIS_OK;
+ return C_OK;
readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */
if (!feof(fp)) {
- redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
exit(1);
}
uxeof: /* Unexpected AOF end of file. */
if (server.aof_load_truncated) {
- redisLog(REDIS_WARNING,"!!! Warning: short read while loading the AOF file !!!");
- redisLog(REDIS_WARNING,"!!! Truncating the AOF at offset %llu !!!",
+ serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file !!!");
+ serverLog(LL_WARNING,"!!! Truncating the AOF at offset %llu !!!",
(unsigned long long) valid_up_to);
if (valid_up_to == -1 || truncate(filename,valid_up_to) == -1) {
if (valid_up_to == -1) {
- redisLog(REDIS_WARNING,"Last valid command offset is invalid");
+ serverLog(LL_WARNING,"Last valid command offset is invalid");
} else {
- redisLog(REDIS_WARNING,"Error truncating the AOF file: %s",
+ serverLog(LL_WARNING,"Error truncating the AOF file: %s",
strerror(errno));
}
} else {
/* Make sure the AOF file descriptor points to the end of the
* file after the truncate call. */
if (server.aof_fd != -1 && lseek(server.aof_fd,0,SEEK_END) == -1) {
- redisLog(REDIS_WARNING,"Can't seek the end of the AOF file: %s",
+ serverLog(LL_WARNING,"Can't seek the end of the AOF file: %s",
strerror(errno));
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"AOF loaded anyway because aof-load-truncated is enabled");
goto loaded_ok;
}
}
}
- redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file. You can: 1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename>. 2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.");
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Unexpected end of file reading the append only file. You can: 1) Make a backup of your AOF file, then use ./redis-check-aof --fix <filename>. 2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.");
exit(1);
fmterr: /* Format error. */
- redisLog(REDIS_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
+ if (fakeClient) freeFakeClient(fakeClient); /* avoid valgrind warning */
+ serverLog(LL_WARNING,"Bad file format reading the append only file: make a backup of your AOF file, then use ./redis-check-aof --fix <filename>");
exit(1);
}
@@ -752,16 +859,16 @@ fmterr: /* Format error. */
* ------------------------------------------------------------------------- */
/* Delegate writing an object to writing a bulk string or bulk long long.
- * This is not placed in rio.c since that adds the redis.h dependency. */
+ * This is not placed in rio.c since that adds the server.h dependency. */
int rioWriteBulkObject(rio *r, robj *obj) {
/* Avoid using getDecodedObject to help copy-on-write (we are often
* in a child process when this function is called). */
- if (obj->encoding == REDIS_ENCODING_INT) {
+ if (obj->encoding == OBJ_ENCODING_INT) {
return rioWriteBulkLongLong(r,(long)obj->ptr);
} else if (sdsEncodedObject(obj)) {
return rioWriteBulkString(r,obj->ptr,sdslen(obj->ptr));
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
@@ -770,15 +877,15 @@ int rioWriteBulkObject(rio *r, robj *obj) {
int rewriteListObject(rio *r, robj *key, robj *o) {
long long count = 0, items = listTypeLength(o);
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklist *list = o->ptr;
quicklistIter *li = quicklistGetIterator(list, AL_START_HEAD);
quicklistEntry entry;
while (quicklistNext(li,&entry)) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"RPUSH",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
@@ -789,12 +896,12 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
} else {
if (rioWriteBulkLongLong(r,entry.longval) == 0) return 0;
}
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
quicklistReleaseIterator(li);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return 1;
}
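
The count/cmd_items logic above batches variadic commands so that at most AOF_REWRITE_ITEMS_PER_CMD elements (64 in the Redis tree this diff targets) are emitted per command. For an illustrative 150-element list the rewrite emits three commands rather than one per element:

    RPUSH key e1 ... e64       (cmd_items = 64)
    RPUSH key e65 ... e128     (cmd_items = 64)
    RPUSH key e129 ... e150    (cmd_items = 22)

The same pattern repeats below for SADD, ZADD and HMSET.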
@@ -804,44 +911,44 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
int rewriteSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = setTypeSize(o);
- if (o->encoding == REDIS_ENCODING_INTSET) {
+ if (o->encoding == OBJ_ENCODING_INTSET) {
int ii = 0;
int64_t llval;
while(intsetGet(o->ptr,ii++,&llval)) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"SADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
if (rioWriteBulkLongLong(r,llval) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
- } else if (o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
dictIterator *di = dictGetIterator(o->ptr);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds ele = dictGetKey(de);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items) == 0) return 0;
if (rioWriteBulkString(r,"SADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteBulkString(r,ele,sdslen(ele)) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 1;
}
@@ -851,7 +958,7 @@ int rewriteSetObject(rio *r, robj *key, robj *o) {
int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = zsetLength(o);
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = o->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -860,17 +967,17 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
double score;
eptr = ziplistIndex(zl,0);
- redisAssert(eptr != NULL);
+ serverAssert(eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
while (eptr != NULL) {
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
score = zzlGetScore(sptr);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"ZADD",4) == 0) return 0;
@@ -883,34 +990,34 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
if (rioWriteBulkLongLong(r,vll) == 0) return 0;
}
zzlNext(zl,&eptr,&sptr);
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
dictIterator *di = dictGetIterator(zs->dict);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds ele = dictGetKey(de);
double *score = dictGetVal(de);
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"ZADD",4) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
if (rioWriteBulkDouble(r,*score) == 0) return 0;
- if (rioWriteBulkObject(r,eleobj) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteBulkString(r,ele,sdslen(ele)) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted zset encoding");
+ serverPanic("Unknown sorted zset encoding");
}
return 1;
}
@@ -918,30 +1025,26 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) {
/* Write either the key or the value of the currently selected item of a hash.
* The 'hi' argument passes a valid Redis hash iterator.
* The 'what' field specifies whether to write a key or a value and can be
- * either REDIS_HASH_KEY or REDIS_HASH_VALUE.
+ * either OBJ_HASH_KEY or OBJ_HASH_VALUE.
*
* The function returns 0 on error, non-zero on success. */
static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
+ if (vstr)
return rioWriteBulkString(r, (char*)vstr, vlen);
- } else {
+ else
return rioWriteBulkLongLong(r, vll);
- }
-
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- hashTypeCurrentFromHashTable(hi, what, &value);
- return rioWriteBulkObject(r, value);
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeCurrentFromHashTable(hi, what);
+ return rioWriteBulkString(r, value, sdslen(value));
}
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
return 0;
}
@@ -952,19 +1055,19 @@ int rewriteHashObject(rio *r, robj *key, robj *o) {
long long count = 0, items = hashTypeLength(o);
hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
+ while (hashTypeNext(hi) != C_ERR) {
if (count == 0) {
- int cmd_items = (items > REDIS_AOF_REWRITE_ITEMS_PER_CMD) ?
- REDIS_AOF_REWRITE_ITEMS_PER_CMD : items;
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
if (rioWriteBulkCount(r,'*',2+cmd_items*2) == 0) return 0;
if (rioWriteBulkString(r,"HMSET",5) == 0) return 0;
if (rioWriteBulkObject(r,key) == 0) return 0;
}
- if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_KEY) == 0) return 0;
- if (rioWriteHashIteratorCursor(r, hi, REDIS_HASH_VALUE) == 0) return 0;
- if (++count == REDIS_AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ if (rioWriteHashIteratorCursor(r, hi, OBJ_HASH_KEY) == 0) return 0;
+ if (rioWriteHashIteratorCursor(r, hi, OBJ_HASH_VALUE) == 0) return 0;
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
@@ -973,6 +1076,134 @@ int rewriteHashObject(rio *r, robj *key, robj *o) {
return 1;
}
+/* Helper for rewriteStreamObject() that generates a bulk string into the
+ * AOF representing the ID 'id'. */
+int rioWriteBulkStreamID(rio *r,streamID *id) {
+ int retval;
+
+ sds replyid = sdscatfmt(sdsempty(),"%U-%U",id->ms,id->seq);
+ retval = rioWriteBulkString(r,replyid,sdslen(replyid));
+ sdsfree(replyid);
+ return retval;
+}
+
+/* Helper for rewriteStreamObject(): emit the XCLAIM needed in order to
+ * add the message described by 'nack' having the id 'rawid', into the pending
+ * list of the specified consumer. All this in the context of the specified
+ * key and group. */
+int rioWriteStreamPendingEntry(rio *r, robj *key, const char *groupname, size_t groupname_len, streamConsumer *consumer, unsigned char *rawid, streamNACK *nack) {
+ /* XCLAIM <key> <group> <consumer> 0 <id> TIME <milliseconds-unix-time>
+ RETRYCOUNT <count> JUSTID FORCE. */
+ streamID id;
+ streamDecodeID(rawid,&id);
+ if (rioWriteBulkCount(r,'*',12) == 0) return 0;
+ if (rioWriteBulkString(r,"XCLAIM",6) == 0) return 0;
+ if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0;
+ if (rioWriteBulkString(r,consumer->name,sdslen(consumer->name)) == 0) return 0;
+ if (rioWriteBulkString(r,"0",1) == 0) return 0;
+ if (rioWriteBulkStreamID(r,&id) == 0) return 0;
+ if (rioWriteBulkString(r,"TIME",4) == 0) return 0;
+ if (rioWriteBulkLongLong(r,nack->delivery_time) == 0) return 0;
+ if (rioWriteBulkString(r,"RETRYCOUNT",10) == 0) return 0;
+ if (rioWriteBulkLongLong(r,nack->delivery_count) == 0) return 0;
+ if (rioWriteBulkString(r,"JUSTID",6) == 0) return 0;
+ if (rioWriteBulkString(r,"FORCE",5) == 0) return 0;
+ return 1;
+}
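
For an illustrative pending entry (ID 1526569495631-0, consumer alice, delivery_time 1526569498055, delivery_count 2) the function above emits the 12-element command:

    XCLAIM mystream mygroup alice 0 1526569495631-0 TIME 1526569498055 RETRYCOUNT 2 JUSTID FORCE

JUSTID keeps the replayed XCLAIM from bumping the delivery counter it just restored, and FORCE creates the PEL entry even though the message was never delivered to that consumer during the replay.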
+
+/* Emit the commands needed to rebuild a stream object.
+ * The function returns 0 on error, 1 on success. */
+int rewriteStreamObject(rio *r, robj *key, robj *o) {
+ stream *s = o->ptr;
+ streamIterator si;
+ streamIteratorStart(&si,s,NULL,NULL,0);
+ streamID id;
+ int64_t numfields;
+
+ /* Reconstruct the stream data using XADD commands. */
+ while(streamIteratorGetID(&si,&id,&numfields)) {
+ /* Emit a two elements array for each item. The first is
+ * the ID, the second is an array of field-value pairs. */
+
+ /* Emit the XADD <key> <id> ...fields... command. */
+ if (rioWriteBulkCount(r,'*',3+numfields*2) == 0) return 0;
+ if (rioWriteBulkString(r,"XADD",4) == 0) return 0;
+ if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (rioWriteBulkStreamID(r,&id) == 0) return 0;
+ while(numfields--) {
+ unsigned char *field, *value;
+ int64_t field_len, value_len;
+ streamIteratorGetField(&si,&field,&value,&field_len,&value_len);
+ if (rioWriteBulkString(r,(char*)field,field_len) == 0) return 0;
+ if (rioWriteBulkString(r,(char*)value,value_len) == 0) return 0;
+ }
+ }
+
+ /* Create all the stream consumer groups. */
+ if (s->cgroups) {
+ raxIterator ri;
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *group = ri.data;
+ /* Emit the XGROUP CREATE in order to create the group. */
+ if (rioWriteBulkCount(r,'*',5) == 0) return 0;
+ if (rioWriteBulkString(r,"XGROUP",6) == 0) return 0;
+ if (rioWriteBulkString(r,"CREATE",6) == 0) return 0;
+ if (rioWriteBulkObject(r,key) == 0) return 0;
+ if (rioWriteBulkString(r,(char*)ri.key,ri.key_len) == 0) return 0;
+ if (rioWriteBulkStreamID(r,&group->last_id) == 0) return 0;
+
+ /* Generate XCLAIMs for each consumer that happens to
+ * have pending entries. Empty consumers have no semantic
+ * value so they are discarded. */
+ raxIterator ri_cons;
+ raxStart(&ri_cons,group->consumers);
+ raxSeek(&ri_cons,"^",NULL,0);
+ while(raxNext(&ri_cons)) {
+ streamConsumer *consumer = ri_cons.data;
+ /* For the current consumer, iterate all the PEL entries
+ * to emit the XCLAIM protocol. */
+ raxIterator ri_pel;
+ raxStart(&ri_pel,consumer->pel);
+ raxSeek(&ri_pel,"^",NULL,0);
+ while(raxNext(&ri_pel)) {
+ streamNACK *nack = ri_pel.data;
+ if (rioWriteStreamPendingEntry(r,key,(char*)ri.key,
+ ri.key_len,consumer,
+ ri_pel.key,nack) == 0)
+ {
+ return 0;
+ }
+ }
+ raxStop(&ri_pel);
+ }
+ raxStop(&ri_cons);
+ }
+ raxStop(&ri);
+ }
+
+ streamIteratorStop(&si);
+ return 1;
+}
+
+/* Call the module type callback in order to rewrite a data type
+ * that is exported by a module and is not handled by Redis itself.
+ * The function returns 0 on error, 1 on success. */
+int rewriteModuleObject(rio *r, robj *key, robj *o) {
+ RedisModuleIO io;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitIOContext(io,mt,r);
+ mt->aof_rewrite(&io,key,mv->value);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return io.error ? 0 : 1;
+}
+
/* This function is called by the child rewriting the AOF file to read
* the difference accumulated from the parent into a buffer, that is
* concatenated at the end of the rewrite. */
@@ -988,51 +1219,22 @@ ssize_t aofReadDiffFromParent(void) {
return total;
}
-/* Write a sequence of commands able to fully rebuild the dataset into
- * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
- *
- * In order to minimize the number of commands needed in the rewritten
- * log Redis uses variadic commands when possible, such as RPUSH, SADD
- * and ZADD. However at max REDIS_AOF_REWRITE_ITEMS_PER_CMD items per time
- * are inserted using a single command. */
-int rewriteAppendOnlyFile(char *filename) {
+int rewriteAppendOnlyFileRio(rio *aof) {
dictIterator *di = NULL;
dictEntry *de;
- rio aof;
- FILE *fp;
- char tmpfile[256];
- int j;
- long long now = mstime();
- char byte;
size_t processed = 0;
+ int j;
- /* Note that we have to use a different temp name here compared to the
- * one used by rewriteAppendOnlyFileBackground() function. */
- snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
- fp = fopen(tmpfile,"w");
- if (!fp) {
- redisLog(REDIS_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno));
- return REDIS_ERR;
- }
-
- server.aof_child_diff = sdsempty();
- rioInitWithFile(&aof,fp);
- if (server.aof_rewrite_incremental_fsync)
- rioSetAutoSync(&aof,REDIS_AOF_AUTOSYNC_BYTES);
for (j = 0; j < server.dbnum; j++) {
char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
redisDb *db = server.db+j;
dict *d = db->dict;
if (dictSize(d) == 0) continue;
di = dictGetSafeIterator(d);
- if (!di) {
- fclose(fp);
- return REDIS_ERR;
- }
/* SELECT the new DB */
- if (rioWrite(&aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;
- if (rioWriteBulkLongLong(&aof,j) == 0) goto werr;
+ if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;
+ if (rioWriteBulkLongLong(aof,j) == 0) goto werr;
/* Iterate this DB writing every entry */
while((de = dictNext(di)) != NULL) {
@@ -1046,42 +1248,88 @@ int rewriteAppendOnlyFile(char *filename) {
expiretime = getExpire(db,&key);
- /* If this key is already expired skip it */
- if (expiretime != -1 && expiretime < now) continue;
-
/* Save the key and associated value */
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
/* Emit a SET command */
char cmd[]="*3\r\n$3\r\nSET\r\n";
- if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr;
+ if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
/* Key and value */
- if (rioWriteBulkObject(&aof,&key) == 0) goto werr;
- if (rioWriteBulkObject(&aof,o) == 0) goto werr;
- } else if (o->type == REDIS_LIST) {
- if (rewriteListObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_SET) {
- if (rewriteSetObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_ZSET) {
- if (rewriteSortedSetObject(&aof,&key,o) == 0) goto werr;
- } else if (o->type == REDIS_HASH) {
- if (rewriteHashObject(&aof,&key,o) == 0) goto werr;
+ if (rioWriteBulkObject(aof,&key) == 0) goto werr;
+ if (rioWriteBulkObject(aof,o) == 0) goto werr;
+ } else if (o->type == OBJ_LIST) {
+ if (rewriteListObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_SET) {
+ if (rewriteSetObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_ZSET) {
+ if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_HASH) {
+ if (rewriteHashObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_STREAM) {
+ if (rewriteStreamObject(aof,&key,o) == 0) goto werr;
+ } else if (o->type == OBJ_MODULE) {
+ if (rewriteModuleObject(aof,&key,o) == 0) goto werr;
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
/* Save the expire time */
if (expiretime != -1) {
char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n";
- if (rioWrite(&aof,cmd,sizeof(cmd)-1) == 0) goto werr;
- if (rioWriteBulkObject(&aof,&key) == 0) goto werr;
- if (rioWriteBulkLongLong(&aof,expiretime) == 0) goto werr;
+ if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;
+ if (rioWriteBulkObject(aof,&key) == 0) goto werr;
+ if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr;
}
/* Read some diff from the parent process from time to time. */
- if (aof.processed_bytes > processed+1024*10) {
- processed = aof.processed_bytes;
+ if (aof->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES) {
+ processed = aof->processed_bytes;
aofReadDiffFromParent();
}
}
dictReleaseIterator(di);
+ di = NULL;
+ }
+ return C_OK;
+
+werr:
+ if (di) dictReleaseIterator(di);
+ return C_ERR;
+}
+
+/* Write a sequence of commands able to fully rebuild the dataset into
+ * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
+ *
+ * In order to minimize the number of commands needed in the rewritten
+ * log Redis uses variadic commands when possible, such as RPUSH, SADD
+ * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time
+ * are inserted using a single command. */
+int rewriteAppendOnlyFile(char *filename) {
+ rio aof;
+ FILE *fp;
+ char tmpfile[256];
+ char byte;
+
+ /* Note that we have to use a different temp name here compared to the
+ * one used by rewriteAppendOnlyFileBackground() function. */
+ snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
+ fp = fopen(tmpfile,"w");
+ if (!fp) {
+ serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno));
+ return C_ERR;
+ }
+
+ server.aof_child_diff = sdsempty();
+ rioInitWithFile(&aof,fp);
+
+ if (server.aof_rewrite_incremental_fsync)
+ rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES);
+
+ if (server.aof_use_rdb_preamble) {
+ int error;
+ if (rdbSaveRio(&aof,&error,RDB_SAVE_AOF_PREAMBLE,NULL) == C_ERR) {
+ errno = error;
+ goto werr;
+ }
+ } else {
+ if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr;
}
/* Do an initial slow fsync here while the parent is still sending
@@ -1117,13 +1365,13 @@ int rewriteAppendOnlyFile(char *filename) {
* the child will eventually get terminated. */
if (syncRead(server.aof_pipe_read_ack_from_parent,&byte,1,5000) != 1 ||
byte != '!') goto werr;
- redisLog(REDIS_NOTICE,"Parent agreed to stop sending diffs. Finalizing AOF...");
+ serverLog(LL_NOTICE,"Parent agreed to stop sending diffs. Finalizing AOF...");
/* Read the final diff if any. */
aofReadDiffFromParent();
/* Write the received diff to the file. */
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Concatenating %.2f MB of AOF diff received from parent.",
(double) sdslen(server.aof_child_diff) / (1024*1024));
if (rioWrite(&aof,server.aof_child_diff,sdslen(server.aof_child_diff)) == 0)
@@ -1137,19 +1385,18 @@ int rewriteAppendOnlyFile(char *filename) {
/* Use RENAME to make sure the DB file is changed atomically only
* if the generate DB file is ok. */
if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
unlink(tmpfile);
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
- return REDIS_OK;
+ serverLog(LL_NOTICE,"SYNC append only file rewrite performed");
+ return C_OK;
werr:
+ serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
- if (di) dictReleaseIterator(di);
- return REDIS_ERR;
+ return C_ERR;
}
/* ----------------------------------------------------------------------------
@@ -1161,19 +1408,19 @@ werr:
* parent sends a '!' as well to acknowledge. */
void aofChildPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
char byte;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
if (read(fd,&byte,1) == 1 && byte == '!') {
- redisLog(REDIS_NOTICE,"AOF rewrite child asks to stop sending diffs.");
+ serverLog(LL_NOTICE,"AOF rewrite child asks to stop sending diffs.");
server.aof_stop_sending_diff = 1;
if (write(server.aof_pipe_write_ack_to_child,"!",1) != 1) {
/* If we can't send the ack, inform the user, but don't try again
* since in the other side the children will use a timeout if the
* kernel can't buffer our write, or, the children was
* terminated. */
- redisLog(REDIS_WARNING,"Can't send ACK to AOF child: %s",
+ serverLog(LL_WARNING,"Can't send ACK to AOF child: %s",
strerror(errno));
}
}
@@ -1193,7 +1440,7 @@ int aofCreatePipes(void) {
if (pipe(fds) == -1) goto error; /* parent -> children data. */
if (pipe(fds+2) == -1) goto error; /* children -> parent ack. */
- if (pipe(fds+4) == -1) goto error; /* children -> parent ack. */
+ if (pipe(fds+4) == -1) goto error; /* parent -> children ack. */
/* Parent -> children data is non blocking. */
if (anetNonBlock(NULL,fds[0]) != ANET_OK) goto error;
if (anetNonBlock(NULL,fds[1]) != ANET_OK) goto error;
@@ -1206,13 +1453,13 @@ int aofCreatePipes(void) {
server.aof_pipe_write_ack_to_child = fds[5];
server.aof_pipe_read_ack_from_parent = fds[4];
server.aof_stop_sending_diff = 0;
- return REDIS_OK;
+ return C_OK;
error:
- redisLog(REDIS_WARNING,"Error opening /setting AOF rewrite IPC pipes: %s",
+ serverLog(LL_WARNING,"Error opening /setting AOF rewrite IPC pipes: %s",
strerror(errno));
for (j = 0; j < 6; j++) if(fds[j] != -1) close(fds[j]);
- return REDIS_ERR;
+ return C_ERR;
}
void aofClosePipes(void) {
@@ -1246,8 +1493,9 @@ int rewriteAppendOnlyFileBackground(void) {
pid_t childpid;
long long start;
- if (server.aof_child_pid != -1) return REDIS_ERR;
- if (aofCreatePipes() != REDIS_OK) return REDIS_ERR;
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
+ if (aofCreatePipes() != C_OK) return C_ERR;
+ openChildInfoPipe();
start = ustime();
if ((childpid = fork()) == 0) {
char tmpfile[256];
@@ -1256,14 +1504,17 @@ int rewriteAppendOnlyFileBackground(void) {
closeListeningSockets(0);
redisSetProcTitle("redis-aof-rewrite");
snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
- if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
- size_t private_dirty = zmalloc_get_private_dirty();
+ if (rewriteAppendOnlyFile(tmpfile) == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"AOF rewrite: %zu MB of memory used by copy-on-write",
private_dirty/(1024*1024));
}
+
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_AOF);
exitFromChild(0);
} else {
exitFromChild(1);
@@ -1274,12 +1525,14 @@ int rewriteAppendOnlyFileBackground(void) {
server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
- redisLog(REDIS_WARNING,
+ closeChildInfoPipe();
+ serverLog(LL_WARNING,
"Can't rewrite append only file in background: fork: %s",
strerror(errno));
- return REDIS_ERR;
+ aofClosePipes();
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background append only file rewriting started by pid %d",childpid);
server.aof_rewrite_scheduled = 0;
server.aof_rewrite_time_start = time(NULL);
@@ -1291,18 +1544,18 @@ int rewriteAppendOnlyFileBackground(void) {
* with a SELECT statement and it will be safe to merge. */
server.aof_selected_db = -1;
replicationScriptCacheFlush();
- return REDIS_OK;
+ return C_OK;
}
- return REDIS_OK; /* unreached */
+ return C_OK; /* unreached */
}
-void bgrewriteaofCommand(redisClient *c) {
+void bgrewriteaofCommand(client *c) {
if (server.aof_child_pid != -1) {
addReplyError(c,"Background append only file rewriting already in progress");
} else if (server.rdb_child_pid != -1) {
server.aof_rewrite_scheduled = 1;
addReplyStatus(c,"Background append only file rewriting scheduled");
- } else if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
+ } else if (rewriteAppendOnlyFileBackground() == C_OK) {
addReplyStatus(c,"Background append only file rewriting started");
} else {
addReply(c,shared.err);
@@ -1326,7 +1579,7 @@ void aofUpdateCurrentSize(void) {
latencyStartMonitor(latency);
if (redis_fstat(server.aof_fd,&sb) == -1) {
- redisLog(REDIS_WARNING,"Unable to obtain the AOF file length. stat: %s",
+ serverLog(LL_WARNING,"Unable to obtain the AOF file length. stat: %s",
strerror(errno));
} else {
server.aof_current_size = sb.st_size;
@@ -1344,7 +1597,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
long long now = ustime();
mstime_t latency;
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background AOF rewrite terminated with success");
/* Flush the differences accumulated by the parent to the
@@ -1354,13 +1607,13 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
(int)server.aof_child_pid);
newfd = open(tmpfile,O_WRONLY|O_APPEND);
if (newfd == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unable to open the temporary AOF produced by the child: %s", strerror(errno));
goto cleanup;
}
if (aofRewriteBufferWrite(newfd) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Error trying to flush the parent diff to the rewritten AOF: %s", strerror(errno));
close(newfd);
goto cleanup;
@@ -1368,7 +1621,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency);
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024));
/* The only remaining thing to do is to rename the temporary file to
@@ -1401,10 +1654,10 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
if (server.aof_fd == -1) {
/* AOF disabled */
- /* Don't care if this fails: oldfd will be -1 and we handle that.
- * One notable case of -1 return is if the old file does
- * not exist. */
- oldfd = open(server.aof_filename,O_RDONLY|O_NONBLOCK);
+ /* Don't care if this fails: oldfd will be -1 and we handle that.
+ * One notable case of -1 return is if the old file does
+ * not exist. */
+ oldfd = open(server.aof_filename,O_RDONLY|O_NONBLOCK);
} else {
/* AOF enabled */
oldfd = -1; /* We'll set this to the current AOF filedes later. */
@@ -1414,8 +1667,11 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
* it exists, because we reference it with "oldfd". */
latencyStartMonitor(latency);
if (rename(tmpfile,server.aof_filename) == -1) {
- redisLog(REDIS_WARNING,
- "Error trying to rename the temporary AOF file: %s", strerror(errno));
+ serverLog(LL_WARNING,
+ "Error trying to rename the temporary AOF file %s into %s: %s",
+ tmpfile,
+ server.aof_filename,
+ strerror(errno));
close(newfd);
if (oldfd != -1) close(oldfd);
goto cleanup;
@@ -1432,7 +1688,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
oldfd = server.aof_fd;
server.aof_fd = newfd;
if (server.aof_fsync == AOF_FSYNC_ALWAYS)
- aof_fsync(newfd);
+ redis_fsync(newfd);
else if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
aof_background_fsync(newfd);
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
@@ -1445,27 +1701,29 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
server.aof_buf = sdsempty();
}
- server.aof_lastbgrewrite_status = REDIS_OK;
+ server.aof_lastbgrewrite_status = C_OK;
- redisLog(REDIS_NOTICE, "Background AOF rewrite finished successfully");
+ serverLog(LL_NOTICE, "Background AOF rewrite finished successfully");
/* Change state from WAIT_REWRITE to ON if needed */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE)
- server.aof_state = REDIS_AOF_ON;
+ if (server.aof_state == AOF_WAIT_REWRITE)
+ server.aof_state = AOF_ON;
/* Asynchronously close the overwritten AOF. */
- if (oldfd != -1) bioCreateBackgroundJob(REDIS_BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL);
+ if (oldfd != -1) bioCreateBackgroundJob(BIO_CLOSE_FILE,(void*)(long)oldfd,NULL,NULL);
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Background AOF rewrite signal handler took %lldus", ustime()-now);
} else if (!bysignal && exitcode != 0) {
- server.aof_lastbgrewrite_status = REDIS_ERR;
-
- redisLog(REDIS_WARNING,
+ server.aof_lastbgrewrite_status = C_ERR;
+ serverLog(LL_WARNING,
"Background AOF rewrite terminated with error");
} else {
- server.aof_lastbgrewrite_status = REDIS_ERR;
- redisLog(REDIS_WARNING,
+ /* SIGUSR1 is whitelisted, so we have a way to kill a child without
+ * triggering an error condition. */
+ if (bysignal != SIGUSR1)
+ server.aof_lastbgrewrite_status = C_ERR;
+ serverLog(LL_WARNING,
"Background AOF rewrite terminated by signal %d", bysignal);
}
@@ -1477,6 +1735,6 @@ cleanup:
server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start;
server.aof_rewrite_time_start = -1;
/* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE)
+ if (server.aof_state == AOF_WAIT_REWRITE)
server.aof_rewrite_scheduled = 1;
}
diff --git a/src/atomicvar.h b/src/atomicvar.h
new file mode 100644
index 000000000..173b045fc
--- /dev/null
+++ b/src/atomicvar.h
@@ -0,0 +1,133 @@
+/* This file implements atomic counters using __atomic or __sync macros if
+ * available, otherwise synchronizing different threads using a mutex.
+ *
+ * The exported interface is composed of the following macros:
+ *
+ * atomicIncr(var,count) -- Increment the atomic counter
+ * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter
+ * atomicDecr(var,count) -- Decrement the atomic counter
+ * atomicGet(var,dstvar) -- Fetch the atomic counter value
+ * atomicSet(var,value) -- Set the atomic counter value
+ *
+ * The variable 'var' should also have a declared mutex with the same
+ * name and the "_mutex" postfix, for instance:
+ *
+ * long myvar;
+ * pthread_mutex_t myvar_mutex;
+ * atomicSet(myvar,12345);
+ *
+ * If atomic primitives are available (tested in config.h) the mutex
+ * is not used.
+ *
+ * Never use the return value of these macros; instead use atomicGetIncr()
+ * if you need to get the current value and increment it atomically, as in
+ * the following example:
+ *
+ * long oldvalue;
+ * atomicGetIncr(myvar,oldvalue,1);
+ * doSomethingWith(oldvalue);
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <pthread.h>
+
+#ifndef __ATOMIC_VAR_H
+#define __ATOMIC_VAR_H
+
+/* To test Redis with Helgrind (a Valgrind tool) it is useful to define
+ * the following macro, so that __sync macros are used: those can be detected
+ * by Helgrind (even if they are less efficient) so that no false positive
+ * is reported. */
+// #define __ATOMIC_VAR_FORCE_SYNC_MACROS
+
+#if !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && defined(__ATOMIC_RELAXED) && !defined(__sun) && (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057)
+/* Implementation using __atomic macros. */
+
+#define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \
+} while(0)
+#define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED)
+#define atomicGet(var,dstvar) do { \
+ dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \
+} while(0)
+#define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED)
+#define REDIS_ATOMIC_API "atomic-builtin"
+
+#elif defined(HAVE_ATOMIC)
+/* Implementation using __sync macros. */
+
+#define atomicIncr(var,count) __sync_add_and_fetch(&var,(count))
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ oldvalue_var = __sync_fetch_and_add(&var,(count)); \
+} while(0)
+#define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count))
+#define atomicGet(var,dstvar) do { \
+ dstvar = __sync_sub_and_fetch(&var,0); \
+} while(0)
+#define atomicSet(var,value) do { \
+ while(!__sync_bool_compare_and_swap(&var,var,value)); \
+} while(0)
+#define REDIS_ATOMIC_API "sync-builtin"
+
+#else
+/* Implementation using pthread mutex. */
+
+#define atomicIncr(var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var += (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicGetIncr(var,oldvalue_var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ oldvalue_var = var; \
+ var += (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicDecr(var,count) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var -= (count); \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicGet(var,dstvar) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ dstvar = var; \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define atomicSet(var,value) do { \
+ pthread_mutex_lock(&var ## _mutex); \
+ var = value; \
+ pthread_mutex_unlock(&var ## _mutex); \
+} while(0)
+#define REDIS_ATOMIC_API "pthread-mutex"
+
+#endif
+#endif /* __ATOMIC_VAR_H */
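As an illustrative aside (a minimal sketch, not part of the patch; the worker/main scaffolding is hypothetical), this is how the exported macros are meant to be used, together with the companion mutex that only the pthread fallback actually touches:

    #include <stdio.h>
    #include <pthread.h>
    #include "atomicvar.h"

    long ops = 0;
    pthread_mutex_t ops_mutex = PTHREAD_MUTEX_INITIALIZER; /* used only by the mutex fallback */

    void *worker(void *arg) {
        (void)arg;
        for (int i = 0; i < 1000; i++) atomicIncr(ops,1);
        return NULL;
    }

    int main(void) {
        pthread_t t1, t2;
        long final;
        pthread_create(&t1,NULL,worker,NULL);
        pthread_create(&t2,NULL,worker,NULL);
        pthread_join(t1,NULL);
        pthread_join(t2,NULL);
        atomicGet(ops,final);  /* final == 2000 regardless of the backend */
        printf("ops = %ld\n", final);
        return 0;
    }

Whichever backend is selected at compile time, the calling code stays identical; only the definition of the macros changes.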
diff --git a/src/bio.c b/src/bio.c
index 4bd5a17c6..0c92d053b 100644
--- a/src/bio.c
+++ b/src/bio.c
@@ -58,20 +58,21 @@
*/
-#include "redis.h"
+#include "server.h"
#include "bio.h"
-static pthread_t bio_threads[REDIS_BIO_NUM_OPS];
-static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS];
-static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS];
-static list *bio_jobs[REDIS_BIO_NUM_OPS];
+static pthread_t bio_threads[BIO_NUM_OPS];
+static pthread_mutex_t bio_mutex[BIO_NUM_OPS];
+static pthread_cond_t bio_newjob_cond[BIO_NUM_OPS];
+static pthread_cond_t bio_step_cond[BIO_NUM_OPS];
+static list *bio_jobs[BIO_NUM_OPS];
/* The following array is used to hold the number of pending jobs for every
* OP type. This allows us to export the bioPendingJobsOfType() API that is
* useful when the main thread wants to perform some operation that may involve
* objects shared with the background thread. The main thread will just wait
* until there are no longer jobs of this type to be executed before performing
* the sensitive operation. This data is also useful for reporting. */
-static unsigned long long bio_pending[REDIS_BIO_NUM_OPS];
+static unsigned long long bio_pending[BIO_NUM_OPS];
/* This structure represents a background Job. It is only used locally to this
* file as the API does not expose the internals at all. */
@@ -83,6 +84,9 @@ struct bio_job {
};
void *bioProcessBackgroundJobs(void *arg);
+void lazyfreeFreeObjectFromBioThread(robj *o);
+void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2);
+void lazyfreeFreeSlotsMapFromBioThread(zskiplist *sl);
/* Make sure we have enough stack to perform all the things we do in the
* main thread. */
@@ -96,9 +100,10 @@ void bioInit(void) {
int j;
/* Initialization of state vars and objects */
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
pthread_mutex_init(&bio_mutex[j],NULL);
- pthread_cond_init(&bio_condvar[j],NULL);
+ pthread_cond_init(&bio_newjob_cond[j],NULL);
+ pthread_cond_init(&bio_step_cond[j],NULL);
bio_jobs[j] = listCreate();
bio_pending[j] = 0;
}
@@ -113,10 +118,10 @@ void bioInit(void) {
/* Ready to spawn our threads. We use the single argument the thread
* function accepts in order to pass the job ID the thread is
* responsible for. */
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
void *arg = (void*)(unsigned long) j;
if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
- redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs.");
+ serverLog(LL_WARNING,"Fatal: Can't initialize Background Jobs.");
exit(1);
}
bio_threads[j] = thread;
@@ -133,7 +138,7 @@ void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
pthread_mutex_lock(&bio_mutex[type]);
listAddNodeTail(bio_jobs[type],job);
bio_pending[type]++;
- pthread_cond_signal(&bio_condvar[type]);
+ pthread_cond_signal(&bio_newjob_cond[type]);
pthread_mutex_unlock(&bio_mutex[type]);
}
@@ -142,6 +147,13 @@ void *bioProcessBackgroundJobs(void *arg) {
unsigned long type = (unsigned long) arg;
sigset_t sigset;
+ /* Check that the type is within the right interval. */
+ if (type >= BIO_NUM_OPS) {
+ serverLog(LL_WARNING,
+ "Warning: bio thread started with wrong type %lu",type);
+ return NULL;
+ }
+
/* Make the thread killable at any time, so that bioKillThreads()
* can work reliably. */
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
@@ -153,7 +165,7 @@ void *bioProcessBackgroundJobs(void *arg) {
sigemptyset(&sigset);
sigaddset(&sigset, SIGALRM);
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
while(1) {
@@ -161,7 +173,7 @@ void *bioProcessBackgroundJobs(void *arg) {
/* The loop always starts with the lock hold. */
if (listLength(bio_jobs[type]) == 0) {
- pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]);
+ pthread_cond_wait(&bio_newjob_cond[type],&bio_mutex[type]);
continue;
}
/* Pop the job from the queue. */
@@ -172,15 +184,29 @@ void *bioProcessBackgroundJobs(void *arg) {
pthread_mutex_unlock(&bio_mutex[type]);
/* Process the job accordingly to its type. */
- if (type == REDIS_BIO_CLOSE_FILE) {
+ if (type == BIO_CLOSE_FILE) {
close((long)job->arg1);
- } else if (type == REDIS_BIO_AOF_FSYNC) {
- aof_fsync((long)job->arg1);
+ } else if (type == BIO_AOF_FSYNC) {
+ redis_fsync((long)job->arg1);
+ } else if (type == BIO_LAZY_FREE) {
+ /* What we free changes depending on what arguments are set:
+ * arg1 -> free the object at pointer.
+ * arg2 & arg3 -> free two dictionaries (a Redis DB).
+ * only arg3 -> free the skiplist. */
+ if (job->arg1)
+ lazyfreeFreeObjectFromBioThread(job->arg1);
+ else if (job->arg2 && job->arg3)
+ lazyfreeFreeDatabaseFromBioThread(job->arg2,job->arg3);
+ else if (job->arg3)
+ lazyfreeFreeSlotsMapFromBioThread(job->arg3);
} else {
- redisPanic("Wrong job type in bioProcessBackgroundJobs().");
+ serverPanic("Wrong job type in bioProcessBackgroundJobs().");
}
zfree(job);
+ /* Unblock threads blocked on bioWaitStepOfType() if any. */
+ pthread_cond_broadcast(&bio_step_cond[type]);
+
/* Lock again before reiterating the loop, if there are no longer
* jobs to process we'll block again in pthread_cond_wait(). */
pthread_mutex_lock(&bio_mutex[type]);
@@ -198,6 +224,28 @@ unsigned long long bioPendingJobsOfType(int type) {
return val;
}
+/* If there are pending jobs for the specified type, the function blocks
+ * and waits until the next job has been processed. Otherwise the function
+ * does not block and returns ASAP.
+ *
+ * The function returns the number of jobs still to process for the
+ * requested type.
+ *
+ * This function is useful when, from another thread, we want to wait
+ * for a bio.c thread to do more work in a blocking way.
+ */
+unsigned long long bioWaitStepOfType(int type) {
+ unsigned long long val;
+ pthread_mutex_lock(&bio_mutex[type]);
+ val = bio_pending[type];
+ if (val != 0) {
+ pthread_cond_wait(&bio_step_cond[type],&bio_mutex[type]);
+ val = bio_pending[type];
+ }
+ pthread_mutex_unlock(&bio_mutex[type]);
+ return val;
+}
+
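A sketch of the intended call pattern (hypothetical caller, not part of the patch): a thread that must wait until a whole queue drains can simply loop on the returned count, since each call waits for at most one more job to complete:

    /* Block until the BIO_LAZY_FREE queue is fully drained: each call
     * waits for at most one job to complete and returns the number of
     * jobs still pending for that type. */
    while (bioWaitStepOfType(BIO_LAZY_FREE) != 0);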
/* Kill the running bio threads in an unclean way. This function should be
* used only when it's critical to stop the threads for some reason.
* Currently Redis does this only on crash (for instance on SIGSEGV) in order
@@ -205,14 +253,14 @@ unsigned long long bioPendingJobsOfType(int type) {
void bioKillThreads(void) {
int err, j;
- for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
+ for (j = 0; j < BIO_NUM_OPS; j++) {
if (pthread_cancel(bio_threads[j]) == 0) {
if ((err = pthread_join(bio_threads[j],NULL)) != 0) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bio thread for job type #%d can be joined: %s",
j, strerror(err));
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bio thread for job type #%d terminated",j);
}
}
diff --git a/src/bio.h b/src/bio.h
index 85f03ad1a..4b15d1c4d 100644
--- a/src/bio.h
+++ b/src/bio.h
@@ -31,11 +31,12 @@
void bioInit(void);
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3);
unsigned long long bioPendingJobsOfType(int type);
-void bioWaitPendingJobsLE(int type, unsigned long long num);
+unsigned long long bioWaitStepOfType(int type);
time_t bioOlderJobOfType(int type);
void bioKillThreads(void);
/* Background job opcodes */
-#define REDIS_BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
-#define REDIS_BIO_AOF_FSYNC 1 /* Deferred AOF fsync. */
-#define REDIS_BIO_NUM_OPS 2
+#define BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
+#define BIO_AOF_FSYNC 1 /* Deferred AOF fsync. */
+#define BIO_LAZY_FREE 2 /* Deferred objects freeing. */
+#define BIO_NUM_OPS 3
diff --git a/src/bitops.c b/src/bitops.c
index 4c8662244..23f2266a7 100644
--- a/src/bitops.c
+++ b/src/bitops.c
@@ -28,33 +28,12 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* -----------------------------------------------------------------------------
* Helpers and low level bit functions.
* -------------------------------------------------------------------------- */
-/* This helper function used by GETBIT / SETBIT parses the bit offset argument
- * making sure an error is returned if it is negative or if it overflows
- * Redis 512 MB limit for the string value. */
-static int getBitOffsetFromArgument(redisClient *c, robj *o, size_t *offset) {
- long long loffset;
- char *err = "bit offset is not an integer or out of range";
-
- if (getLongLongFromObjectOrReply(c,o,&loffset,err) != REDIS_OK)
- return REDIS_ERR;
-
- /* Limit offset to 512MB in bytes */
- if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
- {
- addReplyError(c,err);
- return REDIS_ERR;
- }
-
- *offset = (size_t)loffset;
- return REDIS_OK;
-}
-
/* Count the number of bits set in the binary array pointed to by 's',
 * 'count' bytes long. The implementation of this function is required to
 * work with an input string length up to 512 MB. */
@@ -125,6 +104,7 @@ long redisBitpos(void *s, unsigned long count, int bit) {
unsigned long skipval, word = 0, one;
long pos = 0; /* Position of bit, to return to the caller. */
unsigned long j;
+ int found;
/* Process whole words first, seeking for first word that is not
* all ones or all zeros respectively if we are looking for zeros
@@ -138,21 +118,27 @@ long redisBitpos(void *s, unsigned long count, int bit) {
/* Skip initial bits not aligned to sizeof(unsigned long) byte by byte. */
skipval = bit ? 0 : UCHAR_MAX;
c = (unsigned char*) s;
+ found = 0;
while((unsigned long)c & (sizeof(*l)-1) && count) {
- if (*c != skipval) break;
+ if (*c != skipval) {
+ found = 1;
+ break;
+ }
c++;
count--;
pos += 8;
}
/* Skip bits with full word step. */
- skipval = bit ? 0 : ULONG_MAX;
l = (unsigned long*) c;
- while (count >= sizeof(*l)) {
- if (*l != skipval) break;
- l++;
- count -= sizeof(*l);
- pos += sizeof(*l)*8;
+ if (!found) {
+ skipval = bit ? 0 : ULONG_MAX;
+ while (count >= sizeof(*l)) {
+ if (*l != skipval) break;
+ l++;
+ count -= sizeof(*l);
+ pos += sizeof(*l)*8;
+ }
}
/* Load bytes into "word" considering the first byte as the most significant
@@ -195,10 +181,213 @@ long redisBitpos(void *s, unsigned long count, int bit) {
/* If we reached this point, there is a bug in the algorithm, since
* the case of no match is handled as a special case before. */
- redisPanic("End of redisBitpos() reached.");
+ serverPanic("End of redisBitpos() reached.");
return 0; /* Just to avoid warnings. */
}
+/* The following set.*Bitfield and get.*Bitfield functions implement setting
+ * and getting arbitrary size (up to 64 bits) signed and unsigned integers
+ * at arbitrary positions into a bitmap.
+ *
+ * The representation considers the bitmap as having bit number 0 as the
+ * most significant bit of the first byte, and so forth. So, for example,
+ * setting a 5 bit unsigned integer to value 23 at offset 7 into a bitmap
+ * previously set to all zeroes will produce the following representation:
+ *
+ * +--------+--------+
+ * |00000001|01110000|
+ * +--------+--------+
+ *
+ * When offsets and integer sizes are aligned to byte boundaries, this is the
+ * same as big endian; however, when such alignment does not exist, it's
+ * important to also understand how the bits inside a byte are ordered.
+ *
+ * Note that this format follows the same convention as SETBIT and related
+ * commands.
+ */
+
+void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value) {
+ uint64_t byte, bit, byteval, bitval, j;
+
+ for (j = 0; j < bits; j++) {
+ bitval = (value & ((uint64_t)1<<(bits-1-j))) != 0;
+ byte = offset >> 3;
+ bit = 7 - (offset & 0x7);
+ byteval = p[byte];
+ byteval &= ~(1 << bit);
+ byteval |= bitval << bit;
+ p[byte] = byteval & 0xff;
+ offset++;
+ }
+}
+
+void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value) {
+ uint64_t uv = value; /* Casting will add UINT64_MAX + 1 if v is negative. */
+ setUnsignedBitfield(p,offset,bits,uv);
+}
+
+uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
+ uint64_t byte, bit, byteval, bitval, j, value = 0;
+
+ for (j = 0; j < bits; j++) {
+ byte = offset >> 3;
+ bit = 7 - (offset & 0x7);
+ byteval = p[byte];
+ bitval = (byteval >> bit) & 1;
+ value = (value<<1) | bitval;
+ offset++;
+ }
+ return value;
+}
+
+int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) {
+ int64_t value;
+ union {uint64_t u; int64_t i;} conv;
+
+ /* Converting from unsigned to signed is undefined when the value does
+ * not fit, however here we assume two's complement and the original value
+ * was obtained from signed -> unsigned conversion, so we'll find the
+ * most significant bit set if the original value was negative.
+ *
+ * Note that two's complement is mandatory for exact-width types
+ * according to the C99 standard. */
+ conv.u = getUnsignedBitfield(p,offset,bits);
+ value = conv.i;
+
+ /* If the top significant bit is 1, propagate it to all the
+ * higher bits for two's complement representation of signed
+ * integers. */
+ if (value & ((uint64_t)1 << (bits-1)))
+ value |= ((uint64_t)-1) << bits;
+ return value;
+}
+
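To make the layout described above concrete, here is a small check (illustrative only, not part of the patch) that reproduces the documented example with the two functions just defined:

    unsigned char bitmap[2] = {0, 0};
    setUnsignedBitfield(bitmap, 7, 5, 23);          /* 23 = 10111 binary */
    /* Now bitmap[0] == 0x01 (00000001) and bitmap[1] == 0x70 (01110000),
     * matching the diagram in the comment above. */
    uint64_t v = getUnsignedBitfield(bitmap, 7, 5); /* v == 23 */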
+/* The following two functions detect overflow of a value in the context
+ * of storing it as an unsigned or signed integer with the specified
+ * number of bits. The functions both take the value and a possible increment.
+ * If no overflow can happen and value+increment fit inside the limits,
+ * zero is returned; in case of overflow, 1 is returned; in case of
+ * underflow, -1 is returned.
+ *
+ * When non-zero is returned (overflow or underflow), if not NULL, *limit is
+ * set to the value the operation should result in when an overflow happens,
+ * depending on the specified overflow semantics:
+ *
+ * For BFOVERFLOW_SAT, if 1 is returned, *limit is set to the maximum value
+ * that can be stored in that integer. When -1 is returned, *limit is set
+ * to the minimum value that an integer of that size can represent.
+ *
+ * For BFOVERFLOW_WRAP *limit is set by performing the operation in order to
+ * "wrap" around towards zero for unsigned integers, or towards the most
+ * negative number that is possible to represent for signed integers. */
+
+#define BFOVERFLOW_WRAP 0
+#define BFOVERFLOW_SAT 1
+#define BFOVERFLOW_FAIL 2 /* Used by the BITFIELD command implementation. */
+
+int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit) {
+ uint64_t max = (bits == 64) ? UINT64_MAX : (((uint64_t)1<<bits)-1);
+ int64_t maxincr = max-value;
+ int64_t minincr = -value;
+
+ if (value > max || (incr > 0 && incr > maxincr)) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = max;
+ }
+ }
+ return 1;
+ } else if (incr < 0 && incr < minincr) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = 0;
+ }
+ }
+ return -1;
+ }
+ return 0;
+
+handle_wrap:
+ {
+ uint64_t mask = ((uint64_t)-1) << bits;
+ uint64_t res = value+incr;
+
+ res &= ~mask;
+ *limit = res;
+ }
+ return 1;
+}
+
+int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int owtype, int64_t *limit) {
+ int64_t max = (bits == 64) ? INT64_MAX : (((int64_t)1<<(bits-1))-1);
+ int64_t min = (-max)-1;
+
+ /* Note that maxincr and minincr could overflow, but we use the values
+ * only after checking the 'value' range, so when we use them no overflow
+ * happens. */
+ int64_t maxincr = max-value;
+ int64_t minincr = min-value;
+
+ if (value > max || (bits != 64 && incr > maxincr) || (value >= 0 && incr > 0 && incr > maxincr))
+ {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = max;
+ }
+ }
+ return 1;
+ } else if (value < min || (bits != 64 && incr < minincr) || (value < 0 && incr < 0 && incr < minincr)) {
+ if (limit) {
+ if (owtype == BFOVERFLOW_WRAP) {
+ goto handle_wrap;
+ } else if (owtype == BFOVERFLOW_SAT) {
+ *limit = min;
+ }
+ }
+ return -1;
+ }
+ return 0;
+
+handle_wrap:
+ {
+ uint64_t mask = ((uint64_t)-1) << bits;
+ uint64_t msb = (uint64_t)1 << (bits-1);
+ uint64_t a = value, b = incr, c;
+ c = a+b; /* Perform addition as unsigned so that's defined. */
+
+ /* If the sign bit is set, propagate to all the higher order
+ * bits, to cap the negative value. If it's clear, mask to
+ * the positive integer limit. */
+ if (c & msb) {
+ c |= mask;
+ } else {
+ c &= ~mask;
+ }
+ *limit = c;
+ }
+ return 1;
+}
+
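For instance (an illustrative sketch, not part of the patch), incrementing 250 by 10 in an 8 bit unsigned field overflows, and the value reported through *limit depends on the requested semantics:

    uint64_t limit;
    /* Saturate at the u8 maximum: returns 1 (overflow), limit == 255. */
    checkUnsignedBitfieldOverflow(250, 10, 8, BFOVERFLOW_SAT, &limit);
    /* Wrap around modulo 2^8 (260 & 0xff): returns 1, limit == 4. */
    checkUnsignedBitfieldOverflow(250, 10, 8, BFOVERFLOW_WRAP, &limit);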
+/* Debugging function. Just show bits in the specified bitmap. Not used
+ * but kept here to avoid rewriting it when debugging is needed. */
+void printBits(unsigned char *p, unsigned long count) {
+ unsigned long j, i, byte;
+
+ for (j = 0; j < count; j++) {
+ byte = p[j];
+ for (i = 0x80; i > 0; i /= 2)
+ printf("%c", (byte & i) ? '1' : '0');
+ printf("|");
+ }
+ printf("\n");
+}
+
/* -----------------------------------------------------------------------------
* Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP.
* -------------------------------------------------------------------------- */
@@ -208,19 +397,143 @@ long redisBitpos(void *s, unsigned long count, int bit) {
#define BITOP_XOR 2
#define BITOP_NOT 3
+#define BITFIELDOP_GET 0
+#define BITFIELDOP_SET 1
+#define BITFIELDOP_INCRBY 2
+
+/* This helper function, used by GETBIT / SETBIT, parses the bit offset
+ * argument, making sure an error is returned if it is negative or if it
+ * overflows the Redis 512 MB limit for the string value.
+ *
+ * If the 'hash' argument is true, and 'bits' is positive, then the command
+ * will also parse bit offsets prefixed by "#". In such a case the offset
+ * is multiplied by 'bits'. This is useful for the BITFIELD command. */
+int getBitOffsetFromArgument(client *c, robj *o, size_t *offset, int hash, int bits) {
+ long long loffset;
+ char *err = "bit offset is not an integer or out of range";
+ char *p = o->ptr;
+ size_t plen = sdslen(p);
+ int usehash = 0;
+
+ /* Handle #<offset> form. */
+ if (p[0] == '#' && hash && bits > 0) usehash = 1;
+
+ if (string2ll(p+usehash,plen-usehash,&loffset) == 0) {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ /* Adjust the offset by 'bits' for #<offset> form. */
+ if (usehash) loffset *= bits;
+
+ /* Limit offset to 512MB in bytes */
+ if ((loffset < 0) || ((unsigned long long)loffset >> 3) >= (512*1024*1024))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ *offset = (size_t)loffset;
+ return C_OK;
+}
+
+/* This helper function for BITFIELD parses a bitfield type in the form
+ * <sign><bits> where sign is 'u' or 'i' for unsigned and signed, and
+ * bits is a value between 1 and 64. However, 64 bit unsigned integers
+ * are reported as an error because of current limitations of the Redis
+ * protocol to return unsigned integer values greater than INT64_MAX.
+ *
+ * On error C_ERR is returned and an error is sent to the client. */
+int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
+ char *p = o->ptr;
+ char *err = "Invalid bitfield type. Use something like i16 u8. Note that u64 is not supported but i64 is.";
+ long long llbits;
+
+ if (p[0] == 'i') {
+ *sign = 1;
+ } else if (p[0] == 'u') {
+ *sign = 0;
+ } else {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+
+ if ((string2ll(p+1,strlen(p+1),&llbits)) == 0 ||
+ llbits < 1 ||
+ (*sign == 1 && llbits > 64) ||
+ (*sign == 0 && llbits > 63))
+ {
+ addReplyError(c,err);
+ return C_ERR;
+ }
+ *bits = llbits;
+ return C_OK;
+}
+
+/* This is a helper function for command implementations that need to write
+ * bits to a string object. It creates the string or pads it with zeroes
+ * so that the 'maxbit' bit can be addressed. The object is finally
+ * returned. Otherwise, if the key holds a wrong type, NULL is returned and
+ * an error is sent to the client. */
+robj *lookupStringForBitCommand(client *c, size_t maxbit) {
+ size_t byte = maxbit >> 3;
+ robj *o = lookupKeyWrite(c->db,c->argv[1]);
+
+ if (o == NULL) {
+ o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
+ dbAdd(c->db,c->argv[1],o);
+ } else {
+ if (checkType(c,o,OBJ_STRING)) return NULL;
+ o = dbUnshareStringValue(c->db,c->argv[1],o);
+ o->ptr = sdsgrowzero(o->ptr,byte+1);
+ }
+ return o;
+}
+
+/* Return a pointer to the string object content, and store its length
+ * in 'len'. The user is required to pass a (likely stack allocated) buffer
+ * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in case
+ * the object is integer encoded, in order to provide the representation
+ * without using a heap allocation.
+ *
+ * The function returns the pointer to the object's array of bytes representing
+ * the string it contains, which may be a pointer to 'llbuf' or to the
+ * internal object representation. As a side effect, 'len' is filled with
+ * the length of such buffer.
+ *
+ * If the source object is NULL the function is guaranteed to return NULL
+ * and set 'len' to 0. */
+unsigned char *getObjectReadOnlyString(robj *o, long *len, char *llbuf) {
+ serverAssert(o == NULL || o->type == OBJ_STRING); /* NULL allowed, per the contract above. */
+ unsigned char *p = NULL;
+
+ /* Set the 'p' pointer to the string, that can be just a stack allocated
+ * array if our string was integer encoded. */
+ if (o && o->encoding == OBJ_ENCODING_INT) {
+ p = (unsigned char*) llbuf;
+ if (len) *len = ll2string(llbuf,LONG_STR_SIZE,(long)o->ptr);
+ } else if (o) {
+ p = (unsigned char*) o->ptr;
+ if (len) *len = sdslen(o->ptr);
+ } else {
+ if (len) *len = 0;
+ }
+ return p;
+}
+
/* SETBIT key offset bitvalue */
-void setbitCommand(redisClient *c) {
+void setbitCommand(client *c) {
robj *o;
char *err = "bit is not an integer or out of range";
size_t bitoffset;
- int byte, bit;
+ ssize_t byte, bit;
int byteval, bitval;
long on;
- if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
+ if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
return;
- if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != C_OK)
return;
/* Bits can only be set or cleared... */
@@ -229,20 +542,10 @@ void setbitCommand(redisClient *c) {
return;
}
- o = lookupKeyWrite(c->db,c->argv[1]);
- if (o == NULL) {
- o = createObject(REDIS_STRING,sdsempty());
- dbAdd(c->db,c->argv[1],o);
- } else {
- if (checkType(c,o,REDIS_STRING)) return;
- o = dbUnshareStringValue(c->db,c->argv[1],o);
- }
-
- /* Grow sds value to the right length if necessary */
- byte = bitoffset >> 3;
- o->ptr = sdsgrowzero(o->ptr,byte+1);
+ if ((o = lookupStringForBitCommand(c,bitoffset)) == NULL) return;
/* Get current values */
+ byte = bitoffset >> 3;
byteval = ((uint8_t*)o->ptr)[byte];
bit = 7 - (bitoffset & 0x7);
bitval = byteval & (1 << bit);
@@ -252,24 +555,24 @@ void setbitCommand(redisClient *c) {
byteval |= ((on & 0x1) << bit);
((uint8_t*)o->ptr)[byte] = byteval;
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty++;
addReply(c, bitval ? shared.cone : shared.czero);
}
/* GETBIT key offset */
-void getbitCommand(redisClient *c) {
+void getbitCommand(client *c) {
robj *o;
char llbuf[32];
size_t bitoffset;
size_t byte, bit;
size_t bitval = 0;
- if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset) != REDIS_OK)
+ if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
@@ -285,7 +588,7 @@ void getbitCommand(redisClient *c) {
}
/* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN */
-void bitopCommand(redisClient *c) {
+void bitopCommand(client *c) {
char *opname = c->argv[1]->ptr;
robj *o, *targetkey = c->argv[2];
unsigned long op, j, numkeys;
@@ -332,7 +635,7 @@ void bitopCommand(redisClient *c) {
continue;
}
/* Return an error if one of the keys is not a string. */
- if (checkType(c,o,REDIS_STRING)) {
+ if (checkType(c,o,OBJ_STRING)) {
unsigned long i;
for (i = 0; i < j; i++) {
if (objects[i])
@@ -358,8 +661,11 @@ void bitopCommand(redisClient *c) {
/* Fast path: as far as we have data for all the input bitmaps we
* can take a fast path that performs much better than the
- * vanilla algorithm. */
+ * vanilla algorithm. On ARM we skip the fast path since it will
+ * result in GCC compiling the code using multiple-word load/store
+ * operations that are not supported even in ARM >= v6. */
j = 0;
+ #ifndef USE_ALIGNED_ACCESS
if (minlen >= sizeof(unsigned long)*4 && numkeys <= 16) {
unsigned long *lp[16];
unsigned long *lres = (unsigned long*) res;
@@ -420,6 +726,7 @@ void bitopCommand(redisClient *c) {
}
}
}
+ #endif
/* j is set to the next byte to process by the previous loop. */
for (; j < maxlen; j++) {
@@ -446,46 +753,41 @@ void bitopCommand(redisClient *c) {
/* Store the computed value into the target key */
if (maxlen) {
- o = createObject(REDIS_STRING,res);
+ o = createObject(OBJ_STRING,res);
setKey(c->db,targetkey,o);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",targetkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id);
decrRefCount(o);
} else if (dbDelete(c->db,targetkey)) {
signalModifiedKey(c->db,targetkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",targetkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id);
}
server.dirty++;
addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */
}
/* BITCOUNT key [start end] */
-void bitcountCommand(redisClient *c) {
+void bitcountCommand(client *c) {
robj *o;
long start, end, strlen;
unsigned char *p;
- char llbuf[32];
+ char llbuf[LONG_STR_SIZE];
/* Lookup, check for type, and return 0 for non existing keys. */
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
-
- /* Set the 'p' pointer to the string, that can be just a stack allocated
- * array if our string was integer encoded. */
- if (o->encoding == REDIS_ENCODING_INT) {
- p = (unsigned char*) llbuf;
- strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
- } else {
- p = (unsigned char*) o->ptr;
- strlen = sdslen(o->ptr);
- }
+ checkType(c,o,OBJ_STRING)) return;
+ p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4) {
- if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
- if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
/* Convert negative indexes */
+ if (start < 0 && end < 0 && start > end) {
+ addReply(c,shared.czero);
+ return;
+ }
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
@@ -513,16 +815,16 @@ void bitcountCommand(redisClient *c) {
}
/* BITPOS key bit [start [end]] */
-void bitposCommand(redisClient *c) {
+void bitposCommand(client *c) {
robj *o;
long bit, start, end, strlen;
unsigned char *p;
- char llbuf[32];
+ char llbuf[LONG_STR_SIZE];
int end_given = 0;
/* Parse the bit argument to understand what we are looking for, set
* or clear bits. */
- if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != C_OK)
return;
if (bit != 0 && bit != 1) {
addReplyError(c, "The bit argument must be 1 or 0.");
@@ -536,24 +838,15 @@ void bitposCommand(redisClient *c) {
addReplyLongLong(c, bit ? -1 : 0);
return;
}
- if (checkType(c,o,REDIS_STRING)) return;
-
- /* Set the 'p' pointer to the string, that can be just a stack allocated
- * array if our string was integer encoded. */
- if (o->encoding == REDIS_ENCODING_INT) {
- p = (unsigned char*) llbuf;
- strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
- } else {
- p = (unsigned char*) o->ptr;
- strlen = sdslen(o->ptr);
- }
+ if (checkType(c,o,OBJ_STRING)) return;
+ p = getObjectReadOnlyString(o,&strlen,llbuf);
/* Parse start/end range if any. */
if (c->argc == 4 || c->argc == 5) {
- if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK)
return;
if (c->argc == 5) {
- if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK)
return;
end_given = 1;
} else {
@@ -598,3 +891,235 @@ void bitposCommand(redisClient *c) {
addReplyLongLong(c,pos);
}
}
+
+/* BITFIELD key subcommand-1 arg ... subcommand-2 arg ... subcommand-N ...
+ *
+ * Supported subcommands:
+ *
+ * GET <type> <offset>
+ * SET <type> <offset> <value>
+ * INCRBY <type> <offset> <increment>
+ * OVERFLOW [WRAP|SAT|FAIL]
+ */
+
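As an illustrative aside (a hypothetical redis-cli session, not part of the patch), the subcommands compose left to right, and the "#" offset form parsed by getBitOffsetFromArgument() addresses whole fields rather than raw bit positions:

    127.0.0.1:6379> BITFIELD bf SET u8 #0 255 GET u8 #0
    1) (integer) 0
    2) (integer) 255
    127.0.0.1:6379> BITFIELD bf OVERFLOW SAT INCRBY u8 #0 10
    1) (integer) 255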
+struct bitfieldOp {
+ uint64_t offset; /* Bitfield offset. */
+ int64_t i64; /* Increment amount (INCRBY) or SET value */
+ int opcode; /* Operation id. */
+ int owtype; /* Overflow type to use. */
+ int bits; /* Integer bitfield bits width. */
+ int sign; /* True if signed, otherwise unsigned op. */
+};
+
+void bitfieldCommand(client *c) {
+ robj *o;
+ size_t bitoffset;
+ int j, numops = 0, changes = 0;
+ struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
+ int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
+ int readonly = 1;
+ size_t highest_write_offset = 0;
+
+ for (j = 2; j < c->argc; j++) {
+ int remargs = c->argc-j-1; /* Remaining args other than current. */
+ char *subcmd = c->argv[j]->ptr; /* Current command name. */
+ int opcode; /* Current operation code. */
+ long long i64 = 0; /* Signed SET value. */
+ int sign = 0; /* Signed or unsigned type? */
+ int bits = 0; /* Bitfield width in bits. */
+
+ if (!strcasecmp(subcmd,"get") && remargs >= 2)
+ opcode = BITFIELDOP_GET;
+ else if (!strcasecmp(subcmd,"set") && remargs >= 3)
+ opcode = BITFIELDOP_SET;
+ else if (!strcasecmp(subcmd,"incrby") && remargs >= 3)
+ opcode = BITFIELDOP_INCRBY;
+ else if (!strcasecmp(subcmd,"overflow") && remargs >= 1) {
+ char *owtypename = c->argv[j+1]->ptr;
+ j++;
+ if (!strcasecmp(owtypename,"wrap"))
+ owtype = BFOVERFLOW_WRAP;
+ else if (!strcasecmp(owtypename,"sat"))
+ owtype = BFOVERFLOW_SAT;
+ else if (!strcasecmp(owtypename,"fail"))
+ owtype = BFOVERFLOW_FAIL;
+ else {
+ addReplyError(c,"Invalid OVERFLOW type specified");
+ zfree(ops);
+ return;
+ }
+ continue;
+ } else {
+ addReply(c,shared.syntaxerr);
+ zfree(ops);
+ return;
+ }
+
+ /* Get the type and offset arguments, common to all the ops. */
+ if (getBitfieldTypeFromArgument(c,c->argv[j+1],&sign,&bits) != C_OK) {
+ zfree(ops);
+ return;
+ }
+
+ if (getBitOffsetFromArgument(c,c->argv[j+2],&bitoffset,1,bits) != C_OK){
+ zfree(ops);
+ return;
+ }
+
+ if (opcode != BITFIELDOP_GET) {
+ readonly = 0;
+ if (highest_write_offset < bitoffset + bits - 1)
+ highest_write_offset = bitoffset + bits - 1;
+ /* INCRBY and SET require another argument. */
+ if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){
+ zfree(ops);
+ return;
+ }
+ }
+
+ /* Populate the array of operations we'll process. */
+ ops = zrealloc(ops,sizeof(*ops)*(numops+1));
+ ops[numops].offset = bitoffset;
+ ops[numops].i64 = i64;
+ ops[numops].opcode = opcode;
+ ops[numops].owtype = owtype;
+ ops[numops].bits = bits;
+ ops[numops].sign = sign;
+ numops++;
+
+ j += 3 - (opcode == BITFIELDOP_GET);
+ }
+
+ if (readonly) {
+ /* Lookup for read is ok if key doesn't exist, but errors
+ * if it's not a string. */
+ o = lookupKeyRead(c->db,c->argv[1]);
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ } else {
+ /* Lookup by making room up to the farthest bit reached by
+ * this operation. */
+ if ((o = lookupStringForBitCommand(c,
+ highest_write_offset)) == NULL) return;
+ }
+
+ addReplyMultiBulkLen(c,numops);
+
+ /* Actually process the operations. */
+ for (j = 0; j < numops; j++) {
+ struct bitfieldOp *thisop = ops+j;
+
+ /* Execute the operation. */
+ if (thisop->opcode == BITFIELDOP_SET ||
+ thisop->opcode == BITFIELDOP_INCRBY)
+ {
+ /* SET and INCRBY: We handle both with the same code path
+ * for simplicity. SET return value is the previous value so
+ * we need fetch & store as well. */
+
+ /* We need two different but very similar code paths for signed
+ * and unsigned operations, since the set of functions to get/set
+ * the integers and the used variables types are different. */
+ if (thisop->sign) {
+ int64_t oldval, newval, wrapped, retval;
+ int overflow;
+
+ oldval = getSignedBitfield(o->ptr,thisop->offset,
+ thisop->bits);
+
+ if (thisop->opcode == BITFIELDOP_INCRBY) {
+ newval = oldval + thisop->i64;
+ overflow = checkSignedBitfieldOverflow(oldval,
+ thisop->i64,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = newval;
+ } else {
+ newval = thisop->i64;
+ overflow = checkSignedBitfieldOverflow(newval,
+ 0,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = oldval;
+ }
+
+ /* On overflow, if the type is "FAIL", don't write and return
+ * NULL to signal the condition. */
+ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
+ addReplyLongLong(c,retval);
+ setSignedBitfield(o->ptr,thisop->offset,
+ thisop->bits,newval);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ } else {
+ uint64_t oldval, newval, wrapped, retval;
+ int overflow;
+
+ oldval = getUnsignedBitfield(o->ptr,thisop->offset,
+ thisop->bits);
+
+ if (thisop->opcode == BITFIELDOP_INCRBY) {
+ newval = oldval + thisop->i64;
+ overflow = checkUnsignedBitfieldOverflow(oldval,
+ thisop->i64,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = newval;
+ } else {
+ newval = thisop->i64;
+ overflow = checkUnsignedBitfieldOverflow(newval,
+ 0,thisop->bits,thisop->owtype,&wrapped);
+ if (overflow) newval = wrapped;
+ retval = oldval;
+ }
+ /* On overflow, if the type is "FAIL", don't write and return
+ * NULL to signal the condition. */
+ if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) {
+ addReplyLongLong(c,retval);
+ setUnsignedBitfield(o->ptr,thisop->offset,
+ thisop->bits,newval);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+ }
+ changes++;
+ } else {
+ /* GET */
+ unsigned char buf[9];
+ long strlen = 0;
+ unsigned char *src = NULL;
+ char llbuf[LONG_STR_SIZE];
+
+ if (o != NULL)
+ src = getObjectReadOnlyString(o,&strlen,llbuf);
+
+ /* For GET we use a trick: before executing the operation
+ * copy up to 9 bytes to a local buffer, so that we can easily
+ * execute up to 64 bit operations that are at actual string
+ * object boundaries. */
+ memset(buf,0,9);
+ int i;
+ size_t byte = thisop->offset >> 3;
+ for (i = 0; i < 9; i++) {
+ if (src == NULL || i+byte >= (size_t)strlen) break;
+ buf[i] = src[i+byte];
+ }
+
+ /* Now operate on the copied buffer which is guaranteed
+ * to be zero-padded. */
+ if (thisop->sign) {
+ int64_t val = getSignedBitfield(buf,thisop->offset-(byte*8),
+ thisop->bits);
+ addReplyLongLong(c,val);
+ } else {
+ uint64_t val = getUnsignedBitfield(buf,thisop->offset-(byte*8),
+ thisop->bits);
+ addReplyLongLong(c,val);
+ }
+ }
+ }
+
+ if (changes) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
+ server.dirty += changes;
+ }
+ zfree(ops);
+}
diff --git a/src/blocked.c b/src/blocked.c
index 4cd632bd3..4a667501f 100644
--- a/src/blocked.c
+++ b/src/blocked.c
@@ -34,17 +34,17 @@
* getTimeoutFromObjectOrReply() is just a utility function to parse a
* timeout argument since blocking operations usually require a timeout.
*
- * blockClient() set the REDIS_BLOCKED flag in the client, and set the
- * specified block type 'btype' filed to one of REDIS_BLOCKED_* macros.
+ * blockClient() sets the CLIENT_BLOCKED flag in the client, and sets the
+ * specified block type 'btype' field to one of the BLOCKED_* macros.
*
* unblockClient() unblocks the client doing the following:
* 1) It calls the btype-specific function to cleanup the state.
- * 2) It unblocks the client by unsetting the REDIS_BLOCKED flag.
+ * 2) It unblocks the client by unsetting the CLIENT_BLOCKED flag.
* 3) It puts the client into a list of just unblocked clients that are
* processed ASAP in the beforeSleep() event loop callback, so that
* if there is some query buffer to process, we do it. This is also
* required because otherwise there is no 'readable' event fired, we
- * already read the pending commands. We also set the REDIS_UNBLOCKED
+ * already read the pending commands. We also set the CLIENT_UNBLOCKED
* flag to remember the client is in the unblocked_clients list.
*
* processUnblockedClients() is called inside the beforeSleep() function
@@ -59,9 +59,13 @@
* When implementing a new type of blocking operation, the implementation
* should modify unblockClient() and replyToBlockedClientTimedOut() in order
* to handle the btype-specific behavior of these two functions.
+ * If the blocking operation waits for certain keys to change state, the
+ * clusterRedirectBlockedClientIfNeeded() function should also be updated.
*/
-#include "redis.h"
+#include "server.h"
+
+int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where);
/* Get a timeout value from an object and store it into 'timeout'.
* The final timeout is always stored as milliseconds as a time where the
@@ -71,16 +75,16 @@
* Note that if the timeout is zero (usually from the point of view of
* commands API this means no timeout) the value stored into 'timeout'
* is zero. */
-int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit) {
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit) {
long long tval;
if (getLongLongFromObjectOrReply(c,object,&tval,
- "timeout is not an integer or out of range") != REDIS_OK)
- return REDIS_ERR;
+ "timeout is not an integer or out of range") != C_OK)
+ return C_ERR;
if (tval < 0) {
addReplyError(c,"timeout is negative");
- return REDIS_ERR;
+ return C_ERR;
}
if (tval > 0) {
@@ -89,16 +93,17 @@ int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout,
}
*timeout = tval;
- return REDIS_OK;
+ return C_OK;
}
-/* Block a client for the specific operation type. Once the REDIS_BLOCKED
+/* Block a client for the specific operation type. Once the CLIENT_BLOCKED
* flag is set, the client query buffer is no longer processed, but accumulated,
* and will be processed when the client is unblocked. */
-void blockClient(redisClient *c, int btype) {
- c->flags |= REDIS_BLOCKED;
+void blockClient(client *c, int btype) {
+ c->flags |= CLIENT_BLOCKED;
c->btype = btype;
- server.bpop_blocked_clients++;
+ server.blocked_clients++;
+ server.blocked_clients_by_type[btype]++;
}
/* This function is called in the beforeSleep() function of the event loop
@@ -106,53 +111,478 @@ void blockClient(redisClient *c, int btype) {
* unblocked after a blocking operation. */
void processUnblockedClients(void) {
listNode *ln;
- redisClient *c;
+ client *c;
while (listLength(server.unblocked_clients)) {
ln = listFirst(server.unblocked_clients);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
c = ln->value;
listDelNode(server.unblocked_clients,ln);
- c->flags &= ~REDIS_UNBLOCKED;
- c->btype = REDIS_BLOCKED_NONE;
-
- /* Process remaining data in the input buffer. */
- if (c->querybuf && sdslen(c->querybuf) > 0) {
- server.current_client = c;
- processInputBuffer(c);
- server.current_client = NULL;
+ c->flags &= ~CLIENT_UNBLOCKED;
+
+ /* Process remaining data in the input buffer, unless the client
+ * is blocked again. Actually processInputBuffer() checks that the
+ * client is not blocked before proceeding, but things may change and
+ * the code is conceptually more correct this way. */
+ if (!(c->flags & CLIENT_BLOCKED)) {
+ if (c->querybuf && sdslen(c->querybuf) > 0) {
+ processInputBuffer(c);
+ }
}
}
}
/* Unblock a client calling the right function depending on the kind
* of operation the client is blocking for. */
-void unblockClient(redisClient *c) {
- if (c->btype == REDIS_BLOCKED_LIST) {
+void unblockClient(client *c) {
+ if (c->btype == BLOCKED_LIST ||
+ c->btype == BLOCKED_ZSET ||
+ c->btype == BLOCKED_STREAM) {
unblockClientWaitingData(c);
- } else if (c->btype == REDIS_BLOCKED_WAIT) {
+ } else if (c->btype == BLOCKED_WAIT) {
unblockClientWaitingReplicas(c);
+ } else if (c->btype == BLOCKED_MODULE) {
+ unblockClientFromModule(c);
} else {
- redisPanic("Unknown btype in unblockClient().");
+ serverPanic("Unknown btype in unblockClient().");
}
/* Clear the flags, and put the client in the unblocked list so that
* we'll process new commands in its query buffer ASAP. */
- c->flags &= ~REDIS_BLOCKED;
- c->flags |= REDIS_UNBLOCKED;
- c->btype = REDIS_BLOCKED_NONE;
- server.bpop_blocked_clients--;
- listAddNodeTail(server.unblocked_clients,c);
+ server.blocked_clients--;
+ server.blocked_clients_by_type[c->btype]--;
+ c->flags &= ~CLIENT_BLOCKED;
+ c->btype = BLOCKED_NONE;
+ /* The client may already be in the unblocked list because of a previous
+ * blocking operation; don't add it back into the list multiple times. */
+ if (!(c->flags & CLIENT_UNBLOCKED)) {
+ c->flags |= CLIENT_UNBLOCKED;
+ listAddNodeTail(server.unblocked_clients,c);
+ }
}
/* This function gets called when a blocked client timed out in order to
- * send it a reply of some kind. */
-void replyToBlockedClientTimedOut(redisClient *c) {
- if (c->btype == REDIS_BLOCKED_LIST) {
+ * send it a reply of some kind. After this function is called,
+ * unblockClient() will be called with the same client as argument. */
+void replyToBlockedClientTimedOut(client *c) {
+ if (c->btype == BLOCKED_LIST ||
+ c->btype == BLOCKED_ZSET ||
+ c->btype == BLOCKED_STREAM) {
addReply(c,shared.nullmultibulk);
- } else if (c->btype == REDIS_BLOCKED_WAIT) {
+ } else if (c->btype == BLOCKED_WAIT) {
addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset));
+ } else if (c->btype == BLOCKED_MODULE) {
+ moduleBlockedClientTimedOut(c);
} else {
- redisPanic("Unknown btype in replyToBlockedClientTimedOut().");
+ serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
+ }
+}
+
+/* Mass-unblock clients because something changed in the instance that makes
+ * blocking no longer safe. For example, clients blocked in list operations
+ * in an instance which turns from master to slave are unsafe, so this function
+ * is called when a master turns into a slave.
+ *
+ * The semantics are to send an -UNBLOCKED error to the client, disconnecting
+ * it at the same time. */
+void disconnectAllBlockedClients(void) {
+ listNode *ln;
+ listIter li;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+
+ if (c->flags & CLIENT_BLOCKED) {
+ addReplySds(c,sdsnew(
+ "-UNBLOCKED force unblock from blocking operation, "
+ "instance state changed (master -> slave?)\r\n"));
+ unblockClient(c);
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+ }
+}
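
/* A sketch of the intended call site (an assumption; the replication code
 * is outside this hunk): when the instance is demoted to slave, blocked
 * clients are dropped before replication is reconfigured. The
 * replicationSetMaster() call is shown for illustration only. */
void exampleDemoteToSlave(char *masterip, int masterport) {
    disconnectAllBlockedClients();              /* -UNBLOCKED + close. */
    replicationSetMaster(masterip, masterport); /* Point at new master. */
}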
+
+/* This function should be called by Redis every time a single command,
+ * a MULTI/EXEC block, or a Lua script terminates its execution after
+ * being called by a client. It handles serving clients blocked in
+ * lists, streams, and sorted sets, via blocking commands.
+ *
+ * All the keys with at least one client blocked that received at least
+ * one new element via some write operation are accumulated into
+ * the server.ready_keys list. This function will run the list and will
+ * serve clients accordingly. Note that the function will iterate again and
+ * again, since as a result of serving BRPOPLPUSH we may have new blocked
+ * clients to serve because of the PUSH side of BRPOPLPUSH.
+ *
+ * This function is normally "fair", that is, it will serve clients
+ * using a FIFO behavior. However this fairness is violated in certain
+ * edge cases, that is, when we have clients blocked at the same time
+ * in a sorted set and in a list, for the same key (a very odd thing to
+ * do client side, indeed!), because mismatching clients (blocking for
+ * a different type compared to the current key type) are moved to the
+ * other side of the linked list. However as long as the key is used
+ * only for a single type, as virtually any Redis application will do,
+ * the function is fair. */
+void handleClientsBlockedOnKeys(void) {
+ while(listLength(server.ready_keys) != 0) {
+ list *l;
+
+ /* Point server.ready_keys to a fresh list and save the current one
+ * locally. This way as we run the old list we are free to call
+ * signalKeyAsReady() which may push new elements in server.ready_keys
+ * when handling clients blocked on BRPOPLPUSH. */
+ l = server.ready_keys;
+ server.ready_keys = listCreate();
+
+ while(listLength(l) != 0) {
+ listNode *ln = listFirst(l);
+ readyList *rl = ln->value;
+
+ /* First of all remove this key from db->ready_keys so that
+ * we can safely call signalKeyAsReady() against this key. */
+ dictDelete(rl->db->ready_keys,rl->key);
+
+ /* Serve clients blocked on list key. */
+ robj *o = lookupKeyWrite(rl->db,rl->key);
+ if (o != NULL && o->type == OBJ_LIST) {
+ dictEntry *de;
+
+ /* We serve clients in the same order they blocked for
+ * this key, from the first blocked to the last. */
+ de = dictFind(rl->db->blocking_keys,rl->key);
+ if (de) {
+ list *clients = dictGetVal(de);
+ int numclients = listLength(clients);
+
+ while(numclients--) {
+ listNode *clientnode = listFirst(clients);
+ client *receiver = clientnode->value;
+
+ if (receiver->btype != BLOCKED_LIST) {
+ /* Put at the tail, so that at the next call
+ * we'll not run into it again. */
+ listDelNode(clients,clientnode);
+ listAddNodeTail(clients,receiver);
+ continue;
+ }
+
+ robj *dstkey = receiver->bpop.target;
+ int where = (receiver->lastcmd &&
+ receiver->lastcmd->proc == blpopCommand) ?
+ LIST_HEAD : LIST_TAIL;
+ robj *value = listTypePop(o,where);
+
+ if (value) {
+ /* Protect receiver->bpop.target, that will be
+ * freed by the next unblockClient()
+ * call. */
+ if (dstkey) incrRefCount(dstkey);
+ unblockClient(receiver);
+
+ if (serveClientBlockedOnList(receiver,
+ rl->key,dstkey,rl->db,value,
+ where) == C_ERR)
+ {
+ /* If we failed serving the client we need
+ * to also undo the POP operation. */
+ listTypePush(o,value,where);
+ }
+
+ if (dstkey) decrRefCount(dstkey);
+ decrRefCount(value);
+ } else {
+ break;
+ }
+ }
+ }
+
+ if (listTypeLength(o) == 0) {
+ dbDelete(rl->db,rl->key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",rl->key,rl->db->id);
+ }
+ /* We don't call signalModifiedKey() as it was already called
+ * when an element was pushed on the list. */
+ }
+
+ /* Serve clients blocked on sorted set key. */
+ else if (o != NULL && o->type == OBJ_ZSET) {
+ dictEntry *de;
+
+ /* We serve clients in the same order they blocked for
+ * this key, from the first blocked to the last. */
+ de = dictFind(rl->db->blocking_keys,rl->key);
+ if (de) {
+ list *clients = dictGetVal(de);
+ int numclients = listLength(clients);
+ unsigned long zcard = zsetLength(o);
+
+ while(numclients-- && zcard) {
+ listNode *clientnode = listFirst(clients);
+ client *receiver = clientnode->value;
+
+ if (receiver->btype != BLOCKED_ZSET) {
+ /* Put at the tail, so that at the next call
+ * we'll not run into it again. */
+ listDelNode(clients,clientnode);
+ listAddNodeTail(clients,receiver);
+ continue;
+ }
+
+ int where = (receiver->lastcmd &&
+ receiver->lastcmd->proc == bzpopminCommand)
+ ? ZSET_MIN : ZSET_MAX;
+ unblockClient(receiver);
+ genericZpopCommand(receiver,&rl->key,1,where,1,NULL);
+ zcard--;
+
+ /* Replicate the command. */
+ robj *argv[2];
+ struct redisCommand *cmd = where == ZSET_MIN ?
+ server.zpopminCommand :
+ server.zpopmaxCommand;
+ argv[0] = createStringObject(cmd->name,strlen(cmd->name));
+ argv[1] = rl->key;
+ incrRefCount(rl->key);
+ propagate(cmd,receiver->db->id,
+ argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(argv[0]);
+ decrRefCount(argv[1]);
+ }
+ }
+ }
+
+ /* Serve clients blocked on stream key. */
+ else if (o != NULL && o->type == OBJ_STREAM) {
+ dictEntry *de = dictFind(rl->db->blocking_keys,rl->key);
+ stream *s = o->ptr;
+
+ /* We need to provide the new data that arrived on the stream
+ * to all the clients that are waiting for an offset smaller
+ * than the current top item. */
+ if (de) {
+ list *clients = dictGetVal(de);
+ listNode *ln;
+ listIter li;
+ listRewind(clients,&li);
+
+ while((ln = listNext(&li))) {
+ client *receiver = listNodeValue(ln);
+ if (receiver->btype != BLOCKED_STREAM) continue;
+ streamID *gt = dictFetchValue(receiver->bpop.keys,
+ rl->key);
+
+ /* If we blocked in the context of a consumer
+ * group, we need to resolve the group and update the
+ * last ID the client is blocked for: this is needed
+ * because serving other clients in the same consumer
+ * group will alter the "last ID" of the consumer
+ * group, and clients blocked in a consumer group are
+ * always blocked for the ">" ID: we need to deliver
+ * only new messages and avoid unblocking the client
+ * otherwise. */
+ streamCG *group = NULL;
+ if (receiver->bpop.xread_group) {
+ group = streamLookupCG(s,
+ receiver->bpop.xread_group->ptr);
+ /* If the group was not found, send an error
+ * to the consumer. */
+ if (!group) {
+ addReplyError(receiver,
+ "-NOGROUP the consumer group this client "
+ "was blocked on no longer exists");
+ unblockClient(receiver);
+ continue;
+ } else {
+ *gt = group->last_id;
+ }
+ }
+
+ if (streamCompareID(&s->last_id, gt) > 0) {
+ streamID start = *gt;
+ start.seq++; /* Can't overflow, it's a uint64_t */
+
+ /* Lookup the consumer for the group, if any. */
+ streamConsumer *consumer = NULL;
+ int noack = 0;
+
+ if (group) {
+ consumer = streamLookupConsumer(group,
+ receiver->bpop.xread_consumer->ptr,
+ 1);
+ noack = receiver->bpop.xread_group_noack;
+ }
+
+ /* Emit the two-element sub-array consisting of
+ * the name of the stream and the data we
+ * extracted from it. Wrapped in a single-item
+ * array, since we have just one key. */
+ addReplyMultiBulkLen(receiver,1);
+ addReplyMultiBulkLen(receiver,2);
+ addReplyBulk(receiver,rl->key);
+
+ streamPropInfo pi = {
+ rl->key,
+ receiver->bpop.xread_group
+ };
+ streamReplyWithRange(receiver,s,&start,NULL,
+ receiver->bpop.xread_count,
+ 0, group, consumer, noack, &pi);
+
+ /* Note that after we unblock the client, 'gt'
+ * and other receiver->bpop stuff are no longer
+ * valid, so we must do the setup above before
+ * this call. */
+ unblockClient(receiver);
+ }
+ }
+ }
+ }
+
+ /* Free this item. */
+ decrRefCount(rl->key);
+ zfree(rl);
+ listDelNode(l,ln);
+ }
+ listRelease(l); /* We have the new list in place at this point. */
+ }
+}
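
/* A minimal sketch (an assumption about the call site, which is outside
 * this hunk) of where the function above is meant to run: once after each
 * command executes, and only when some key was flagged as ready. The
 * executeCommand() helper is hypothetical. */
void exampleAfterCommandHook(client *c) {
    executeCommand(c); /* Hypothetical: run the current command. */
    if (listLength(server.ready_keys) != 0)
        handleClientsBlockedOnKeys();
}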
+
+/* This is how the current blocking lists/sorted sets/streams work; we use
+ * BLPOP as an example, but the concept is the same for other list ops,
+ * sorted sets and XREAD.
+ * - If the user calls BLPOP and the key exists and contains a non-empty list
+ * then LPOP is called instead. So BLPOP is semantically the same as LPOP
+ * if blocking is not required.
+ * - If instead BLPOP is called and the key does not exist or the list is
+ * empty we need to block. In order to do so we remove the notification for
+ * new data to read in the client socket (so that we'll not serve new
+ * requests if the blocking request is not served). Also we put the client
+ * in a dictionary (db->blocking_keys) mapping keys to a list of clients
+ * blocking for these keys.
+ * - If a PUSH operation against a key with blocked clients waiting is
+ * performed, we mark this key as "ready", and after the current command,
+ * MULTI/EXEC block, or script, is executed, we serve all the clients waiting
+ * for this list, from the one that blocked first, to the last, according
+ * to the number of elements we have in the ready list.
+ */
+
+/* Set a client in blocking mode for the specified key (list, zset or stream),
+ * with the specified timeout. The 'btype' argument is BLOCKED_LIST,
+ * BLOCKED_ZSET or BLOCKED_STREAM, depending on the kind of operation we are
+ * waiting for in order to awake the client. The client is blocked
+ * for all the 'numkeys' keys as in the 'keys' argument. When we block for
+ * stream keys, we also provide an array of streamID structures: clients will
+ * be unblocked only when items with an ID greater than or equal to the
+ * specified one are appended to the stream. */
+void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, streamID *ids) {
+ dictEntry *de;
+ list *l;
+ int j;
+
+ c->bpop.timeout = timeout;
+ c->bpop.target = target;
+
+ if (target != NULL) incrRefCount(target);
+
+ for (j = 0; j < numkeys; j++) {
+ /* The value associated with the key name in the bpop.keys dictionary
+ * is NULL for lists and sorted sets, or the stream ID for streams. */
+ void *key_data = NULL;
+ if (btype == BLOCKED_STREAM) {
+ key_data = zmalloc(sizeof(streamID));
+ memcpy(key_data,ids+j,sizeof(streamID));
+ }
+
+ /* If the key already exists in the dictionary ignore it. */
+ if (dictAdd(c->bpop.keys,keys[j],key_data) != DICT_OK) {
+ zfree(key_data);
+ continue;
+ }
+ incrRefCount(keys[j]);
+
+ /* And in the other "side", to map keys -> clients */
+ de = dictFind(c->db->blocking_keys,keys[j]);
+ if (de == NULL) {
+ int retval;
+
+ /* For every key we take a list of clients blocked for it */
+ l = listCreate();
+ retval = dictAdd(c->db->blocking_keys,keys[j],l);
+ incrRefCount(keys[j]);
+ serverAssertWithInfo(c,keys[j],retval == DICT_OK);
+ } else {
+ l = dictGetVal(de);
+ }
+ listAddNodeTail(l,c);
}
+ blockClient(c,btype);
}
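
/* A minimal sketch (not part of the patch) of how a blocking list command
 * would drive blockForKeys(), loosely modeled on BLPOP; the function name
 * and the serveClientImmediately() helper are hypothetical. Arguments are
 * assumed to be: BLPOP key [key ...] timeout. */
void exampleBlockingPopCommand(client *c, mstime_t timeout) {
    int j;

    /* If any key already holds a non-empty list, act like a plain LPOP. */
    for (j = 1; j < c->argc-1; j++) {
        robj *o = lookupKeyWrite(c->db, c->argv[j]);
        if (o != NULL && o->type == OBJ_LIST && listTypeLength(o) != 0) {
            serveClientImmediately(c, c->argv[j], o); /* Hypothetical. */
            return;
        }
    }

    /* Otherwise block for every listed key: no BRPOPLPUSH-style target
     * (NULL) and no stream IDs (NULL) are needed for plain lists. */
    blockForKeys(c, BLOCKED_LIST, c->argv+1, c->argc-2, timeout, NULL, NULL);
}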
+/* Unblock a client that's waiting in a blocking operation such as BLPOP.
+ * You should never call this function directly, but unblockClient() instead. */
+void unblockClientWaitingData(client *c) {
+ dictEntry *de;
+ dictIterator *di;
+ list *l;
+
+ serverAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
+ di = dictGetIterator(c->bpop.keys);
+ /* The client may wait for multiple keys, so unblock it for every key. */
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+
+ /* Remove this client from the list of clients waiting for this key. */
+ l = dictFetchValue(c->db->blocking_keys,key);
+ serverAssertWithInfo(c,key,l != NULL);
+ listDelNode(l,listSearchKey(l,c));
+ /* If the list is empty we need to remove it to avoid wasting memory */
+ if (listLength(l) == 0)
+ dictDelete(c->db->blocking_keys,key);
+ }
+ dictReleaseIterator(di);
+
+ /* Cleanup the client structure */
+ dictEmpty(c->bpop.keys,NULL);
+ if (c->bpop.target) {
+ decrRefCount(c->bpop.target);
+ c->bpop.target = NULL;
+ }
+ if (c->bpop.xread_group) {
+ decrRefCount(c->bpop.xread_group);
+ decrRefCount(c->bpop.xread_consumer);
+ c->bpop.xread_group = NULL;
+ c->bpop.xread_consumer = NULL;
+ }
+}
+
+/* If the specified key has clients blocked waiting for list pushes, this
+ * function will put the key reference into the server.ready_keys list.
+ * Note that db->ready_keys is a hash table that allows us to avoid putting
+ * the same key again and again in the list in case of multiple pushes
+ * made by a script or in the context of MULTI/EXEC.
+ *
+ * The list will finally be processed by handleClientsBlockedOnKeys(). */
+void signalKeyAsReady(redisDb *db, robj *key) {
+ readyList *rl;
+
+ /* No clients blocking for this key? No need to queue it. */
+ if (dictFind(db->blocking_keys,key) == NULL) return;
+
+ /* Key was already signaled? No need to queue it again. */
+ if (dictFind(db->ready_keys,key) != NULL) return;
+
+ /* Ok, we need to queue this key into server.ready_keys. */
+ rl = zmalloc(sizeof(*rl));
+ rl->key = key;
+ rl->db = db;
+ incrRefCount(key);
+ listAddNodeTail(server.ready_keys,rl);
+
+ /* We also add the key in the db->ready_keys dictionary in order
+ * to avoid adding it multiple times into a list with a simple O(1)
+ * check. */
+ incrRefCount(key);
+ serverAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
+}
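
/* A minimal sketch (an assumption) of the producer side of the flow: a
 * push operation appends the value and signals the key, so that blocked
 * clients are served once the current command, MULTI/EXEC block, or
 * script completes. */
void examplePushSide(client *c, robj *key, robj *val) {
    robj *o = lookupKeyWrite(c->db, key);
    if (o != NULL && o->type == OBJ_LIST) {
        listTypePush(o, val, LIST_TAIL);  /* Append to the tail. */
        signalKeyAsReady(c->db, key);     /* Queue into server.ready_keys. */
    }
}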
+
+
diff --git a/src/childinfo.c b/src/childinfo.c
new file mode 100644
index 000000000..719025e8c
--- /dev/null
+++ b/src/childinfo.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <unistd.h>
+
+/* Open a child-parent channel used in order to move information about the
+ * RDB / AOF saving process from the child to the parent (for instance
+ * the amount of copy on write memory used) */
+void openChildInfoPipe(void) {
+ if (pipe(server.child_info_pipe) == -1) {
+ /* On error our two file descriptors should still be set to -1,
+ * but we call closeChildInfoPipe() anyway since it can't hurt. */
+ closeChildInfoPipe();
+ } else if (anetNonBlock(NULL,server.child_info_pipe[0]) != ANET_OK) {
+ closeChildInfoPipe();
+ } else {
+ memset(&server.child_info_data,0,sizeof(server.child_info_data));
+ }
+}
+
+/* Close the pipes opened with openChildInfoPipe(). */
+void closeChildInfoPipe(void) {
+ if (server.child_info_pipe[0] != -1 ||
+ server.child_info_pipe[1] != -1)
+ {
+ close(server.child_info_pipe[0]);
+ close(server.child_info_pipe[1]);
+ server.child_info_pipe[0] = -1;
+ server.child_info_pipe[1] = -1;
+ }
+}
+
+/* Send COW data to parent. The child should call this function after populating
+ * the corresponding fields it wants to send (according to the process type). */
+void sendChildInfo(int ptype) {
+ if (server.child_info_pipe[1] == -1) return;
+ server.child_info_data.magic = CHILD_INFO_MAGIC;
+ server.child_info_data.process_type = ptype;
+ ssize_t wlen = sizeof(server.child_info_data);
+ if (write(server.child_info_pipe[1],&server.child_info_data,wlen) != wlen) {
+ /* Nothing to do on error, this will be detected by the other side. */
+ }
+}
+
+/* Receive COW data from the child. */
+void receiveChildInfo(void) {
+ if (server.child_info_pipe[0] == -1) return;
+ ssize_t wlen = sizeof(server.child_info_data);
+ if (read(server.child_info_pipe[0],&server.child_info_data,wlen) == wlen &&
+ server.child_info_data.magic == CHILD_INFO_MAGIC)
+ {
+ if (server.child_info_data.process_type == CHILD_INFO_TYPE_RDB) {
+ server.stat_rdb_cow_bytes = server.child_info_data.cow_size;
+ } else if (server.child_info_data.process_type == CHILD_INFO_TYPE_AOF) {
+ server.stat_aof_cow_bytes = server.child_info_data.cow_size;
+ }
+ }
+}
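
/* A minimal usage sketch (an assumption, since the fork points live in
 * rdb.c/aof.c outside this diff): the parent opens the pipe before
 * forking; the child reports its copy-on-write size just before exiting;
 * the parent reads the report once the child terminates. The
 * currentCowSize() helper is hypothetical. */
void exampleForkWithChildInfo(void) {
    openChildInfoPipe();
    pid_t childpid = fork();
    if (childpid == 0) {
        /* Child: perform the RDB save, then report COW memory used. */
        server.child_info_data.cow_size = currentCowSize(); /* Hypothetical. */
        sendChildInfo(CHILD_INFO_TYPE_RDB);
        _exit(0);
    } else if (childpid > 0) {
        /* Parent: once the child is reaped (e.g. via wait3()), collect
         * the report and close the channel. */
        receiveChildInfo();
        closeChildInfoPipe();
    }
}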
diff --git a/src/cluster.c b/src/cluster.c
index ec6901e8f..2f3e298e0 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include "endianconv.h"
@@ -37,9 +37,9 @@
#include <arpa/inet.h>
#include <fcntl.h>
#include <unistd.h>
-#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
+#include <math.h>
/* A global reference to myself is handy to make code more clear.
* Myself always points to server.cluster->myself, that is, the clusterNode
@@ -56,7 +56,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request);
void clusterUpdateState(void);
int clusterNodeGetSlotBit(clusterNode *n, int slot);
sds clusterGenNodesDescription(int filter);
-clusterNode *clusterLookupNode(char *name);
+clusterNode *clusterLookupNode(const char *name);
int clusterNodeAddSlave(clusterNode *master, clusterNode *slave);
int clusterAddSlot(clusterNode *n, int slot);
int clusterDelSlot(int slot);
@@ -72,34 +72,21 @@ void resetManualFailover(void);
void clusterCloseAllSlots(void);
void clusterSetNodeAsMaster(clusterNode *n);
void clusterDelNode(clusterNode *delnode);
-sds representRedisNodeFlags(sds ci, uint16_t flags);
+sds representClusterNodeFlags(sds ci, uint16_t flags);
+uint64_t clusterGetMaxEpoch(void);
+int clusterBumpConfigEpochWithoutConsensus(void);
+void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len);
/* -----------------------------------------------------------------------------
* Initialization
* -------------------------------------------------------------------------- */
-/* Return the greatest configEpoch found in the cluster. */
-uint64_t clusterGetMaxEpoch(void) {
- uint64_t max = 0;
- dictIterator *di;
- dictEntry *de;
-
- di = dictGetSafeIterator(server.cluster->nodes);
- while((de = dictNext(di)) != NULL) {
- clusterNode *node = dictGetVal(de);
- if (node->configEpoch > max) max = node->configEpoch;
- }
- dictReleaseIterator(di);
- if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
- return max;
-}
-
/* Load the cluster config from 'filename'.
*
* If the file does not exist or is zero-length (this may happen because
* when we lock the nodes.conf file, we create a zero-length one for the
- * sake of locking if it does not already exist), REDIS_ERR is returned.
- * If the configuration was loaded from the file, REDIS_OK is returned. */
+ * sake of locking if it does not already exist), C_ERR is returned.
+ * If the configuration was loaded from the file, C_OK is returned. */
int clusterLoadConfig(char *filename) {
FILE *fp = fopen(filename,"r");
struct stat sb;
@@ -108,20 +95,20 @@ int clusterLoadConfig(char *filename) {
if (fp == NULL) {
if (errno == ENOENT) {
- return REDIS_ERR;
+ return C_ERR;
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Loading the cluster node config from %s: %s",
filename, strerror(errno));
exit(1);
}
}
- /* Check if the file is zero-length: if so return REDIS_ERR to signal
+ /* Check if the file is zero-length: if so return C_ERR to signal
* we have to write the config. */
if (fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) {
fclose(fp);
- return REDIS_ERR;
+ return C_ERR;
}
/* Parse the file. Note that single lines of the cluster config file can
@@ -130,8 +117,8 @@ int clusterLoadConfig(char *filename) {
* present in a single line, possibly in importing or migrating state, so
* together with the node ID of the sender/receiver.
*
- * To simplify we allocate 1024+REDIS_CLUSTER_SLOTS*128 bytes per line. */
- maxline = 1024+REDIS_CLUSTER_SLOTS*128;
+ * To simplify we allocate 1024+CLUSTER_SLOTS*128 bytes per line. */
+ maxline = 1024+CLUSTER_SLOTS*128;
line = zmalloc(maxline);
while(fgets(line,maxline,fp) != NULL) {
int argc;
@@ -142,7 +129,7 @@ int clusterLoadConfig(char *filename) {
/* Skip blank lines, they can be created either by users manually
* editing nodes.conf or by the config writing process if stopped
* before the truncate() call. */
- if (line[0] == '\n') continue;
+ if (line[0] == '\n' || line[0] == '\0') continue;
/* Split the line into arguments for processing. */
argv = sdssplitargs(line,&argc);
@@ -159,7 +146,7 @@ int clusterLoadConfig(char *filename) {
server.cluster->lastVoteEpoch =
strtoull(argv[j+1],NULL,10);
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Skipping unknown cluster config variable '%s'",
argv[j]);
}
@@ -181,7 +168,17 @@ int clusterLoadConfig(char *filename) {
if ((p = strrchr(argv[1],':')) == NULL) goto fmterr;
*p = '\0';
memcpy(n->ip,argv[1],strlen(argv[1])+1);
- n->port = atoi(p+1);
+ char *port = p+1;
+ char *busp = strchr(port,'@');
+ if (busp) {
+ *busp = '\0';
+ busp++;
+ }
+ n->port = atoi(port);
+ /* In older versions of nodes.conf the "@busport" part is missing.
+ * In this case we set it to the default offset of 10000 from the
+ * base port. */
+ n->cport = busp ? atoi(busp) : n->port + CLUSTER_PORT_INCR;
/* Parse flags */
p = s = argv[2];
@@ -189,26 +186,28 @@ int clusterLoadConfig(char *filename) {
p = strchr(s,',');
if (p) *p = '\0';
if (!strcasecmp(s,"myself")) {
- redisAssert(server.cluster->myself == NULL);
+ serverAssert(server.cluster->myself == NULL);
myself = server.cluster->myself = n;
- n->flags |= REDIS_NODE_MYSELF;
+ n->flags |= CLUSTER_NODE_MYSELF;
} else if (!strcasecmp(s,"master")) {
- n->flags |= REDIS_NODE_MASTER;
+ n->flags |= CLUSTER_NODE_MASTER;
} else if (!strcasecmp(s,"slave")) {
- n->flags |= REDIS_NODE_SLAVE;
+ n->flags |= CLUSTER_NODE_SLAVE;
} else if (!strcasecmp(s,"fail?")) {
- n->flags |= REDIS_NODE_PFAIL;
+ n->flags |= CLUSTER_NODE_PFAIL;
} else if (!strcasecmp(s,"fail")) {
- n->flags |= REDIS_NODE_FAIL;
+ n->flags |= CLUSTER_NODE_FAIL;
n->fail_time = mstime();
} else if (!strcasecmp(s,"handshake")) {
- n->flags |= REDIS_NODE_HANDSHAKE;
+ n->flags |= CLUSTER_NODE_HANDSHAKE;
} else if (!strcasecmp(s,"noaddr")) {
- n->flags |= REDIS_NODE_NOADDR;
+ n->flags |= CLUSTER_NODE_NOADDR;
+ } else if (!strcasecmp(s,"nofailover")) {
+ n->flags |= CLUSTER_NODE_NOFAILOVER;
} else if (!strcasecmp(s,"noflags")) {
/* nothing to do */
} else {
- redisPanic("Unknown flag in redis cluster config file");
+ serverPanic("Unknown flag in redis cluster config file");
}
if (p) s = p+1;
}
@@ -243,10 +242,11 @@ int clusterLoadConfig(char *filename) {
clusterNode *cn;
p = strchr(argv[j],'-');
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
*p = '\0';
direction = p[1]; /* Either '>' or '<' */
slot = atoi(argv[j]+1);
+ if (slot < 0 || slot >= CLUSTER_SLOTS) goto fmterr;
p += 3;
cn = clusterLookupNode(p);
if (!cn) {
@@ -266,6 +266,8 @@ int clusterLoadConfig(char *filename) {
} else {
start = stop = atoi(argv[j]);
}
+ if (start < 0 || start >= CLUSTER_SLOTS) goto fmterr;
+ if (stop < 0 || stop >= CLUSTER_SLOTS) goto fmterr;
while(start <= stop) clusterAddSlot(n, start++);
}
@@ -277,7 +279,7 @@ int clusterLoadConfig(char *filename) {
zfree(line);
fclose(fp);
- redisLog(REDIS_NOTICE,"Node configuration loaded, I'm %.40s", myself->name);
+ serverLog(LL_NOTICE,"Node configuration loaded, I'm %.40s", myself->name);
/* Something that should never happen: currentEpoch smaller than
* the max epoch found in the nodes configuration. However we handle this
@@ -285,10 +287,10 @@ int clusterLoadConfig(char *filename) {
if (clusterGetMaxEpoch() > server.cluster->currentEpoch) {
server.cluster->currentEpoch = clusterGetMaxEpoch();
}
- return REDIS_OK;
+ return C_OK;
fmterr:
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unrecoverable error: corrupted cluster config file.");
zfree(line);
if (fp) fclose(fp);
@@ -317,7 +319,7 @@ int clusterSaveConfig(int do_fsync) {
/* Get the nodes description and concatenate our "vars" directive to
* save currentEpoch and lastVoteEpoch. */
- ci = clusterGenNodesDescription(REDIS_NODE_HANDSHAKE);
+ ci = clusterGenNodesDescription(CLUSTER_NODE_HANDSHAKE);
ci = sdscatprintf(ci,"vars currentEpoch %llu lastVoteEpoch %llu\n",
(unsigned long long) server.cluster->currentEpoch,
(unsigned long long) server.cluster->lastVoteEpoch);
@@ -356,7 +358,7 @@ err:
void clusterSaveConfigOrDie(int do_fsync) {
if (clusterSaveConfig(do_fsync) == -1) {
- redisLog(REDIS_WARNING,"Fatal: can't update cluster config file.");
+ serverLog(LL_WARNING,"Fatal: can't update cluster config file.");
exit(1);
}
}
@@ -368,37 +370,60 @@ void clusterSaveConfigOrDie(int do_fsync) {
* in-place, reopening the file, and writing to it in place (later adjusting
* the length with ftruncate()).
*
- * On success REDIS_OK is returned, otherwise an error is logged and
- * the function returns REDIS_ERR to signal a lock was not acquired. */
+ * On success C_OK is returned, otherwise an error is logged and
+ * the function returns C_ERR to signal a lock was not acquired. */
int clusterLockConfig(char *filename) {
+/* flock() does not exist on Solaris
+ * and a fcntl-based solution won't help, as we constantly re-open that file,
+ * which will release _all_ locks anyway
+ */
+#if !defined(__sun)
/* To lock it, we need to open the file in a way it is created if
* it does not exist, otherwise there is a race condition with other
* processes. */
int fd = open(filename,O_WRONLY|O_CREAT,0644);
if (fd == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Can't open %s in order to acquire a lock: %s",
filename, strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
if (flock(fd,LOCK_EX|LOCK_NB) == -1) {
if (errno == EWOULDBLOCK) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sorry, the cluster configuration file %s is already used "
"by a different Redis Cluster node. Please make sure that "
"different nodes use different cluster configuration "
"files.", filename);
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Impossible to lock %s: %s", filename, strerror(errno));
}
close(fd);
- return REDIS_ERR;
+ return C_ERR;
}
/* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the
* lock to the file as long as the process exists. */
- return REDIS_OK;
+#endif /* __sun */
+
+ return C_OK;
+}
+
+/* Some flags (currently just the NOFAILOVER flag) may need to be updated
+ * in the "myself" node based on the current configuration of the node,
+ * that may change at runtime via CONFIG SET. This function changes the
+ * set of flags in myself->flags accordingly. */
+void clusterUpdateMyselfFlags(void) {
+ int oldflags = myself->flags;
+ int nofailover = server.cluster_slave_no_failover ?
+ CLUSTER_NODE_NOFAILOVER : 0;
+ myself->flags &= ~CLUSTER_NODE_NOFAILOVER;
+ myself->flags |= nofailover;
+ if (myself->flags != oldflags) {
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_UPDATE_STATE);
+ }
}
void clusterInit(void) {
@@ -407,7 +432,7 @@ void clusterInit(void) {
server.cluster = zmalloc(sizeof(clusterState));
server.cluster->myself = NULL;
server.cluster->currentEpoch = 0;
- server.cluster->state = REDIS_CLUSTER_FAIL;
+ server.cluster->state = CLUSTER_FAIL;
server.cluster->size = 1;
server.cluster->todo_before_sleep = 0;
server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL);
@@ -417,25 +442,28 @@ void clusterInit(void) {
server.cluster->failover_auth_count = 0;
server.cluster->failover_auth_rank = 0;
server.cluster->failover_auth_epoch = 0;
- server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
+ server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
server.cluster->lastVoteEpoch = 0;
- server.cluster->stats_bus_messages_sent = 0;
- server.cluster->stats_bus_messages_received = 0;
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ server.cluster->stats_bus_messages_sent[i] = 0;
+ server.cluster->stats_bus_messages_received[i] = 0;
+ }
+ server.cluster->stats_pfail_nodes = 0;
memset(server.cluster->slots,0, sizeof(server.cluster->slots));
clusterCloseAllSlots();
/* Lock the cluster config file to make sure every node uses
* its own nodes.conf. */
- if (clusterLockConfig(server.cluster_configfile) == REDIS_ERR)
+ if (clusterLockConfig(server.cluster_configfile) == C_ERR)
exit(1);
/* Load or create a new nodes configuration. */
- if (clusterLoadConfig(server.cluster_configfile) == REDIS_ERR) {
+ if (clusterLoadConfig(server.cluster_configfile) == C_ERR) {
/* No configuration found. We will just use the random name provided
* by the createClusterNode() function. */
myself = server.cluster->myself =
- createClusterNode(NULL,REDIS_NODE_MYSELF|REDIS_NODE_MASTER);
- redisLog(REDIS_NOTICE,"No cluster configuration found, I'm %.40s",
+ createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER);
+ serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s",
myself->name);
clusterAddNode(myself);
saveconf = 1;
@@ -448,8 +476,8 @@ void clusterInit(void) {
/* Port sanity check II
* The other handshake port check is triggered too late to stop
* us from trying to use a too-high cluster port number. */
- if (server.port > (65535-REDIS_CLUSTER_PORT_INCR)) {
- redisLog(REDIS_WARNING, "Redis port number too high. "
+ if (server.port > (65535-CLUSTER_PORT_INCR)) {
+ serverLog(LL_WARNING, "Redis port number too high. "
"Cluster communication port is 10,000 port "
"numbers higher than your Redis port. "
"Your Redis port number must be "
@@ -457,8 +485,8 @@ void clusterInit(void) {
exit(1);
}
- if (listenToPort(server.port+REDIS_CLUSTER_PORT_INCR,
- server.cfd,&server.cfd_count) == REDIS_ERR)
+ if (listenToPort(server.port+CLUSTER_PORT_INCR,
+ server.cfd,&server.cfd_count) == C_ERR)
{
exit(1);
} else {
@@ -467,20 +495,28 @@ void clusterInit(void) {
for (j = 0; j < server.cfd_count; j++) {
if (aeCreateFileEvent(server.el, server.cfd[j], AE_READABLE,
clusterAcceptHandler, NULL) == AE_ERR)
- redisPanic("Unrecoverable error creating Redis Cluster "
+ serverPanic("Unrecoverable error creating Redis Cluster "
"file event.");
}
}
- /* The slots -> keys map is a sorted set. Init it. */
- server.cluster->slots_to_keys = zslCreate();
+ /* The slots -> keys map is a radix tree. Initialize it here. */
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
- /* Set myself->port to my listening port, we'll just need to discover
- * the IP address via MEET messages. */
+ /* Set myself->port / cport to my listening ports, we'll just need to
+ * discover the IP address via MEET messages. */
myself->port = server.port;
+ myself->cport = server.port+CLUSTER_PORT_INCR;
+ if (server.cluster_announce_port)
+ myself->port = server.cluster_announce_port;
+ if (server.cluster_announce_bus_port)
+ myself->cport = server.cluster_announce_bus_port;
server.cluster->mf_end = 0;
resetManualFailover();
+ clusterUpdateMyselfFlags();
}
/* Reset a node performing a soft or hard reset:
@@ -501,7 +537,7 @@ void clusterReset(int hard) {
if (nodeIsSlave(myself)) {
clusterSetNodeAsMaster(myself);
replicationUnsetMaster();
- emptyDb(NULL);
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
}
/* Close slots, reset manual failover state. */
@@ -509,7 +545,7 @@ void clusterReset(int hard) {
resetManualFailover();
/* Unassign all the slots. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) clusterDelSlot(j);
+ for (j = 0; j < CLUSTER_SLOTS; j++) clusterDelSlot(j);
/* Forget all the nodes, but myself. */
di = dictGetSafeIterator(server.cluster->nodes);
@@ -528,15 +564,16 @@ void clusterReset(int hard) {
server.cluster->currentEpoch = 0;
server.cluster->lastVoteEpoch = 0;
myself->configEpoch = 0;
- redisLog(REDIS_WARNING, "configEpoch set to 0 via CLUSTER RESET HARD");
+ serverLog(LL_WARNING, "configEpoch set to 0 via CLUSTER RESET HARD");
/* To change the Node ID we need to remove the old name from the
* nodes table, change the ID, and re-add back with new name. */
- oldname = sdsnewlen(myself->name, REDIS_CLUSTER_NAMELEN);
+ oldname = sdsnewlen(myself->name, CLUSTER_NAMELEN);
dictDelete(server.cluster->nodes,oldname);
sdsfree(oldname);
- getRandomHexChars(myself->name, REDIS_CLUSTER_NAMELEN);
+ getRandomHexChars(myself->name, CLUSTER_NAMELEN);
clusterAddNode(myself);
+ serverLog(LL_NOTICE,"Node hard reset, now I'm %.40s", myself->name);
}
/* Make sure to persist the new config and update the state. */
@@ -564,8 +601,7 @@ clusterLink *createClusterLink(clusterNode *node) {
* with this link will have the 'link' field set to NULL. */
void freeClusterLink(clusterLink *link) {
if (link->fd != -1) {
- aeDeleteFileEvent(server.el, link->fd, AE_WRITABLE);
- aeDeleteFileEvent(server.el, link->fd, AE_READABLE);
+ aeDeleteFileEvent(server.el, link->fd, AE_READABLE|AE_WRITABLE);
}
sdsfree(link->sndbuf);
sdsfree(link->rcvbuf);
@@ -579,11 +615,11 @@ void freeClusterLink(clusterLink *link) {
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd;
int max = MAX_CLUSTER_ACCEPTS_PER_CALL;
- char cip[REDIS_IP_STR_LEN];
+ char cip[NET_IP_STR_LEN];
clusterLink *link;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
/* If the server is starting up, don't accept cluster connections:
* UPDATE messages may interact with the database content. */
@@ -593,7 +629,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Error accepting cluster node: %s", server.neterr);
return;
}
@@ -601,7 +637,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
anetEnableTcpNoDelay(NULL,cfd);
/* Use non-blocking I/O for cluster messages. */
- redisLog(REDIS_VERBOSE,"Accepted cluster node %s:%d", cip, cport);
+ serverLog(LL_VERBOSE,"Accepted cluster node %s:%d", cip, cport);
/* Create a link object we use to handle the connection.
* It gets passed to the readable handler when data is available.
 * Initially the link->node pointer is set to NULL as we don't know
@@ -636,7 +672,7 @@ unsigned int keyHashSlot(char *key, int keylen) {
for (e = s+1; e < keylen; e++)
if (key[e] == '}') break;
- /* No '}' or nothing betweeen {} ? Hash the whole key. */
+ /* No '}' or nothing between {} ? Hash the whole key. */
if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF;
/* If we are here there is both a { and a } on its right. Hash
@@ -659,9 +695,9 @@ clusterNode *createClusterNode(char *nodename, int flags) {
clusterNode *node = zmalloc(sizeof(*node));
if (nodename)
- memcpy(node->name, nodename, REDIS_CLUSTER_NAMELEN);
+ memcpy(node->name, nodename, CLUSTER_NAMELEN);
else
- getRandomHexChars(node->name, REDIS_CLUSTER_NAMELEN);
+ getRandomHexChars(node->name, CLUSTER_NAMELEN);
node->ctime = mstime();
node->configEpoch = 0;
node->flags = flags;
@@ -675,8 +711,10 @@ clusterNode *createClusterNode(char *nodename, int flags) {
node->link = NULL;
memset(node->ip,0,sizeof(node->ip));
node->port = 0;
+ node->cport = 0;
node->fail_reports = listCreate();
node->voted_time = 0;
+ node->orphaned_time = 0;
node->repl_offset_time = 0;
node->repl_offset = 0;
listSetFreeMethod(node->fail_reports,zfree);
@@ -729,7 +767,7 @@ void clusterNodeCleanupFailureReports(clusterNode *node) {
listIter li;
clusterNodeFailReport *fr;
mstime_t maxtime = server.cluster_node_timeout *
- REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT;
+ CLUSTER_FAIL_REPORT_VALIDITY_MULT;
mstime_t now = mstime();
listRewind(l,&li);
@@ -783,13 +821,18 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
for (j = 0; j < master->numslaves; j++) {
if (master->slaves[j] == slave) {
- memmove(master->slaves+j,master->slaves+(j+1),
- (master->numslaves-1)-j);
+ if ((j+1) < master->numslaves) {
+ int remaining_slaves = (master->numslaves - j) - 1;
+ memmove(master->slaves+j,master->slaves+(j+1),
+ (sizeof(*master->slaves) * remaining_slaves));
+ }
master->numslaves--;
- return REDIS_OK;
+ if (master->numslaves == 0)
+ master->flags &= ~CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
}
}
- return REDIS_ERR;
+ return C_ERR;
}
int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
@@ -797,18 +840,13 @@ int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
/* If it's already a slave, don't add it again. */
for (j = 0; j < master->numslaves; j++)
- if (master->slaves[j] == slave) return REDIS_ERR;
+ if (master->slaves[j] == slave) return C_ERR;
master->slaves = zrealloc(master->slaves,
sizeof(clusterNode*)*(master->numslaves+1));
master->slaves[master->numslaves] = slave;
master->numslaves++;
- return REDIS_OK;
-}
-
-void clusterNodeResetSlaves(clusterNode *n) {
- zfree(n->slaves);
- n->numslaves = 0;
- n->slaves = NULL;
+ master->flags |= CLUSTER_NODE_MIGRATE_TO;
+ return C_OK;
}
int clusterCountNonFailingSlaves(clusterNode *n) {
@@ -819,15 +857,28 @@ int clusterCountNonFailingSlaves(clusterNode *n) {
return okslaves;
}
+/* Low level cleanup of the node structure. Only called by clusterDelNode(). */
void freeClusterNode(clusterNode *n) {
sds nodename;
+ int j;
- nodename = sdsnewlen(n->name, REDIS_CLUSTER_NAMELEN);
- redisAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
+ /* If the node has associated slaves, we have to set
+ * all the slaves->slaveof fields to NULL (unknown). */
+ for (j = 0; j < n->numslaves; j++)
+ n->slaves[j]->slaveof = NULL;
+
+ /* Remove this node from the list of slaves of its master. */
+ if (nodeIsSlave(n) && n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
+
+ /* Unlink from the set of nodes. */
+ nodename = sdsnewlen(n->name, CLUSTER_NAMELEN);
+ serverAssert(dictDelete(server.cluster->nodes,nodename) == DICT_OK);
sdsfree(nodename);
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof, n);
+
+ /* Release link and associated data structures. */
if (n->link) freeClusterLink(n->link);
listRelease(n->fail_reports);
+ zfree(n->slaves);
zfree(n);
}
@@ -836,15 +887,20 @@ int clusterAddNode(clusterNode *node) {
int retval;
retval = dictAdd(server.cluster->nodes,
- sdsnewlen(node->name,REDIS_CLUSTER_NAMELEN), node);
- return (retval == DICT_OK) ? REDIS_OK : REDIS_ERR;
+ sdsnewlen(node->name,CLUSTER_NAMELEN), node);
+ return (retval == DICT_OK) ? C_OK : C_ERR;
}
-/* Remove a node from the cluster:
- * 1) Mark all the nodes handled by it as unassigned.
- * 2) Remove all the failure reports sent by this node.
- * 3) Free the node, that will in turn remove it from the hash table
- * and from the list of slaves of its master, if it is a slave node.
+/* Remove a node from the cluster. The function performs the high level
+ * cleanup, calling freeClusterNode() for the low level cleanup.
+ * Here we do the following:
+ *
+ * 1) Mark all the slots handled by it as unassigned.
+ * 2) Remove all the failure reports sent by this node and referenced by
+ * other nodes.
+ * 3) Free the node with freeClusterNode() that will in turn remove it
+ * from the hash table and from the list of slaves of its master, if
+ * it is a slave node.
*/
void clusterDelNode(clusterNode *delnode) {
int j;
@@ -852,7 +908,7 @@ void clusterDelNode(clusterNode *delnode) {
dictEntry *de;
/* 1) Mark slots as unassigned. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (server.cluster->importing_slots_from[j] == delnode)
server.cluster->importing_slots_from[j] = NULL;
if (server.cluster->migrating_slots_to[j] == delnode)
@@ -871,17 +927,13 @@ void clusterDelNode(clusterNode *delnode) {
}
dictReleaseIterator(di);
- /* 3) Remove this node from its master's slaves if needed. */
- if (nodeIsSlave(delnode) && delnode->slaveof)
- clusterNodeRemoveSlave(delnode->slaveof,delnode);
-
- /* 4) Free the node, unlinking it from the cluster. */
+ /* 3) Free the node, unlinking it from the cluster. */
freeClusterNode(delnode);
}
/* Node lookup by name */
-clusterNode *clusterLookupNode(char *name) {
- sds s = sdsnewlen(name, REDIS_CLUSTER_NAMELEN);
+clusterNode *clusterLookupNode(const char *name) {
+ sds s = sdsnewlen(name, CLUSTER_NAMELEN);
dictEntry *de;
de = dictFind(server.cluster->nodes,s);
@@ -896,30 +948,162 @@ clusterNode *clusterLookupNode(char *name) {
* this function. */
void clusterRenameNode(clusterNode *node, char *newname) {
int retval;
- sds s = sdsnewlen(node->name, REDIS_CLUSTER_NAMELEN);
+ sds s = sdsnewlen(node->name, CLUSTER_NAMELEN);
- redisLog(REDIS_DEBUG,"Renaming node %.40s into %.40s",
+ serverLog(LL_DEBUG,"Renaming node %.40s into %.40s",
node->name, newname);
retval = dictDelete(server.cluster->nodes, s);
sdsfree(s);
- redisAssert(retval == DICT_OK);
- memcpy(node->name, newname, REDIS_CLUSTER_NAMELEN);
+ serverAssert(retval == DICT_OK);
+ memcpy(node->name, newname, CLUSTER_NAMELEN);
clusterAddNode(node);
}
/* -----------------------------------------------------------------------------
+ * CLUSTER config epoch handling
+ * -------------------------------------------------------------------------- */
+
+/* Return the greatest configEpoch found in the cluster, or the current
+ * epoch if greater than any node configEpoch. */
+uint64_t clusterGetMaxEpoch(void) {
+ uint64_t max = 0;
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+ if (node->configEpoch > max) max = node->configEpoch;
+ }
+ dictReleaseIterator(di);
+ if (max < server.cluster->currentEpoch) max = server.cluster->currentEpoch;
+ return max;
+}
+
+/* If this node epoch is zero or is not already the greatest across the
+ * cluster (from the POV of the local configuration), this function will:
+ *
+ * 1) Generate a new config epoch, incrementing the current epoch.
+ * 2) Assign the new epoch to this node, WITHOUT any consensus.
+ * 3) Persist the configuration on disk before sending packets with the
+ * new configuration.
+ *
+ * If the new config epoch is generated and assigned, C_OK is returned,
+ * otherwise C_ERR is returned (since the node has already the greatest
+ * configuration around) and no operation is performed.
+ *
+ * Important note: this function violates the principle that config epochs
+ * should be generated with consensus and should be unique across the cluster.
+ * However Redis Cluster uses this auto-generated new config epochs in two
+ * cases:
+ *
+ * 1) When slots are closed after importing. Otherwise resharding would be
+ * too expensive.
+ * 2) When CLUSTER FAILOVER is called with options that force a slave to
+ * failover its master even if there is no master majority able to
+ * create a new configuration epoch.
+ *
+ * Redis Cluster will not explode using this function, even in the case of
+ * a collision between this node and another node, generating the same
+ * configuration epoch unilaterally, because the config epoch conflict
+ * resolution algorithm will eventually move colliding nodes to different
+ * config epochs. However using this function may violate the "last failover
+ * wins" rule, so should only be used with care. */
+int clusterBumpConfigEpochWithoutConsensus(void) {
+ uint64_t maxEpoch = clusterGetMaxEpoch();
+
+ if (myself->configEpoch == 0 ||
+ myself->configEpoch != maxEpoch)
+ {
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
+ CLUSTER_TODO_FSYNC_CONFIG);
+ serverLog(LL_WARNING,
+ "New configEpoch set to %llu",
+ (unsigned long long) myself->configEpoch);
+ return C_OK;
+ } else {
+ return C_ERR;
+ }
+}
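
/* A minimal sketch (an assumption; the resharding path is outside this
 * hunk) of case 1 above: when a slot import completes, the importing
 * node claims the slot and bumps its epoch so that its claim wins when
 * broadcast to the rest of the cluster. */
void exampleFinishSlotImport(int slot) {
    clusterBumpConfigEpochWithoutConsensus(); /* No-op if already max. */
    clusterAddSlot(myself, slot);
    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
}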
+
+/* This function is called when this node is a master, and we receive from
+ * another master a configuration epoch that is equal to our configuration
+ * epoch.
+ *
+ * BACKGROUND
+ *
+ * It is not possible that different slaves get the same config
+ * epoch during a failover election, because the slaves need to get voted
+ * by a majority. However when we perform a manual resharding of the cluster
+ * the node will assign a configuration epoch to itself without asking
+ * for agreement. Usually resharding happens when the cluster is working well
+ * and is supervised by the sysadmin, however it is possible for a failover
+ * to happen exactly while the node we are resharding a slot to assigns itself
+ * a new configuration epoch, but before it is able to propagate it.
+ *
+ * So technically it is possible in this condition that two nodes end with
+ * the same configuration epoch.
+ *
+ * Another possibility is that there are bugs in the implementation causing
+ * this to happen.
+ *
+ * Moreover when a new cluster is created, all the nodes start with the same
+ * configEpoch. This collision resolution code allows nodes to automatically
+ * end up with a different configEpoch at startup.
+ *
+ * In all the cases, we want a mechanism that resolves this issue automatically
+ * as a safeguard. The same configuration epoch for masters serving different
+ * set of slots is not harmful, but it is if the nodes end serving the same
+ * slots for some reason (manual errors or software bugs) without a proper
+ * failover procedure.
+ *
+ * In general we want a system that eventually always ends with different
+ * masters having different configuration epochs whatever happened, since
+ * nothing is worse than a split-brain condition in a distributed system.
+ *
+ * BEHAVIOR
+ *
+ * When this function gets called, what happens is that if this node
+ * has the lexicographically smaller Node ID compared to the other node
+ * with the conflicting epoch (the 'sender' node), it will assign itself
+ * the greatest configuration epoch currently detected among nodes plus 1.
+ *
+ * This means that even if there are multiple nodes colliding, the node
+ * with the greatest Node ID never moves forward, so eventually all the nodes
+ * end with a different configuration epoch.
+ */
+void clusterHandleConfigEpochCollision(clusterNode *sender) {
+ /* Prerequisites: nodes have the same configEpoch and are both masters. */
+ if (sender->configEpoch != myself->configEpoch ||
+ !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
+ /* Don't act if the colliding node has a smaller Node ID. */
+ if (memcmp(sender->name,myself->name,CLUSTER_NAMELEN) <= 0) return;
+ /* Get the next ID available at the best of this node knowledge. */
+ server.cluster->currentEpoch++;
+ myself->configEpoch = server.cluster->currentEpoch;
+ clusterSaveConfigOrDie(1);
+ serverLog(LL_VERBOSE,
+ "WARNING: configEpoch collision with node %.40s."
+ " configEpoch set to %llu",
+ sender->name,
+ (unsigned long long) myself->configEpoch);
+}
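
/* An isolated sketch (illustration only, not part of the patch) of the
 * resolution rule above: on a tie, only the node with the smaller ID
 * claims currentEpoch+1, so the greatest Node ID never moves. */
static uint64_t exampleResolveEpochCollision(const char *myid,
                                             const char *senderid,
                                             uint64_t myepoch,
                                             uint64_t senderepoch,
                                             uint64_t currentepoch)
{
    if (myepoch != senderepoch) return myepoch;        /* No collision. */
    if (memcmp(senderid, myid, CLUSTER_NAMELEN) <= 0)  /* Sender is smaller: */
        return myepoch;                                /* they will move. */
    return currentepoch + 1;     /* We are smaller: take the next epoch. */
}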
+
+/* -----------------------------------------------------------------------------
* CLUSTER nodes blacklist
*
* The nodes blacklist is just a way to ensure that a given node with a given
* Node ID is not readded before some time elapsed (this time is specified
- * in seconds in REDIS_CLUSTER_BLACKLIST_TTL).
+ * in seconds in CLUSTER_BLACKLIST_TTL).
*
* This is useful when we want to remove a node from the cluster completely:
* when CLUSTER FORGET is called, it also puts the node into the blacklist so
* that even if we receive gossip messages from other nodes that still remember
* about the node we want to remove, we don't re-add it before some time.
*
- * Currently the REDIS_CLUSTER_BLACKLIST_TTL is set to 1 minute, this means
+ * Currently the CLUSTER_BLACKLIST_TTL is set to 1 minute, this means
* that redis-trib has 60 seconds to send CLUSTER FORGET messages to nodes
* in the cluster without dealing with the problem of other nodes re-adding
* back the node to nodes we already sent the FORGET command to.
@@ -929,7 +1113,7 @@ void clusterRenameNode(clusterNode *node, char *newname) {
* value.
* -------------------------------------------------------------------------- */
-#define REDIS_CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */
+#define CLUSTER_BLACKLIST_TTL 60 /* 1 minute. */
/* Before the addNode() or Exists() operations we always remove expired
@@ -955,7 +1139,7 @@ void clusterBlacklistCleanup(void) {
/* Cleanup the blacklist and add a new node ID to the black list. */
void clusterBlacklistAddNode(clusterNode *node) {
dictEntry *de;
- sds id = sdsnewlen(node->name,REDIS_CLUSTER_NAMELEN);
+ sds id = sdsnewlen(node->name,CLUSTER_NAMELEN);
clusterBlacklistCleanup();
if (dictAdd(server.cluster->nodes_black_list,id,NULL) == DICT_OK) {
@@ -964,7 +1148,7 @@ void clusterBlacklistAddNode(clusterNode *node) {
id = sdsdup(id);
}
de = dictFind(server.cluster->nodes_black_list,id);
- dictSetUnsignedIntegerVal(de,time(NULL)+REDIS_CLUSTER_BLACKLIST_TTL);
+ dictSetUnsignedIntegerVal(de,time(NULL)+CLUSTER_BLACKLIST_TTL);
sdsfree(id);
}
@@ -972,7 +1156,7 @@ void clusterBlacklistAddNode(clusterNode *node) {
* You don't need to pass an sds string here, any pointer to 40 bytes
* will work. */
int clusterBlacklistExists(char *nodeid) {
- sds id = sdsnewlen(nodeid,REDIS_CLUSTER_NAMELEN);
+ sds id = sdsnewlen(nodeid,CLUSTER_NAMELEN);
int retval;
clusterBlacklistCleanup();
@@ -1018,12 +1202,12 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
if (nodeIsMaster(myself)) failures++;
if (failures < needed_quorum) return; /* No weak agreement from masters. */
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Marking node %.40s as failing (quorum reached).", node->name);
/* Mark the node as failing. */
- node->flags &= ~REDIS_NODE_PFAIL;
- node->flags |= REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_PFAIL;
+ node->flags |= CLUSTER_NODE_FAIL;
node->fail_time = mstime();
/* Broadcast the failing node name to everybody, forcing all the other
@@ -1038,16 +1222,16 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
void clearNodeFailureIfNeeded(clusterNode *node) {
mstime_t now = mstime();
- redisAssert(nodeFailed(node));
+ serverAssert(nodeFailed(node));
/* For slaves we always clear the FAIL flag if we can contact the
* node again. */
if (nodeIsSlave(node) || node->numslots == 0) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Clear FAIL state for node %.40s: %s is reachable again.",
node->name,
nodeIsSlave(node) ? "slave" : "master without slots");
- node->flags &= ~REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_FAIL;
clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
}
@@ -1057,12 +1241,12 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
* Apparently no one is going to fix these slots, clear the FAIL flag. */
if (nodeIsMaster(node) && node->numslots > 0 &&
(now - node->fail_time) >
- (server.cluster_node_timeout * REDIS_CLUSTER_FAIL_UNDO_TIME_MULT))
+ (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT))
{
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Clear FAIL state for node %.40s: is reachable again and nobody is serving its slots after some time.",
node->name);
- node->flags &= ~REDIS_NODE_FAIL;
+ node->flags &= ~CLUSTER_NODE_FAIL;
clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG);
}
}
@@ -1070,7 +1254,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
/* Return true if we already have a node in HANDSHAKE state matching the
* specified ip address and port number. This function is used in order to
* avoid adding a new handshake node for the same address multiple times. */
-int clusterHandshakeInProgress(char *ip, int port) {
+int clusterHandshakeInProgress(char *ip, int port, int cport) {
dictIterator *di;
dictEntry *de;
@@ -1079,7 +1263,9 @@ int clusterHandshakeInProgress(char *ip, int port) {
clusterNode *node = dictGetVal(de);
if (!nodeInHandshake(node)) continue;
- if (!strcasecmp(node->ip,ip) && node->port == port) break;
+ if (!strcasecmp(node->ip,ip) &&
+ node->port == port &&
+ node->cport == cport) break;
}
dictReleaseIterator(di);
return de != NULL;
@@ -1092,9 +1278,9 @@ int clusterHandshakeInProgress(char *ip, int port) {
*
* EAGAIN - There is already an handshake in progress for this address.
* EINVAL - IP or port are not valid. */
-int clusterStartHandshake(char *ip, int port) {
+int clusterStartHandshake(char *ip, int port, int cport) {
clusterNode *n;
- char norm_ip[REDIS_IP_STR_LEN];
+ char norm_ip[NET_IP_STR_LEN];
struct sockaddr_storage sa;
/* IP sanity check */
@@ -1112,24 +1298,24 @@ int clusterStartHandshake(char *ip, int port) {
}
/* Port sanity check */
- if (port <= 0 || port > (65535-REDIS_CLUSTER_PORT_INCR)) {
+ if (port <= 0 || port > 65535 || cport <= 0 || cport > 65535) {
errno = EINVAL;
return 0;
}
/* Set norm_ip as the normalized string representation of the node
* IP address. */
- memset(norm_ip,0,REDIS_IP_STR_LEN);
+ memset(norm_ip,0,NET_IP_STR_LEN);
if (sa.ss_family == AF_INET)
inet_ntop(AF_INET,
(void*)&(((struct sockaddr_in *)&sa)->sin_addr),
- norm_ip,REDIS_IP_STR_LEN);
+ norm_ip,NET_IP_STR_LEN);
else
inet_ntop(AF_INET6,
(void*)&(((struct sockaddr_in6 *)&sa)->sin6_addr),
- norm_ip,REDIS_IP_STR_LEN);
+ norm_ip,NET_IP_STR_LEN);
- if (clusterHandshakeInProgress(norm_ip,port)) {
+ if (clusterHandshakeInProgress(norm_ip,port,cport)) {
errno = EAGAIN;
return 0;
}
@@ -1137,9 +1323,10 @@ int clusterStartHandshake(char *ip, int port) {
/* Add the node with a random address (NULL as first argument to
* createClusterNode()). Everything will be fixed during the
* handshake. */
- n = createClusterNode(NULL,REDIS_NODE_HANDSHAKE|REDIS_NODE_MEET);
+ n = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_MEET);
memcpy(n->ip,norm_ip,sizeof(n->ip));
n->port = port;
+ n->cport = cport;
clusterAddNode(n);
return 1;
}
@@ -1158,13 +1345,16 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
clusterNode *node;
sds ci;
- ci = representRedisNodeFlags(sdsempty(), flags);
- redisLog(REDIS_DEBUG,"GOSSIP %.40s %s:%d %s",
- g->nodename,
- g->ip,
- ntohs(g->port),
- ci);
- sdsfree(ci);
+ if (server.verbosity == LL_DEBUG) {
+ ci = representClusterNodeFlags(sdsempty(), flags);
+ serverLog(LL_DEBUG,"GOSSIP %.40s %s:%d@%d %s",
+ g->nodename,
+ g->ip,
+ ntohs(g->port),
+ ntohs(g->cport),
+ ci);
+ sdsfree(ci);
+ }
/* Update our state accordingly to the gossip sections */
node = clusterLookupNode(g->nodename);
@@ -1172,31 +1362,61 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
/* We already know this node.
Handle failure reports, only when the sender is a master. */
if (sender && nodeIsMaster(sender) && node != myself) {
- if (flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL)) {
+ if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) {
if (clusterNodeAddFailureReport(node,sender)) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s reported node %.40s as not reachable.",
sender->name, node->name);
}
markNodeAsFailingIfNeeded(node);
} else {
if (clusterNodeDelFailureReport(node,sender)) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s reported node %.40s is back online.",
sender->name, node->name);
}
}
}
+ /* If from our POV the node is up (no failure flags are set),
+ * we have no pending ping for the node, and we have no failure
+ * reports for this node, update the last pong time with the
+ * one we see from the other nodes. */
+ if (!(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ node->ping_sent == 0 &&
+ clusterNodeFailureReportsCount(node) == 0)
+ {
+ mstime_t pongtime = ntohl(g->pong_received);
+ pongtime *= 1000; /* Convert back to milliseconds. */
+
+ /* Replace the pong time with the received one only if
+ * it's greater than our view but is not in the future
+ * (with 500 milliseconds tolerance) from the POV of our
+ * clock. */
+ if (pongtime <= (server.mstime+500) &&
+ pongtime > node->pong_received)
+ {
+ node->pong_received = pongtime;
+ }
+ }
+
/* If we already know this node, but it is not reachable, and
- * we see a different address in the gossip section, start an
- * handshake with the (possibly) new address: this will result
- * into a node address update if the handshake will be
- * successful. */
- if (node->flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL) &&
- (strcasecmp(node->ip,g->ip) || node->port != ntohs(g->port)))
+ * we see a different address in the gossip section of a node that
+ * can talk with this other node, update the address and disconnect
+ * the old link if any, so that we'll attempt to connect with the
+ * new address. */
+ if (node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL) &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
+ !(flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) &&
+ (strcasecmp(node->ip,g->ip) ||
+ node->port != ntohs(g->port) ||
+ node->cport != ntohs(g->cport)))
{
- clusterStartHandshake(g->ip,ntohs(g->port));
+ if (node->link) freeClusterLink(node->link);
+ memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+ node->port = ntohs(g->port);
+ node->cport = ntohs(g->cport);
+ node->flags &= ~CLUSTER_NODE_NOADDR;
}
} else {
/* If it's not in NOADDR state and we don't have it, we
@@ -1206,10 +1426,10 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
* is a well known node in our cluster, otherwise we risk
* joining another cluster. */
if (sender &&
- !(flags & REDIS_NODE_NOADDR) &&
+ !(flags & CLUSTER_NODE_NOADDR) &&
!clusterBlacklistExists(g->nodename))
{
- clusterStartHandshake(g->ip,ntohs(g->port));
+ clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport));
}
}
@@ -1218,23 +1438,36 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
}
}
-/* IP -> string conversion. 'buf' is supposed to at least be 46 bytes. */
-void nodeIp2String(char *buf, clusterLink *link) {
- anetPeerToString(link->fd, buf, REDIS_IP_STR_LEN, NULL);
+/* IP -> string conversion. 'buf' is supposed to be at least 46 bytes.
+ * If 'announced_ip' length is non-zero, it is used instead of extracting
+ * the IP from the socket peer address. */
+void nodeIp2String(char *buf, clusterLink *link, char *announced_ip) {
+ if (announced_ip[0] != '\0') {
+ memcpy(buf,announced_ip,NET_IP_STR_LEN);
+ buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. */
+ } else {
+ anetPeerToString(link->fd, buf, NET_IP_STR_LEN, NULL);
+ }
}
/* Update the node address to the IP address that can be extracted
- * from link->fd, and at the specified port.
- * Also disconnect the node link so that we'll connect again to the new
- * address.
+ * from link->fd, or if hdr->myip is non-empty, to the address the node
+ * is announcing to us. The port is taken from the packet header as well.
+ *
+ * If the address or port changed, disconnect the node link so that we'll
+ * connect again to the new address.
*
* If the ip/port pair are already correct no operation is performed at
* all.
*
* The function returns 0 if the node address is still the same,
* otherwise 1 is returned. */
-int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
- char ip[REDIS_IP_STR_LEN];
+int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link,
+ clusterMsg *hdr)
+{
+ char ip[NET_IP_STR_LEN] = {0};
+ int port = ntohs(hdr->port);
+ int cport = ntohs(hdr->cport);
/* We don't proceed if the link is the same as the sender link, as this
* function is designed to see if the node link is consistent with the
@@ -1244,15 +1477,17 @@ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
* it is safe to call during packet processing. */
if (link == node->link) return 0;
- nodeIp2String(ip,link);
- if (node->port == port && strcmp(ip,node->ip) == 0) return 0;
+ nodeIp2String(ip,link,hdr->myip);
+ if (node->port == port && node->cport == cport &&
+ strcmp(ip,node->ip) == 0) return 0;
/* IP / port is different, update it. */
memcpy(node->ip,ip,sizeof(ip));
node->port = port;
+ node->cport = cport;
if (node->link) freeClusterLink(node->link);
- node->flags &= ~REDIS_NODE_NOADDR;
- redisLog(REDIS_WARNING,"Address updated for node %.40s, now %s:%d",
+ node->flags &= ~CLUSTER_NODE_NOADDR;
+ serverLog(LL_WARNING,"Address updated for node %.40s, now %s:%d",
node->name, node->ip, node->port);
/* Check if this is our master and we have to change the
@@ -1268,9 +1503,12 @@ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
void clusterSetNodeAsMaster(clusterNode *n) {
if (nodeIsMaster(n)) return;
- if (n->slaveof) clusterNodeRemoveSlave(n->slaveof,n);
- n->flags &= ~REDIS_NODE_SLAVE;
- n->flags |= REDIS_NODE_MASTER;
+ if (n->slaveof) {
+ clusterNodeRemoveSlave(n->slaveof,n);
+ if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO;
+ }
+ n->flags &= ~CLUSTER_NODE_SLAVE;
+ n->flags |= CLUSTER_NODE_MASTER;
n->slaveof = NULL;
/* Update config and state. */
@@ -1287,8 +1525,8 @@ void clusterSetNodeAsMaster(clusterNode *n) {
* node (see the function comments for more info).
*
* The 'sender' is the node for which we received a configuration update.
- * Sometimes it is not actually the "Sender" of the information, like in the case
- * we receive the info via an UPDATE packet. */
+ * Sometimes it is not actually the "Sender" of the information, like in the
+ * case we receive the info via an UPDATE packet. */
void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
int j;
clusterNode *curmaster, *newmaster = NULL;
@@ -1299,7 +1537,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
* If the update message is not able to demote a master to slave (in this
* case we'll resync with the master updating the whole key space), we
* need to delete all the keys in the slots we lost ownership. */
- uint16_t dirty_slots[REDIS_CLUSTER_SLOTS];
+ uint16_t dirty_slots[CLUSTER_SLOTS];
int dirty_slots_count = 0;
/* Here we set curmaster to this node or the node this node
@@ -1308,11 +1546,11 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
curmaster = nodeIsMaster(myself) ? myself : myself->slaveof;
if (sender == myself) {
- redisLog(REDIS_WARNING,"Discarding UPDATE message about myself.");
+ serverLog(LL_WARNING,"Discarding UPDATE message about myself.");
return;
}
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(slots,j)) {
/* The slot is already bound to the sender of this message. */
if (server.cluster->slots[j] == sender) continue;
@@ -1359,7 +1597,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
* 2) We are a slave and our master is left without slots. We need
* to replicate to the new slots owner. */
if (newmaster && curmaster->numslots == 0) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Configuration change detected. Reconfiguring myself "
"as a replica of %.40s", sender->name);
clusterSetMaster(sender);
@@ -1379,69 +1617,6 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
}
}
-/* This function is called when this node is a master, and we receive from
- * another master a configuration epoch that is equal to our configuration
- * epoch.
- *
- * BACKGROUND
- *
- * It is not possible that different slaves get the same config
- * epoch during a failover election, because the slaves need to get voted
- * by a majority. However when we perform a manual resharding of the cluster
- * the node will assign a configuration epoch to itself without to ask
- * for agreement. Usually resharding happens when the cluster is working well
- * and is supervised by the sysadmin, however it is possible for a failover
- * to happen exactly while the node we are resharding a slot to assigns itself
- * a new configuration epoch, but before it is able to propagate it.
- *
- * So technically it is possible in this condition that two nodes end with
- * the same configuration epoch.
- *
- * Another possibility is that there are bugs in the implementation causing
- * this to happen.
- *
- * Moreover when a new cluster is created, all the nodes start with the same
- * configEpoch. This collision resolution code allows nodes to automatically
- * end with a different configEpoch at startup automatically.
- *
- * In all the cases, we want a mechanism that resolves this issue automatically
- * as a safeguard. The same configuration epoch for masters serving different
- * set of slots is not harmful, but it is if the nodes end serving the same
- * slots for some reason (manual errors or software bugs) without a proper
- * failover procedure.
- *
- * In general we want a system that eventually always ends with different
- * masters having different configuration epochs whatever happened, since
- * nothign is worse than a split-brain condition in a distributed system.
- *
- * BEHAVIOR
- *
- * When this function gets called, what happens is that if this node
- * has the lexicographically smaller Node ID compared to the other node
- * with the conflicting epoch (the 'sender' node), it will assign itself
- * the greatest configuration epoch currently detected among nodes plus 1.
- *
- * This means that even if there are multiple nodes colliding, the node
- * with the greatest Node ID never moves forward, so eventually all the nodes
- * end with a different configuration epoch.
- */
-void clusterHandleConfigEpochCollision(clusterNode *sender) {
- /* Prerequisites: nodes have the same configEpoch and are both masters. */
- if (sender->configEpoch != myself->configEpoch ||
- !nodeIsMaster(sender) || !nodeIsMaster(myself)) return;
- /* Don't act if the colliding node has a smaller Node ID. */
- if (memcmp(sender->name,myself->name,REDIS_CLUSTER_NAMELEN) <= 0) return;
- /* Get the next ID available at the best of this node knowledge. */
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterSaveConfigOrDie(1);
- redisLog(REDIS_VERBOSE,
- "WARNING: configEpoch collision with node %.40s."
- " configEpoch set to %llu",
- sender->name,
- (unsigned long long) myself->configEpoch);
-}
-
/* When this function is called, there is a packet to process starting
* at node->rcvbuf. Releasing the buffer is up to the caller, so this
* function should just handle the higher level stuff of processing the
@@ -1455,19 +1630,25 @@ int clusterProcessPacket(clusterLink *link) {
clusterMsg *hdr = (clusterMsg*) link->rcvbuf;
uint32_t totlen = ntohl(hdr->totlen);
uint16_t type = ntohs(hdr->type);
- uint16_t flags = ntohs(hdr->flags);
- uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0;
- clusterNode *sender;
- server.cluster->stats_bus_messages_received++;
- redisLog(REDIS_DEBUG,"--- Processing packet of type %d, %lu bytes",
+ if (type < CLUSTERMSG_TYPE_COUNT)
+ server.cluster->stats_bus_messages_received[type]++;
+ serverLog(LL_DEBUG,"--- Processing packet of type %d, %lu bytes",
type, (unsigned long) totlen);
/* Perform sanity checks */
if (totlen < 16) return 1; /* At least signature, version, totlen, count. */
- if (ntohs(hdr->ver) != CLUSTER_PROTO_VER)
- return 1; /* Can't handle versions other than the current one.*/
if (totlen > sdslen(link->rcvbuf)) return 1;
+
+ if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) {
+ /* Can't handle messages of different versions. */
+ return 1;
+ }
+
+ uint16_t flags = ntohs(hdr->flags);
+ uint64_t senderCurrentEpoch = 0, senderConfigEpoch = 0;
+ clusterNode *sender;
+
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
{
@@ -1502,6 +1683,12 @@ int clusterProcessPacket(clusterLink *link) {
explen += sizeof(clusterMsgDataUpdate);
if (totlen != explen) return 1;
+ } else if (type == CLUSTERMSG_TYPE_MODULE) {
+ uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+
+ explen += sizeof(clusterMsgModule) -
+ 3 + ntohl(hdr->data.module.msg.len);
+ if (totlen != explen) return 1;
}
/* Check if the sender is a known node. */
@@ -1530,7 +1717,7 @@ int clusterProcessPacket(clusterLink *link) {
server.cluster->mf_master_offset == 0)
{
server.cluster->mf_master_offset = sender->repl_offset;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Received replication offset for paused "
"master manual failover: %lld",
server.cluster->mf_master_offset);
@@ -1539,11 +1726,11 @@ int clusterProcessPacket(clusterLink *link) {
/* Initial processing of PING and MEET requests replying with a PONG. */
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_MEET) {
- redisLog(REDIS_DEBUG,"Ping packet received: %p", (void*)link->node);
+ serverLog(LL_DEBUG,"Ping packet received: %p", (void*)link->node);
/* We use incoming MEET messages in order to set the address
* for 'myself', since only other cluster nodes will send us
- * MEET messagses on handshakes, when the cluster joins, or
+ * MEET messages on handshakes, when the cluster joins, or
* later if we changed address, and those nodes will use our
 * official address to connect to us. So obtaining this address
* from the socket is a simple way to discover / update our own
@@ -1552,14 +1739,16 @@ int clusterProcessPacket(clusterLink *link) {
* However if we don't have an address at all, we update the address
* even with a normal PING packet. If it's wrong it will be fixed
* by MEET later. */
- if (type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') {
- char ip[REDIS_IP_STR_LEN];
+ if ((type == CLUSTERMSG_TYPE_MEET || myself->ip[0] == '\0') &&
+ server.cluster_announce_ip == NULL)
+ {
+ char ip[NET_IP_STR_LEN];
if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 &&
strcmp(ip,myself->ip))
{
- memcpy(myself->ip,ip,REDIS_IP_STR_LEN);
- redisLog(REDIS_WARNING,"IP address for this node updated to %s",
+ memcpy(myself->ip,ip,NET_IP_STR_LEN);
+ serverLog(LL_WARNING,"IP address for this node updated to %s",
myself->ip);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
}
@@ -1572,9 +1761,10 @@ int clusterProcessPacket(clusterLink *link) {
if (!sender && type == CLUSTERMSG_TYPE_MEET) {
clusterNode *node;
- node = createClusterNode(NULL,REDIS_NODE_HANDSHAKE);
- nodeIp2String(node->ip,link);
+ node = createClusterNode(NULL,CLUSTER_NODE_HANDSHAKE);
+ nodeIp2String(node->ip,link,hdr->myip);
node->port = ntohs(hdr->port);
+ node->cport = ntohs(hdr->cport);
clusterAddNode(node);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
}
@@ -1593,7 +1783,7 @@ int clusterProcessPacket(clusterLink *link) {
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG ||
type == CLUSTERMSG_TYPE_MEET)
{
- redisLog(REDIS_DEBUG,"%s packet received: %p",
+ serverLog(LL_DEBUG,"%s packet received: %p",
type == CLUSTERMSG_TYPE_PING ? "ping" : "pong",
(void*)link->node);
if (link->node) {
@@ -1601,48 +1791,64 @@ int clusterProcessPacket(clusterLink *link) {
/* If we already have this node, try to change the
* IP/port of the node with the new one. */
if (sender) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Handshake: we already know node %.40s, "
"updating the address if needed.", sender->name);
- if (nodeUpdateAddressIfNeeded(sender,link,ntohs(hdr->port)))
+ if (nodeUpdateAddressIfNeeded(sender,link,hdr))
{
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
}
/* Free this node as we already have it. This will
* cause the link to be freed as well. */
- freeClusterNode(link->node);
+ clusterDelNode(link->node);
return 0;
}
/* First thing to do is replacing the random name with the
* right node name if this was a handshake stage. */
clusterRenameNode(link->node, hdr->sender);
- redisLog(REDIS_DEBUG,"Handshake with node %.40s completed.",
+ serverLog(LL_DEBUG,"Handshake with node %.40s completed.",
link->node->name);
- link->node->flags &= ~REDIS_NODE_HANDSHAKE;
- link->node->flags |= flags&(REDIS_NODE_MASTER|REDIS_NODE_SLAVE);
+ link->node->flags &= ~CLUSTER_NODE_HANDSHAKE;
+ link->node->flags |= flags&(CLUSTER_NODE_MASTER|CLUSTER_NODE_SLAVE);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
} else if (memcmp(link->node->name,hdr->sender,
- REDIS_CLUSTER_NAMELEN) != 0)
+ CLUSTER_NAMELEN) != 0)
{
/* If the reply has a non matching node ID we
* disconnect this node and set it as not having an associated
* address. */
- redisLog(REDIS_DEBUG,"PONG contains mismatching sender ID");
- link->node->flags |= REDIS_NODE_NOADDR;
+ serverLog(LL_DEBUG,"PONG contains mismatching sender ID. About node %.40s added %d ms ago, having flags %d",
+ link->node->name,
+ (int)(mstime()-(link->node->ctime)),
+ link->node->flags);
+ link->node->flags |= CLUSTER_NODE_NOADDR;
link->node->ip[0] = '\0';
link->node->port = 0;
+ link->node->cport = 0;
freeClusterLink(link);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG);
return 0;
}
}
+ /* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender
+ * announced. This is a dynamic flag that we receive from the
+ * sender, and the latest status must be trusted. We need it to
+ * be propagated because the slave ranking, used to compute the
+ * delay of each slave in the voting process, needs to know
+ * which instances are really competing. */
+ if (sender) {
+ int nofailover = flags & CLUSTER_NODE_NOFAILOVER;
+ sender->flags &= ~CLUSTER_NODE_NOFAILOVER;
+ sender->flags |= nofailover;
+ }
+
/* Update the node address if it changed. */
if (sender && type == CLUSTERMSG_TYPE_PING &&
!nodeInHandshake(sender) &&
- nodeUpdateAddressIfNeeded(sender,link,ntohs(hdr->port)))
+ nodeUpdateAddressIfNeeded(sender,link,hdr))
{
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
@@ -1660,7 +1866,7 @@ int clusterProcessPacket(clusterLink *link) {
* The FAIL condition is also reversible under specific
* conditions detected by clearNodeFailureIfNeeded(). */
if (nodeTimedOut(link->node)) {
- link->node->flags &= ~REDIS_NODE_PFAIL;
+ link->node->flags &= ~CLUSTER_NODE_PFAIL;
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
} else if (nodeFailed(link->node)) {
@@ -1670,7 +1876,7 @@ int clusterProcessPacket(clusterLink *link) {
/* Check for role switch: slave -> master or master -> slave. */
if (sender) {
- if (!memcmp(hdr->slaveof,REDIS_NODE_NULL_NAME,
+ if (!memcmp(hdr->slaveof,CLUSTER_NODE_NULL_NAME,
sizeof(hdr->slaveof)))
{
/* Node is a master. */
@@ -1682,11 +1888,9 @@ int clusterProcessPacket(clusterLink *link) {
if (nodeIsMaster(sender)) {
/* Master turned into a slave! Reconfigure the node. */
clusterDelNodeSlots(sender);
- sender->flags &= ~REDIS_NODE_MASTER;
- sender->flags |= REDIS_NODE_SLAVE;
-
- /* Remove the list of slaves from the node. */
- if (sender->numslaves) clusterNodeResetSlaves(sender);
+ sender->flags &= ~(CLUSTER_NODE_MASTER|
+ CLUSTER_NODE_MIGRATE_TO);
+ sender->flags |= CLUSTER_NODE_SLAVE;
/* Update config and state. */
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
@@ -1709,7 +1913,7 @@ int clusterProcessPacket(clusterLink *link) {
/* Update our info about served slots.
*
* Note: this MUST happen after we update the master/slave state
- * so that REDIS_NODE_MASTER flag will be set. */
+ * so that CLUSTER_NODE_MASTER flag will be set. */
/* Many checks are only needed if the set of served slots this
* instance claims is different compared to the set of slots we have
@@ -1753,14 +1957,14 @@ int clusterProcessPacket(clusterLink *link) {
if (sender && dirty_slots) {
int j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(hdr->myslots,j)) {
if (server.cluster->slots[j] == sender ||
server.cluster->slots[j] == NULL) continue;
if (server.cluster->slots[j]->configEpoch >
senderConfigEpoch)
{
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Node %.40s has old slots configuration, sending "
"an UPDATE message about %.40s",
sender->name, server.cluster->slots[j]->name);
@@ -1793,19 +1997,19 @@ int clusterProcessPacket(clusterLink *link) {
if (sender) {
failing = clusterLookupNode(hdr->data.fail.about.nodename);
if (failing &&
- !(failing->flags & (REDIS_NODE_FAIL|REDIS_NODE_MYSELF)))
+ !(failing->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_MYSELF)))
{
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"FAIL message received from %.40s about %.40s",
hdr->sender, hdr->data.fail.about.nodename);
- failing->flags |= REDIS_NODE_FAIL;
+ failing->flags |= CLUSTER_NODE_FAIL;
failing->fail_time = mstime();
- failing->flags &= ~REDIS_NODE_PFAIL;
+ failing->flags &= ~CLUSTER_NODE_PFAIL;
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE);
}
} else {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Ignoring FAIL message from unknown node %.40s about %.40s",
hdr->sender, hdr->data.fail.about.nodename);
}
@@ -1852,10 +2056,10 @@ int clusterProcessPacket(clusterLink *link) {
/* Manual failover requested from slaves. Initialize the state
* accordingly. */
resetManualFailover();
- server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
+ server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
server.cluster->mf_slave = sender;
- pauseClients(mstime()+(REDIS_CLUSTER_MF_TIMEOUT*2));
- redisLog(REDIS_WARNING,"Manual failover requested by slave %.40s.",
+ pauseClients(mstime()+(CLUSTER_MF_TIMEOUT*2));
+ serverLog(LL_WARNING,"Manual failover requested by slave %.40s.",
sender->name);
} else if (type == CLUSTERMSG_TYPE_UPDATE) {
clusterNode *n; /* The node the update is about. */
@@ -1879,8 +2083,17 @@ int clusterProcessPacket(clusterLink *link) {
* config accordingly. */
clusterUpdateSlotsConfigWith(n,reportedConfigEpoch,
hdr->data.update.nodecfg.slots);
+ } else if (type == CLUSTERMSG_TYPE_MODULE) {
+ if (!sender) return 1; /* Protect the module from unknown nodes. */
+ /* We need to route this message back to the right module subscribed
+ * for the right message type. */
+ uint64_t module_id = hdr->data.module.msg.module_id; /* Endian-safe ID */
+ uint32_t len = ntohl(hdr->data.module.msg.len);
+ uint8_t type = hdr->data.module.msg.type;
+ unsigned char *payload = hdr->data.module.msg.bulk_data;
+ moduleCallClusterReceivers(sender->name,module_id,type,payload,len);
} else {
- redisLog(REDIS_WARNING,"Received unknown packet type: %d", type);
+ serverLog(LL_WARNING,"Received unknown packet type: %d", type);
}
return 1;
}
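
Since the MODULE branch above routes packets into moduleCallClusterReceivers(), a module sees them through the modules cluster API. The following sketch is modeled on the hellocluster.c example shipped with Redis; the module name and message type value are arbitrary.

    #include "redismodule.h"

    #define MSGTYPE_PING 1

    /* Receiver: invoked via moduleCallClusterReceivers() when a MODULE
     * packet carrying our message type arrives from another node. */
    void PingReceiver(RedisModuleCtx *ctx, const char *sender_id,
                      uint8_t type, const unsigned char *payload, uint32_t len)
    {
        /* sender_id is a 40 byte node ID, not NUL terminated. */
        RedisModule_Log(ctx,"notice","PING (type %d) from %.*s: '%.*s'",
            type,(int)REDISMODULE_NODE_ID_LEN,sender_id,(int)len,payload);
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv,
                           int argc)
    {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        if (RedisModule_Init(ctx,"pingmod",1,REDISMODULE_APIVER_1)
            == REDISMODULE_ERR) return REDISMODULE_ERR;
        RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PING,
                                                   PingReceiver);
        /* A NULL target broadcasts to every node in the cluster. */
        RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PING,
                                       (unsigned char*)"Hey",3);
        return REDISMODULE_OK;
    }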
@@ -1901,13 +2114,13 @@ void handleLinkIOError(clusterLink *link) {
void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
clusterLink *link = (clusterLink*) privdata;
ssize_t nwritten;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(mask);
nwritten = write(fd, link->sndbuf, sdslen(link->sndbuf));
if (nwritten <= 0) {
- redisLog(REDIS_DEBUG,"I/O error writing to node link: %s",
- strerror(errno));
+ serverLog(LL_DEBUG,"I/O error writing to node link: %s",
+ (nwritten == -1) ? strerror(errno) : "short write");
handleLinkIOError(link);
return;
}
@@ -1925,8 +2138,8 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
clusterMsg *hdr;
clusterLink *link = (clusterLink*) privdata;
unsigned int readlen, rcvbuflen;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(mask);
while(1) { /* Read as long as there is data to read. */
rcvbuflen = sdslen(link->rcvbuf);
@@ -1943,7 +2156,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (memcmp(hdr->sig,"RCmb",4) != 0 ||
ntohl(hdr->totlen) < CLUSTERMSG_MIN_LEN)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Bad message length or signature received "
"from Cluster bus.");
handleLinkIOError(link);
@@ -1959,7 +2172,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (nread <= 0) {
/* I/O error... */
- redisLog(REDIS_DEBUG,"I/O error reading from node link: %s",
+ serverLog(LL_DEBUG,"I/O error reading from node link: %s",
(nread == 0) ? "connection closed" : strerror(errno));
handleLinkIOError(link);
return;
@@ -1989,11 +2202,16 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* from event handlers that will do stuff with the same link later. */
void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) {
if (sdslen(link->sndbuf) == 0 && msglen != 0)
- aeCreateFileEvent(server.el,link->fd,AE_WRITABLE,
+ aeCreateFileEvent(server.el,link->fd,AE_WRITABLE|AE_BARRIER,
clusterWriteHandler,link);
link->sndbuf = sdscatlen(link->sndbuf, msg, msglen);
- server.cluster->stats_bus_messages_sent++;
+
+ /* Populate sent messages stats. */
+ clusterMsg *hdr = (clusterMsg*) msg;
+ uint16_t type = ntohs(hdr->type);
+ if (type < CLUSTERMSG_TYPE_COUNT)
+ server.cluster->stats_bus_messages_sent[type]++;
}
/* Send a message to all the nodes that are part of the cluster having
@@ -2011,14 +2229,15 @@ void clusterBroadcastMessage(void *buf, size_t len) {
clusterNode *node = dictGetVal(de);
if (!node->link) continue;
- if (node->flags & (REDIS_NODE_MYSELF|REDIS_NODE_HANDSHAKE))
+ if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
continue;
clusterSendMessage(node->link,buf,len);
}
dictReleaseIterator(di);
}
-/* Build the message header */
+/* Build the message header. hdr must point to a buffer at least
+ * sizeof(clusterMsg) in bytes. */
void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
int totlen = 0;
uint64_t offset;
@@ -2038,13 +2257,30 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
hdr->sig[2] = 'm';
hdr->sig[3] = 'b';
hdr->type = htons(type);
- memcpy(hdr->sender,myself->name,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->sender,myself->name,CLUSTER_NAMELEN);
+
+ /* If cluster-announce-ip option is enabled, force the receivers of our
+ * packets to use the specified address for this node. Otherwise if the
+ * first byte is zero, they'll do auto discovery. */
+ memset(hdr->myip,0,NET_IP_STR_LEN);
+ if (server.cluster_announce_ip) {
+ strncpy(hdr->myip,server.cluster_announce_ip,NET_IP_STR_LEN);
+ hdr->myip[NET_IP_STR_LEN-1] = '\0';
+ }
+
+ /* Handle cluster-announce-port as well. */
+ int announced_port = server.cluster_announce_port ?
+ server.cluster_announce_port : server.port;
+ int announced_cport = server.cluster_announce_bus_port ?
+ server.cluster_announce_bus_port :
+ (server.port + CLUSTER_PORT_INCR);
memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots));
- memset(hdr->slaveof,0,REDIS_CLUSTER_NAMELEN);
+ memset(hdr->slaveof,0,CLUSTER_NAMELEN);
if (myself->slaveof != NULL)
- memcpy(hdr->slaveof,myself->slaveof->name, REDIS_CLUSTER_NAMELEN);
- hdr->port = htons(server.port);
+ memcpy(hdr->slaveof,myself->slaveof->name, CLUSTER_NAMELEN);
+ hdr->port = htons(announced_port);
+ hdr->cport = htons(announced_cport);
hdr->flags = htons(myself->flags);
hdr->state = server.cluster->state;
@@ -2076,68 +2312,161 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
/* For PING, PONG, and MEET, fixing the totlen field is up to the caller. */
}
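
The announce fields populated above exist so that nodes behind NAT or port-mapped containers can publish reachable coordinates instead of their raw socket addresses. A minimal sketch of the corresponding redis.conf settings (the address and ports are placeholders, not defaults):

    cluster-enabled yes
    cluster-announce-ip 10.1.1.5
    cluster-announce-port 6379
    cluster-announce-bus-port 16379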
+/* Return non zero if the node is already present in the gossip section of the
+ * message pointed by 'hdr' and having 'count' gossip entries. Otherwise
+ * zero is returned. Helper for clusterSendPing(). */
+int clusterNodeIsInGossipSection(clusterMsg *hdr, int count, clusterNode *n) {
+ int j;
+ for (j = 0; j < count; j++) {
+ if (memcmp(hdr->data.ping.gossip[j].nodename,n->name,
+ CLUSTER_NAMELEN) == 0) break;
+ }
+ return j != count;
+}
+
+/* Set the i-th entry of the gossip section in the message pointed by 'hdr'
+ * to the info of the specified node 'n'. */
+void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) {
+ clusterMsgDataGossip *gossip;
+ gossip = &(hdr->data.ping.gossip[i]);
+ memcpy(gossip->nodename,n->name,CLUSTER_NAMELEN);
+ gossip->ping_sent = htonl(n->ping_sent/1000);
+ gossip->pong_received = htonl(n->pong_received/1000);
+ memcpy(gossip->ip,n->ip,sizeof(n->ip));
+ gossip->port = htons(n->port);
+ gossip->cport = htons(n->cport);
+ gossip->flags = htons(n->flags);
+ gossip->notused1 = 0;
+}
+
/* Send a PING or PONG packet to the specified node, making sure to add enough
 * gossip information. */
void clusterSendPing(clusterLink *link, int type) {
- unsigned char buf[sizeof(clusterMsg)+sizeof(clusterMsgDataGossip)*3];
- clusterMsg *hdr = (clusterMsg*) buf;
- int gossipcount = 0, totlen;
- /* freshnodes is the number of nodes we can still use to populate the
- * gossip section of the ping packet. Basically we start with the nodes
- * we have in memory minus two (ourself and the node we are sending the
- * message to). Every time we add a node we decrement the counter, so when
- * it will drop to <= zero we know there is no more gossip info we can
- * send. */
+ unsigned char *buf;
+ clusterMsg *hdr;
+ int gossipcount = 0; /* Number of gossip sections added so far. */
+ int wanted; /* Number of gossip sections we want to append if possible. */
+ int totlen; /* Total packet length. */
+ /* freshnodes is the max number of nodes we can hope to append at all:
+ * nodes available minus two (ourself and the node we are sending the
+ * message to). However in practice there may be fewer valid nodes,
+ * since nodes in handshake state or disconnected nodes are not
+ * considered. */
int freshnodes = dictSize(server.cluster->nodes)-2;
+ /* How many gossip sections do we want to add? 1/10 of the number of
+ * nodes, and anyway at least 3. Why 1/10?
+ *
+ * If we have N masters, with N/10 entries, and we consider that in
+ * node_timeout we exchange with each other node at least 4 packets
+ * (we ping in the worst case in node_timeout/2 time, and we also
+ * receive two pings from the host), we have a total of 8 packets
+ * in the node_timeout*2 failure reports validity time. So we have
+ * that, for a single PFAIL node, we can expect to receive the following
+ * number of failure reports (in the specified window of time):
+ *
+ * PROB * GOSSIP_ENTRIES_PER_PACKET * TOTAL_PACKETS:
+ *
+ * PROB = probability of being featured in a single gossip entry,
+ * which is 1 / NUM_OF_NODES.
+ * ENTRIES = N/10 (the number of gossip entries per packet).
+ * TOTAL_PACKETS = 2 * 4 * NUM_OF_MASTERS.
+ *
+ * If we assume we have just masters (so num of nodes and num of masters
+ * is the same), with 1/10 we always get over the majority, and specifically
+ * 80% of the number of nodes, to account for many masters failing at the
+ * same time.
+ *
+ * Since we have non-voting slaves that lower the probability of an entry
+ * to feature our node, we set the number of entries per packet as
+ * 10% of the total nodes we have. */
+ wanted = floor(dictSize(server.cluster->nodes)/10);
+ if (wanted < 3) wanted = 3;
+ if (wanted > freshnodes) wanted = freshnodes;
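
To make the arithmetic above concrete: with N = 100 nodes (all masters), each packet carries wanted = 10 gossip entries, a given node appears in any single entry with probability 1/100, and in node_timeout*2 the cluster exchanges about 2 * 4 * 100 = 800 packets, so a PFAIL node can expect roughly (1/100) * 10 * 800 = 80 failure reports, i.e. 80% of the nodes and well above the needed majority.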
+
+ /* Include all the nodes in PFAIL state, so that failure reports
+ * propagate faster, letting nodes go from PFAIL to FAIL state. */
+ int pfail_wanted = server.cluster->stats_pfail_nodes;
+
+ /* Compute the maximum totlen to allocate our buffer. We'll fix the totlen
+ * later according to the number of gossip sections we really were able
+ * to put inside the packet. */
+ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ totlen += (sizeof(clusterMsgDataGossip)*(wanted+pfail_wanted));
+ /* Note: clusterBuildMessageHdr() expects the buffer to always be
+ * at least sizeof(clusterMsg) bytes. */
+ if (totlen < (int)sizeof(clusterMsg)) totlen = sizeof(clusterMsg);
+ buf = zcalloc(totlen);
+ hdr = (clusterMsg*) buf;
+
+ /* Populate the header. */
if (link->node && type == CLUSTERMSG_TYPE_PING)
link->node->ping_sent = mstime();
clusterBuildMessageHdr(hdr,type);
/* Populate the gossip fields */
- while(freshnodes > 0 && gossipcount < 3) {
+ int maxiterations = wanted*3;
+ while(freshnodes > 0 && gossipcount < wanted && maxiterations--) {
dictEntry *de = dictGetRandomKey(server.cluster->nodes);
clusterNode *this = dictGetVal(de);
- clusterMsgDataGossip *gossip;
- int j;
+
+ /* Don't include this node: the whole packet header is about us
+ * already, so we just gossip about other nodes. */
+ if (this == myself) continue;
+
+ /* PFAIL nodes will be added later. */
+ if (this->flags & CLUSTER_NODE_PFAIL) continue;
/* In the gossip section don't include:
- * 1) Myself.
- * 2) Nodes in HANDSHAKE state.
+ * 1) Nodes in HANDSHAKE state.
- * 3) Nodes with the NOADDR flag set.
- * 4) Disconnected nodes if they don't have configured slots.
+ * 2) Nodes with the NOADDR flag set.
+ * 3) Disconnected nodes if they don't have configured slots.
*/
- if (this == myself ||
- this->flags & (REDIS_NODE_HANDSHAKE|REDIS_NODE_NOADDR) ||
+ if (this->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) ||
(this->link == NULL && this->numslots == 0))
{
- freshnodes--; /* otherwise we may loop forever. */
- continue;
+ freshnodes--; /* Technically not correct, but saves CPU. */
+ continue;
}
- /* Check if we already added this node */
- for (j = 0; j < gossipcount; j++) {
- if (memcmp(hdr->data.ping.gossip[j].nodename,this->name,
- REDIS_CLUSTER_NAMELEN) == 0) break;
- }
- if (j != gossipcount) continue;
+ /* Do not add a node we already have. */
+ if (clusterNodeIsInGossipSection(hdr,gossipcount,this)) continue;
/* Add it */
+ clusterSetGossipEntry(hdr,gossipcount,this);
freshnodes--;
- gossip = &(hdr->data.ping.gossip[gossipcount]);
- memcpy(gossip->nodename,this->name,REDIS_CLUSTER_NAMELEN);
- gossip->ping_sent = htonl(this->ping_sent);
- gossip->pong_received = htonl(this->pong_received);
- memcpy(gossip->ip,this->ip,sizeof(this->ip));
- gossip->port = htons(this->port);
- gossip->flags = htons(this->flags);
gossipcount++;
}
+
+ /* If there are PFAIL nodes, add them at the end. */
+ if (pfail_wanted) {
+ dictIterator *di;
+ dictEntry *de;
+
+ di = dictGetSafeIterator(server.cluster->nodes);
+ while((de = dictNext(di)) != NULL && pfail_wanted > 0) {
+ clusterNode *node = dictGetVal(de);
+ if (node->flags & CLUSTER_NODE_HANDSHAKE) continue;
+ if (node->flags & CLUSTER_NODE_NOADDR) continue;
+ if (!(node->flags & CLUSTER_NODE_PFAIL)) continue;
+ clusterSetGossipEntry(hdr,gossipcount,node);
+ freshnodes--;
+ gossipcount++;
+ /* We count against the gossip entries we allocated, since the
+ * PFAIL stats may not match perfectly with the current number
+ * of PFAIL nodes. */
+ pfail_wanted--;
+ }
+ dictReleaseIterator(di);
+ }
+
+ /* Ready to send... fix the totlen field and queue the message in the
+ * output buffer. */
totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
totlen += (sizeof(clusterMsgDataGossip)*gossipcount);
hdr->count = htons(gossipcount);
hdr->totlen = htonl(totlen);
clusterSendMessage(link,buf,totlen);
+ zfree(buf);
}
/* Send a PONG packet to every connected node that's not in handshake state
@@ -2223,15 +2552,15 @@ void clusterSendPublish(clusterLink *link, robj *channel, robj *message) {
/* Send a FAIL message to all the nodes we are able to contact.
* The FAIL message is sent when we detect that a node is failing
- * (REDIS_NODE_PFAIL) and we also receive a gossip confirmation of this:
- * we switch the node state to REDIS_NODE_FAIL and ask all the other
+ * (CLUSTER_NODE_PFAIL) and we also receive a gossip confirmation of this:
+ * we switch the node state to CLUSTER_NODE_FAIL and ask all the other
* nodes to do the same ASAP. */
void clusterSendFail(char *nodename) {
unsigned char buf[sizeof(clusterMsg)];
clusterMsg *hdr = (clusterMsg*) buf;
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_FAIL);
- memcpy(hdr->data.fail.about.nodename,nodename,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->data.fail.about.nodename,nodename,CLUSTER_NAMELEN);
clusterBroadcastMessage(buf,ntohl(hdr->totlen));
}
@@ -2244,12 +2573,67 @@ void clusterSendUpdate(clusterLink *link, clusterNode *node) {
if (link == NULL) return;
clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_UPDATE);
- memcpy(hdr->data.update.nodecfg.nodename,node->name,REDIS_CLUSTER_NAMELEN);
+ memcpy(hdr->data.update.nodecfg.nodename,node->name,CLUSTER_NAMELEN);
hdr->data.update.nodecfg.configEpoch = htonu64(node->configEpoch);
memcpy(hdr->data.update.nodecfg.slots,node->slots,sizeof(node->slots));
clusterSendMessage(link,buf,ntohl(hdr->totlen));
}
+/* Send a MODULE message.
+ *
+ * If link is NULL, then the message is broadcasted to the whole cluster. */
+void clusterSendModule(clusterLink *link, uint64_t module_id, uint8_t type,
+ unsigned char *payload, uint32_t len) {
+ unsigned char buf[sizeof(clusterMsg)], *heapbuf;
+ clusterMsg *hdr = (clusterMsg*) buf;
+ uint32_t totlen;
+
+ clusterBuildMessageHdr(hdr,CLUSTERMSG_TYPE_MODULE);
+ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
+ totlen += sizeof(clusterMsgModule) - 3 + len;
+
+ hdr->data.module.msg.module_id = module_id; /* Already endian adjusted. */
+ hdr->data.module.msg.type = type;
+ hdr->data.module.msg.len = htonl(len);
+ hdr->totlen = htonl(totlen);
+
+ /* Try to use the local buffer if possible */
+ if (totlen < sizeof(buf)) {
+ heapbuf = buf;
+ } else {
+ heapbuf = zmalloc(totlen);
+ memcpy(heapbuf,hdr,sizeof(*hdr));
+ hdr = (clusterMsg*) heapbuf;
+ }
+ memcpy(hdr->data.module.msg.bulk_data,payload,len);
+
+ if (link)
+ clusterSendMessage(link,heapbuf,totlen);
+ else
+ clusterBroadcastMessage(heapbuf,totlen);
+
+ if (heapbuf != buf) zfree(heapbuf);
+}
+
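
The recurring sizeof(clusterMsgModule) - 3 + len arithmetic works because the payload is declared as a 3-byte placeholder array at the end of the struct, so subtracting the placeholder and adding the real payload length yields the on-wire size. A sketch of the layout this code assumes (field order as in cluster.h; the comments are ours):

    typedef struct {
        uint64_t module_id;         /* Sender module ID, already endian adjusted. */
        uint32_t len;               /* Length of bulk_data in bytes. */
        uint8_t type;               /* Module-defined message type (0-255). */
        unsigned char bulk_data[3]; /* 3-byte placeholder: the real payload of
                                     * 'len' bytes starts at this offset. */
    } clusterMsgModule;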
+/* This function gets a cluster node ID string as target, the same way node
+ * addresses are represented on the modules side, resolves the node, and sends
+ * the message. If the target is NULL the message is broadcasted.
+ *
+ * The function returns C_OK if the target is valid, otherwise C_ERR is
+ * returned. */
+int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, unsigned char *payload, uint32_t len) {
+ clusterNode *node = NULL;
+
+ if (target != NULL) {
+ node = clusterLookupNode(target);
+ if (node == NULL || node->link == NULL) return C_ERR;
+ }
+
+ clusterSendModule(target ? node->link : NULL,
+ module_id, type, payload, len);
+ return C_OK;
+}
+
/* -----------------------------------------------------------------------------
* CLUSTER Pub/Sub support
*
@@ -2332,7 +2716,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
* our currentEpoch was updated as a side effect of receiving this
* request, if the request epoch was greater. */
if (requestCurrentEpoch < server.cluster->currentEpoch) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: reqEpoch (%llu) < curEpoch(%llu)",
node->name,
(unsigned long long) requestCurrentEpoch,
@@ -2342,7 +2726,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* I already voted for this epoch? Return ASAP. */
if (server.cluster->lastVoteEpoch == server.cluster->currentEpoch) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: already voted for epoch %llu",
node->name,
(unsigned long long) server.cluster->currentEpoch);
@@ -2356,15 +2740,15 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
(!nodeFailed(master) && !force_ack))
{
if (nodeIsMaster(node)) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: it is a master node",
node->name);
} else if (master == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: I don't know its master",
node->name);
} else if (!nodeFailed(master)) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: its master is up",
node->name);
}
@@ -2376,7 +2760,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
* of the algorithm but makes the base case more linear. */
if (mstime() - node->slaveof->voted_time < server.cluster_node_timeout * 2)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: "
"can't vote about this master before %lld milliseconds",
node->name,
@@ -2388,7 +2772,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* The slave requesting the vote must have a configEpoch for the claimed
* slots that is >= the one of the masters currently serving the same
* slots in the current configuration. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (bitmapTestBit(claimed_slots, j) == 0) continue;
if (server.cluster->slots[j] == NULL ||
server.cluster->slots[j]->configEpoch <= requestConfigEpoch)
@@ -2398,7 +2782,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
/* If we reached this point we found a slot that in our current slots
* is served by a master with a greater configEpoch than the one claimed
* by the slave requesting our vote. Refuse to vote for this slave. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover auth denied to %.40s: "
"slot %d epoch (%llu) > reqEpoch (%llu)",
node->name, j,
@@ -2408,10 +2792,11 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
}
/* We can vote for this slave. */
- clusterSendFailoverAuth(node);
server.cluster->lastVoteEpoch = server.cluster->currentEpoch;
node->slaveof->voted_time = mstime();
- redisLog(REDIS_WARNING, "Failover auth granted to %.40s for epoch %llu",
+ clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_FSYNC_CONFIG);
+ clusterSendFailoverAuth(node);
+ serverLog(LL_WARNING, "Failover auth granted to %.40s for epoch %llu",
node->name, (unsigned long long) server.cluster->currentEpoch);
}
@@ -2432,13 +2817,14 @@ int clusterGetSlaveRank(void) {
int j, rank = 0;
clusterNode *master;
- redisAssert(nodeIsSlave(myself));
+ serverAssert(nodeIsSlave(myself));
master = myself->slaveof;
if (master == NULL) return 0; /* Never called by slaves without master. */
myoffset = replicationGetSlaveOffset();
for (j = 0; j < master->numslaves; j++)
if (master->slaves[j] != myself &&
+ !nodeCantFailover(master->slaves[j]) &&
master->slaves[j]->repl_offset > myoffset) rank++;
return rank;
}
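
As a concrete illustration of why the rank matters: in the failover election scheme described by the Redis Cluster specification, each unit of rank adds 1000 milliseconds to the election delay, on top of a fixed 500 milliseconds plus up to 500 milliseconds of random jitter. A slave computing rank 2 here would therefore wait roughly 2.5 to 3 seconds before requesting votes, giving better-replicated slaves a head start.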
@@ -2456,13 +2842,13 @@ int clusterGetSlaveRank(void) {
* when the slave finds that its master is fine (no FAIL flag).
* 2) Also, the log is emitted again if the master is still down and
* the reason for not failing over is still the same, but more than
- * REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
+ * CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
* 3) Finally, the function only logs if the slave is down for more than
* five seconds + NODE_TIMEOUT. This way nothing is logged when a
* failover starts in a reasonable time.
*
* The function is called with the reason why the slave can't failover
- * which is one of the integer macros REDIS_CLUSTER_CANT_FAILOVER_*.
+ * which is one of the integer macros CLUSTER_CANT_FAILOVER_*.
*
* The function is guaranteed to be called only if 'myself' is a slave. */
void clusterLogCantFailover(int reason) {
@@ -2472,7 +2858,7 @@ void clusterLogCantFailover(int reason) {
/* Don't log if we have the same reason for some time. */
if (reason == server.cluster->cant_failover_reason &&
- time(NULL)-lastlog_time < REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
+ time(NULL)-lastlog_time < CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
return;
server.cluster->cant_failover_reason = reason;
@@ -2485,16 +2871,18 @@ void clusterLogCantFailover(int reason) {
(mstime() - myself->slaveof->fail_time) < nolog_fail_time) return;
switch(reason) {
- case REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE:
- msg = "Disconnected from master for longer than allowed.";
+ case CLUSTER_CANT_FAILOVER_DATA_AGE:
+ msg = "Disconnected from master for longer than allowed. "
+ "Please check the 'cluster-slave-validity-factor' configuration "
+ "option.";
break;
- case REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY:
+ case CLUSTER_CANT_FAILOVER_WAITING_DELAY:
msg = "Waiting the delay before I can start a new failover.";
break;
- case REDIS_CLUSTER_CANT_FAILOVER_EXPIRED:
+ case CLUSTER_CANT_FAILOVER_EXPIRED:
msg = "Failover attempt expired.";
break;
- case REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES:
+ case CLUSTER_CANT_FAILOVER_WAITING_VOTES:
msg = "Waiting for votes, but majority still not reached.";
break;
default:
@@ -2502,7 +2890,43 @@ void clusterLogCantFailover(int reason) {
break;
}
lastlog_time = time(NULL);
- redisLog(REDIS_WARNING,"Currently unable to failover: %s", msg);
+ serverLog(LL_WARNING,"Currently unable to failover: %s", msg);
+}
+
+/* This function implements the final part of automatic and manual failovers,
+ * where the slave grabs its master's hash slots, and propagates the new
+ * configuration.
+ *
+ * Note that it's up to the caller to be sure that the node got a new
+ * configuration epoch already. */
+void clusterFailoverReplaceYourMaster(void) {
+ int j;
+ clusterNode *oldmaster = myself->slaveof;
+
+ if (nodeIsMaster(myself) || oldmaster == NULL) return;
+
+ /* 1) Turn this node into a master. */
+ clusterSetNodeAsMaster(myself);
+ replicationUnsetMaster();
+
+ /* 2) Claim all the slots assigned to our master. */
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(oldmaster,j)) {
+ clusterDelSlot(j);
+ clusterAddSlot(myself,j);
+ }
+ }
+
+ /* 3) Update state and save config. */
+ clusterUpdateState();
+ clusterSaveConfigOrDie(1);
+
+ /* 4) Pong all the other nodes so that they can update the state
+ * accordingly and detect that we switched to master role. */
+ clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
+
+ /* 5) If there was a manual failover in progress, clear the state. */
+ resetManualFailover();
}
/* This function is called if we are a slave node and our master serving
@@ -2519,7 +2943,6 @@ void clusterHandleSlaveFailover(void) {
int needed_quorum = (server.cluster->size / 2) + 1;
int manual_failover = server.cluster->mf_end != 0 &&
server.cluster->mf_can_start;
- int j;
mstime_t auth_timeout, auth_retry_time;
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_HANDLE_FAILOVER;
@@ -2528,7 +2951,7 @@ void clusterHandleSlaveFailover(void) {
* and wait for replies), and the failover retry time (the time to wait
* before trying to get voted again).
*
- * Timeout is MIN(NODE_TIMEOUT*2,2000) milliseconds.
+ * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds.
* Retry is two times the Timeout.
*/
auth_timeout = server.cluster_node_timeout*2;
@@ -2539,21 +2962,24 @@ void clusterHandleSlaveFailover(void) {
* of an automatic or manual failover:
* 1) We are a slave.
* 2) Our master is flagged as FAIL, or this is a manual failover.
- * 3) It is serving slots. */
+ * 3) We don't have the no failover configuration set, and this is
+ * not a manual failover.
+ * 4) It is serving slots. */
if (nodeIsMaster(myself) ||
myself->slaveof == NULL ||
(!nodeFailed(myself->slaveof) && !manual_failover) ||
+ (server.cluster_slave_no_failover && !manual_failover) ||
myself->slaveof->numslots == 0)
{
/* There are no reasons to failover, so we set the reason why we
* are returning without failing over to NONE. */
- server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
+ server.cluster->cant_failover_reason = CLUSTER_CANT_FAILOVER_NONE;
return;
}
/* Set data_age to the number of seconds we are disconnected from
* the master. */
- if (server.repl_state == REDIS_REPL_CONNECTED) {
+ if (server.repl_state == REPL_STATE_CONNECTED) {
data_age = (mstime_t)(server.unixtime - server.master->lastinteraction)
* 1000;
} else {
@@ -2576,7 +3002,7 @@ void clusterHandleSlaveFailover(void) {
(server.cluster_node_timeout * server.cluster_slave_validity_factor)))
{
if (!manual_failover) {
- clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE);
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_DATA_AGE);
return;
}
}
@@ -2600,7 +3026,7 @@ void clusterHandleSlaveFailover(void) {
server.cluster->failover_auth_time = mstime();
server.cluster->failover_auth_rank = 0;
}
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Start of election delayed for %lld milliseconds "
"(rank #%d, offset %lld).",
server.cluster->failover_auth_time - mstime(),
@@ -2627,7 +3053,7 @@ void clusterHandleSlaveFailover(void) {
(newrank - server.cluster->failover_auth_rank) * 1000;
server.cluster->failover_auth_time += added_delay;
server.cluster->failover_auth_rank = newrank;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Slave rank updated to #%d, added %lld milliseconds of delay.",
newrank, added_delay);
}
@@ -2635,13 +3061,13 @@ void clusterHandleSlaveFailover(void) {
/* Return ASAP if we can't still start the election. */
if (mstime() < server.cluster->failover_auth_time) {
- clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY);
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY);
return;
}
/* Return ASAP if the election is too old to be valid. */
if (auth_age > auth_timeout) {
- clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_EXPIRED);
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_EXPIRED);
return;
}
@@ -2649,7 +3075,7 @@ void clusterHandleSlaveFailover(void) {
if (server.cluster->failover_auth_sent == 0) {
server.cluster->currentEpoch++;
server.cluster->failover_auth_epoch = server.cluster->currentEpoch;
- redisLog(REDIS_WARNING,"Starting a failover election for epoch %llu.",
+ serverLog(LL_WARNING,"Starting a failover election for epoch %llu.",
(unsigned long long) server.cluster->currentEpoch);
clusterRequestFailoverAuth();
server.cluster->failover_auth_sent = 1;
@@ -2661,45 +3087,23 @@ void clusterHandleSlaveFailover(void) {
/* Check if we reached the quorum. */
if (server.cluster->failover_auth_count >= needed_quorum) {
- clusterNode *oldmaster = myself->slaveof;
+ /* We have the quorum, we can finally failover the master. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Failover election won: I'm the new master.");
- /* We have the quorum, perform all the steps to correctly promote
- * this slave to a master.
- *
- * 1) Turn this node into a master. */
- clusterSetNodeAsMaster(myself);
- replicationUnsetMaster();
-
- /* 2) Claim all the slots assigned to our master. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (clusterNodeGetSlotBit(oldmaster,j)) {
- clusterDelSlot(j);
- clusterAddSlot(myself,j);
- }
- }
- /* 3) Update my configEpoch to the epoch of the election. */
+ /* Update my configEpoch to the epoch of the election. */
if (myself->configEpoch < server.cluster->failover_auth_epoch) {
myself->configEpoch = server.cluster->failover_auth_epoch;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"configEpoch set to %llu after successful failover",
(unsigned long long) myself->configEpoch);
}
- /* 4) Update state and save config. */
- clusterUpdateState();
- clusterSaveConfigOrDie(1);
-
- /* 5) Pong all the other nodes so that they can update the state
- * accordingly and detect that we switched to master role. */
- clusterBroadcastPong(CLUSTER_BROADCAST_ALL);
-
- /* 6) If there was a manual failover in progress, clear the state. */
- resetManualFailover();
+ /* Take responsibility for the cluster slots. */
+ clusterFailoverReplaceYourMaster();
} else {
- clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES);
+ clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_VOTES);
}
}
@@ -2709,7 +3113,7 @@ void clusterHandleSlaveFailover(void) {
* Slave migration is the process that allows a slave of a master that is
 * already covered by at least one other slave, to "migrate" to a master that
 * is orphaned, that is, left with no working slaves.
- * -------------------------------------------------------------------------- */
+ * ------------------------------------------------------------------------- */
/* This function is responsible to decide if this replica should be migrated
* to a different (orphaned) master. It is called by the clusterCron() function
@@ -2737,7 +3141,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
dictEntry *de;
/* Step 1: Don't migrate if the cluster state is not ok. */
- if (server.cluster->state != REDIS_CLUSTER_OK) return;
+ if (server.cluster->state != CLUSTER_OK) return;
/* Step 2: Don't migrate if my master will not be left with at least
* 'migration-barrier' slaves after my migration. */
@@ -2747,48 +3151,68 @@ void clusterHandleSlaveMigration(int max_slaves) {
!nodeTimedOut(mymaster->slaves[j])) okslaves++;
if (okslaves <= server.cluster_migration_barrier) return;
- /* Step 3: Idenitfy a candidate for migration, and check if among the
+ /* Step 3: Identify a candidate for migration, and check if among the
* masters with the greatest number of ok slaves, I'm the one with the
- * smaller node ID.
+ * smallest node ID (the "candidate slave").
*
- * Note that this means that eventually a replica migration will occurr
+ * Note: this means that eventually a replica migration will occur
* since slaves that are reachable again always have their FAIL flag
- * cleared. At the same time this does not mean that there are no
- * race conditions possible (two slaves migrating at the same time), but
- * this is extremely unlikely to happen, and harmless. */
+ * cleared, so eventually there must be a candidate. At the same time
+ * this does not mean that there are no race conditions possible (two
+ * slaves migrating at the same time), but this is unlikely to
+ * happen, and harmless when it happens. */
candidate = myself;
di = dictGetSafeIterator(server.cluster->nodes);
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
- int okslaves;
+ int okslaves = 0, is_orphaned = 1;
+
+ /* We want to migrate only if this master is working, orphaned, and
+ * used to have slaves, or if it failed over a master that had slaves
+ * (MIGRATE_TO flag). This way we only migrate to instances that were
+ * supposed to have replicas. */
+ if (nodeIsSlave(node) || nodeFailed(node)) is_orphaned = 0;
+ if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) is_orphaned = 0;
- /* Only iterate over working masters. */
- if (nodeIsSlave(node) || nodeFailed(node)) continue;
- /* If this master never had slaves so far, don't migrate. We want
- * to migrate to a master that remained orphaned, not masters that
- * were never configured to have slaves. */
- if (node->numslaves == 0) continue;
- okslaves = clusterCountNonFailingSlaves(node);
+ /* Check number of working slaves. */
+ if (nodeIsMaster(node)) okslaves = clusterCountNonFailingSlaves(node);
+ if (okslaves > 0) is_orphaned = 0;
- if (okslaves == 0 && target == NULL && node->numslots > 0)
- target = node;
+ if (is_orphaned) {
+ if (!target && node->numslots > 0) target = node;
+
+ /* Track the starting time of the orphaned condition for this
+ * master. */
+ if (!node->orphaned_time) node->orphaned_time = mstime();
+ } else {
+ node->orphaned_time = 0;
+ }
+ /* Check if I'm the slave candidate for the migration: attached
+ * to a master with the maximum number of slaves and with the smallest
+ * node ID. */
if (okslaves == max_slaves) {
for (j = 0; j < node->numslaves; j++) {
if (memcmp(node->slaves[j]->name,
candidate->name,
- REDIS_CLUSTER_NAMELEN) < 0)
+ CLUSTER_NAMELEN) < 0)
{
candidate = node->slaves[j];
}
}
}
}
+ dictReleaseIterator(di);
/* Step 4: perform the migration if there is a target, and if I'm the
- * candidate. */
- if (target && candidate == myself) {
- redisLog(REDIS_WARNING,"Migrating to orphaned master %.40s",
+ * candidate, but only if the master is continuously orphaned for a
+ * couple of seconds, so that during failovers, we give some time to
+ * the natural slaves of this instance to advertise their switch from
+ * the old master to the new one. */
+ if (target && candidate == myself &&
+ (mstime()-target->orphaned_time) > CLUSTER_SLAVE_MIGRATION_DELAY)
+ {
+ serverLog(LL_WARNING,"Migrating to orphaned master %.40s",
target->name);
clusterSetMaster(target);
}
@@ -2802,7 +3226,7 @@ void clusterHandleSlaveMigration(int max_slaves) {
* setting mf_end to the millisecond unix time at which we'll abort the
* attempt.
* 2) Slave sends a MFSTART message to the master requesting to pause clients
- * for two times the manual failover timeout REDIS_CLUSTER_MF_TIMEOUT.
+ * for two times the manual failover timeout CLUSTER_MF_TIMEOUT.
* When master is paused for manual failover, it also starts to flag
* packets with CLUSTERMSG_FLAG0_PAUSED.
* 3) Slave waits for master to send its replication offset flagged as PAUSED.
@@ -2842,7 +3266,7 @@ void resetManualFailover(void) {
/* If a manual failover timed out, abort it. */
void manualFailoverCheckTimeout(void) {
if (server.cluster->mf_end && server.cluster->mf_end < mstime()) {
- redisLog(REDIS_WARNING,"Manual failover timed out.");
+ serverLog(LL_WARNING,"Manual failover timed out.");
resetManualFailover();
}
}
@@ -2863,7 +3287,7 @@ void clusterHandleManualFailover(void) {
/* Our replication offset matches the master replication offset
* announced after clients were paused. We can start the failover. */
server.cluster->mf_can_start = 1;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"All master replication stream processed, "
"manual failover can start.");
}
@@ -2888,6 +3312,35 @@ void clusterCron(void) {
iteration++; /* Number of times this function was called so far. */
+ /* We want to keep myself->ip in sync with the cluster-announce-ip option.
+ * The option can be set at runtime via CONFIG SET, so we periodically check
+ * if the option changed, and reflect it into myself->ip. */
+ {
+ static char *prev_ip = NULL;
+ char *curr_ip = server.cluster_announce_ip;
+ int changed = 0;
+
+ if (prev_ip == NULL && curr_ip != NULL) changed = 1;
+ else if (prev_ip != NULL && curr_ip == NULL) changed = 1;
+ else if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1;
+
+ if (changed) {
+ if (prev_ip) zfree(prev_ip);
+ prev_ip = curr_ip;
+
+ if (curr_ip) {
+ /* We always take a private copy of the announced IP,
+ * duplicating the string: this way on the next call we
+ * can check if the address really changed. */
+ prev_ip = zstrdup(prev_ip);
+ strncpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN);
+ myself->ip[NET_IP_STR_LEN-1] = '\0';
+ } else {
+ myself->ip[0] = '\0'; /* Force autodetection. */
+ }
+ }
+ }
+
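
Because the cron polls the option, the announced address can be changed on a live node; a hypothetical session (port and address invented):

    redis-cli -p 7000 CONFIG SET cluster-announce-ip 203.0.113.5

Within one cron iteration myself->ip picks up the new value and it starts being gossiped to the rest of the cluster.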
/* The handshake timeout is the time after which a handshake node that was
* not turned into a normal node is removed from the nodes. Usually it is
* just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use
@@ -2895,17 +3348,28 @@ void clusterCron(void) {
handshake_timeout = server.cluster_node_timeout;
if (handshake_timeout < 1000) handshake_timeout = 1000;
- /* Check if we have disconnected nodes and re-establish the connection. */
+ /* Update myself flags. */
+ clusterUpdateMyselfFlags();
+
+ /* Check if we have disconnected nodes and re-establish the connection.
+ * Also update a few stats while we are here, that can be used to make
+ * better decisions in other parts of the code. */
di = dictGetSafeIterator(server.cluster->nodes);
+ server.cluster->stats_pfail_nodes = 0;
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
- if (node->flags & (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR)) continue;
+ /* Not interested in reconnecting the link with myself or nodes
+ * for which we have no address. */
+ if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) continue;
+
+ if (node->flags & CLUSTER_NODE_PFAIL)
+ server.cluster->stats_pfail_nodes++;
/* A Node in HANDSHAKE state has a limited lifespan equal to the
* configured node timeout. */
if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
- freeClusterNode(node);
+ clusterDelNode(node);
continue;
}
@@ -2915,7 +3379,7 @@ void clusterCron(void) {
clusterLink *link;
fd = anetTcpNonBlockBindConnect(server.neterr, node->ip,
- node->port+REDIS_CLUSTER_PORT_INCR, REDIS_BIND_ADDR);
+ node->cport, NET_FIRST_BIND_ADDR);
if (fd == -1) {
/* We got a synchronous error from connect before
* clusterSendPing() had a chance to be called.
@@ -2923,10 +3387,9 @@ void clusterCron(void) {
* so we claim we actually sent a ping now (that will
* be really sent as soon as the link is obtained). */
if (node->ping_sent == 0) node->ping_sent = mstime();
- redisLog(REDIS_DEBUG, "Unable to connect to "
+ serverLog(LL_DEBUG, "Unable to connect to "
"Cluster Node [%s]:%d -> %s", node->ip,
- node->port+REDIS_CLUSTER_PORT_INCR,
- server.neterr);
+ node->cport, server.neterr);
continue;
}
link = createClusterLink(node);
@@ -2941,7 +3404,7 @@ void clusterCron(void) {
* of a PING one, to force the receiver to add us in its node
* table. */
old_ping_sent = node->ping_sent;
- clusterSendPing(link, node->flags & REDIS_NODE_MEET ?
+ clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ?
CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING);
if (old_ping_sent) {
/* If there was an active ping before the link was
@@ -2954,10 +3417,10 @@ void clusterCron(void) {
* to this node. Instead after the PONG is received and we
* are no longer in meet/handshake status, we want to send
* normal PING packets. */
- node->flags &= ~REDIS_NODE_MEET;
+ node->flags &= ~CLUSTER_NODE_MEET;
- redisLog(REDIS_DEBUG,"Connecting with Node %.40s at %s:%d",
- node->name, node->ip, node->port+REDIS_CLUSTER_PORT_INCR);
+ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d",
+ node->name, node->ip, node->cport);
}
}
dictReleaseIterator(di);
@@ -2975,7 +3438,7 @@ void clusterCron(void) {
/* Don't ping nodes disconnected or with a ping currently active. */
if (this->link == NULL || this->ping_sent != 0) continue;
- if (this->flags & (REDIS_NODE_MYSELF|REDIS_NODE_HANDSHAKE))
+ if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
continue;
if (min_pong_node == NULL || min_pong > this->pong_received) {
min_pong_node = this;
@@ -2983,7 +3446,7 @@ void clusterCron(void) {
}
}
if (min_pong_node) {
- redisLog(REDIS_DEBUG,"Pinging node %.40s", min_pong_node->name);
+ serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
}
}
@@ -3004,7 +3467,7 @@ void clusterCron(void) {
mstime_t delay;
if (node->flags &
- (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE))
+ (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
continue;
/* Orphaned master check, useful only if the current instance
@@ -3014,9 +3477,12 @@ void clusterCron(void) {
/* A master is orphaned if it is serving a non-zero number of
 * slots, has no working slaves, but used to have at least one
- * slave. */
- if (okslaves == 0 && node->numslots > 0 && node->numslaves)
+ * slave, or failed over a master that used to have slaves. */
+ if (okslaves == 0 && node->numslots > 0 &&
+ node->flags & CLUSTER_NODE_MIGRATE_TO)
+ {
orphaned_masters++;
+ }
if (okslaves > max_slaves) max_slaves = okslaves;
if (nodeIsSlave(myself) && myself->slaveof == node)
this_slaves = okslaves;
@@ -3071,10 +3537,10 @@ void clusterCron(void) {
if (delay > server.cluster_node_timeout) {
/* Timeout reached. Set the node as possibly failing if it is
* not already in this state. */
- if (!(node->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) {
- redisLog(REDIS_DEBUG,"*** NODE %.40s possibly failing",
+ if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
+ serverLog(LL_DEBUG,"*** NODE %.40s possibly failing",
node->name);
- node->flags |= REDIS_NODE_PFAIL;
+ node->flags |= CLUSTER_NODE_PFAIL;
update_state = 1;
}
}
@@ -3107,7 +3573,7 @@ void clusterCron(void) {
clusterHandleSlaveMigration(max_slaves);
}
- if (update_state || server.cluster->state == REDIS_CLUSTER_FAIL)
+ if (update_state || server.cluster->state == CLUSTER_FAIL)
clusterUpdateState();
}
@@ -3168,11 +3634,45 @@ void bitmapClearBit(unsigned char *bitmap, int pos) {
bitmap[byte] &= ~(1<<bit);
}
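
The bitmap helpers above use the standard byte/bit split; a self-contained sketch for reference (16384 slots fit in 2048 bytes per node):

    #define CLUSTER_SLOTS 16384

    static unsigned char slots[CLUSTER_SLOTS/8]; /* 2048 bytes. */

    /* Same math as bitmapSetBit()/bitmapClearBit()/bitmapTestBit(). */
    static void slotSet(int pos)   { slots[pos/8] |=  1 << (pos & 7); }
    static void slotClear(int pos) { slots[pos/8] &= ~(1 << (pos & 7)); }
    static int  slotTest(int pos)  { return (slots[pos/8] & (1 << (pos & 7))) != 0; }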
+/* Return non-zero if there is at least one master with slaves in the cluster.
+ * Otherwise zero is returned. Used by clusterNodeSetSlotBit() to set the
+ * MIGRATE_TO flag when a master gets its first slot. */
+int clusterMastersHaveSlaves(void) {
+ dictIterator *di = dictGetSafeIterator(server.cluster->nodes);
+ dictEntry *de;
+ int slaves = 0;
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+
+ if (nodeIsSlave(node)) continue;
+ slaves += node->numslaves;
+ }
+ dictReleaseIterator(di);
+ return slaves != 0;
+}
+
/* Set the slot bit and return the old value. */
int clusterNodeSetSlotBit(clusterNode *n, int slot) {
int old = bitmapTestBit(n->slots,slot);
bitmapSetBit(n->slots,slot);
- if (!old) n->numslots++;
+ if (!old) {
+ n->numslots++;
+ /* When a master gets its first slot, even if it has no slaves,
+ * it gets flagged with MIGRATE_TO, that is, the master is a valid
+ * target for replica migration, if and only if at least one of
+ * the other masters has slaves right now.
+ *
+ * Normally masters are valid targets of replica migration if:
+ * 1. They used to have slaves (but no longer do).
+ * 2. They are slaves failing over a master that used to have slaves.
+ *
+ * However new masters with slots assigned are considered valid
+ * migration targets if the rest of the cluster is not slave-less.
+ *
+ * See https://github.com/antirez/redis/issues/3043 for more info. */
+ if (n->numslots == 1 && clusterMastersHaveSlaves())
+ n->flags |= CLUSTER_NODE_MIGRATE_TO;
+ }
return old;
}
@@ -3190,26 +3690,26 @@ int clusterNodeGetSlotBit(clusterNode *n, int slot) {
}
/* Add the specified slot to the list of slots that node 'n' will
- * serve. Return REDIS_OK if the operation ended with success.
+ * serve. Return C_OK if the operation ended with success.
* If the slot is already assigned to another instance this is considered
- * an error and REDIS_ERR is returned. */
+ * an error and C_ERR is returned. */
int clusterAddSlot(clusterNode *n, int slot) {
- if (server.cluster->slots[slot]) return REDIS_ERR;
+ if (server.cluster->slots[slot]) return C_ERR;
clusterNodeSetSlotBit(n,slot);
server.cluster->slots[slot] = n;
- return REDIS_OK;
+ return C_OK;
}
/* Delete the specified slot marking it as unassigned.
- * Returns REDIS_OK if the slot was assigned, otherwise if the slot was
- * already unassigned REDIS_ERR is returned. */
+ * Returns C_OK if the slot was assigned, otherwise if the slot was
+ * already unassigned C_ERR is returned. */
int clusterDelSlot(int slot) {
clusterNode *n = server.cluster->slots[slot];
- if (!n) return REDIS_ERR;
- redisAssert(clusterNodeClearSlotBit(n,slot) == 1);
+ if (!n) return C_ERR;
+ serverAssert(clusterNodeClearSlotBit(n,slot) == 1);
server.cluster->slots[slot] = NULL;
- return REDIS_OK;
+ return C_OK;
}
/* Delete all the slots associated with the specified node.
@@ -3217,9 +3717,11 @@ int clusterDelSlot(int slot) {
int clusterDelNodeSlots(clusterNode *node) {
int deleted = 0, j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
- if (clusterNodeGetSlotBit(node,j)) clusterDelSlot(j);
- deleted++;
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
+ if (clusterNodeGetSlotBit(node,j)) {
+ clusterDelSlot(j);
+ deleted++;
+ }
}
return deleted;
}
@@ -3238,12 +3740,12 @@ void clusterCloseAllSlots(void) {
* -------------------------------------------------------------------------- */
/* The following are defines that are only used in the evaluation function
- * and are based on heuristics. Actaully the main point about the rejoin and
+ * and are based on heuristics. Actually the main point about the rejoin and
* writable delay is that they should be a few orders of magnitude larger
* than the network latency. */
-#define REDIS_CLUSTER_MAX_REJOIN_DELAY 5000
-#define REDIS_CLUSTER_MIN_REJOIN_DELAY 500
-#define REDIS_CLUSTER_WRITABLE_DELAY 2000
+#define CLUSTER_MAX_REJOIN_DELAY 5000
+#define CLUSTER_MIN_REJOIN_DELAY 500
+#define CLUSTER_WRITABLE_DELAY 2000
void clusterUpdateState(void) {
int j, new_state;
@@ -3261,20 +3763,20 @@ void clusterUpdateState(void) {
 * so as not to count the DB loading time. */
if (first_call_time == 0) first_call_time = mstime();
if (nodeIsMaster(myself) &&
- server.cluster->state == REDIS_CLUSTER_FAIL &&
- mstime() - first_call_time < REDIS_CLUSTER_WRITABLE_DELAY) return;
+ server.cluster->state == CLUSTER_FAIL &&
+ mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) return;
/* Start assuming the state is OK. We'll turn it into FAIL if there
* are the right conditions. */
- new_state = REDIS_CLUSTER_OK;
+ new_state = CLUSTER_OK;
/* Check if all the slots are covered. */
if (server.cluster_require_full_coverage) {
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (server.cluster->slots[j] == NULL ||
- server.cluster->slots[j]->flags & (REDIS_NODE_FAIL))
+ server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL))
{
- new_state = REDIS_CLUSTER_FAIL;
+ new_state = CLUSTER_FAIL;
break;
}
}
@@ -3296,7 +3798,7 @@ void clusterUpdateState(void) {
if (nodeIsMaster(node) && node->numslots) {
server.cluster->size++;
- if ((node->flags & (REDIS_NODE_FAIL|REDIS_NODE_PFAIL)) == 0)
+ if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0)
reachable_masters++;
}
}
@@ -3309,7 +3811,7 @@ void clusterUpdateState(void) {
int needed_quorum = (server.cluster->size / 2) + 1;
if (reachable_masters < needed_quorum) {
- new_state = REDIS_CLUSTER_FAIL;
+ new_state = CLUSTER_FAIL;
among_minority_time = mstime();
}
}
@@ -3322,12 +3824,12 @@ void clusterUpdateState(void) {
* minority, don't let it accept queries for some time after the
* partition heals, to make sure there is enough time to receive
* a configuration update. */
- if (rejoin_delay > REDIS_CLUSTER_MAX_REJOIN_DELAY)
- rejoin_delay = REDIS_CLUSTER_MAX_REJOIN_DELAY;
- if (rejoin_delay < REDIS_CLUSTER_MIN_REJOIN_DELAY)
- rejoin_delay = REDIS_CLUSTER_MIN_REJOIN_DELAY;
+ if (rejoin_delay > CLUSTER_MAX_REJOIN_DELAY)
+ rejoin_delay = CLUSTER_MAX_REJOIN_DELAY;
+ if (rejoin_delay < CLUSTER_MIN_REJOIN_DELAY)
+ rejoin_delay = CLUSTER_MIN_REJOIN_DELAY;
- if (new_state == REDIS_CLUSTER_OK &&
+ if (new_state == CLUSTER_OK &&
nodeIsMaster(myself) &&
mstime() - among_minority_time < rejoin_delay)
{
@@ -3335,8 +3837,8 @@ void clusterUpdateState(void) {
}
/* Change the state and log the event. */
- redisLog(REDIS_WARNING,"Cluster state changed: %s",
- new_state == REDIS_CLUSTER_OK ? "ok" : "fail");
+ serverLog(LL_WARNING,"Cluster state changed: %s",
+ new_state == CLUSTER_OK ? "ok" : "fail");
server.cluster->state = new_state;
}
}
@@ -3352,13 +3854,13 @@ void clusterUpdateState(void) {
 * these slots, we set the slots as IMPORTING from our point of view
 * in order to justify that we have those slots, and in order to make
* redis-trib aware of the issue, so that it can try to fix it.
- * 2) If we find data in a DB different than DB0 we return REDIS_ERR to
+ * 2) If we find data in a DB different than DB0 we return C_ERR to
* signal the caller it should quit the server with an error message
* or take other actions.
*
- * The function always returns REDIS_OK even if it will try to correct
+ * The function always returns C_OK even if it will try to correct
 * the error described in "1". However if data is found in a DB different
- * from DB0, REDIS_ERR is returned.
+ * from DB0, C_ERR is returned.
*
* The function also uses the logging facility in order to warn the user
* about desynchronizations between the data we have in memory and the
@@ -3369,16 +3871,16 @@ int verifyClusterConfigWithData(void) {
/* If this node is a slave, don't perform the check at all as we
* completely depend on the replication stream. */
- if (nodeIsSlave(myself)) return REDIS_OK;
+ if (nodeIsSlave(myself)) return C_OK;
/* Make sure we only have keys in DB0. */
for (j = 1; j < server.dbnum; j++) {
- if (dictSize(server.db[j].dict)) return REDIS_ERR;
+ if (dictSize(server.db[j].dict)) return C_ERR;
}
/* Check that all the slots we see populated memory have a corresponding
* entry in the cluster table. Otherwise fix the table. */
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (!countKeysInSlot(j)) continue; /* No keys in this slot. */
/* Check if we are assigned to this slot or if we are importing it.
* In both cases check the next slot as the configuration makes
@@ -3393,18 +3895,18 @@ int verifyClusterConfigWithData(void) {
update_config++;
/* Case A: slot is unassigned. Take responsibility for it. */
if (server.cluster->slots[j] == NULL) {
- redisLog(REDIS_WARNING, "I have keys for unassigned slot %d. "
+ serverLog(LL_WARNING, "I have keys for unassigned slot %d. "
"Taking responsibility for it.",j);
clusterAddSlot(myself,j);
} else {
- redisLog(REDIS_WARNING, "I have keys for slot %d, but the slot is "
+ serverLog(LL_WARNING, "I have keys for slot %d, but the slot is "
"assigned to another node. "
"Setting it to importing state.",j);
server.cluster->importing_slots_from[j] = server.cluster->slots[j];
}
}
if (update_config) clusterSaveConfigOrDie(1);
- return REDIS_OK;
+ return C_OK;
}
/* -----------------------------------------------------------------------------
@@ -3414,12 +3916,12 @@ int verifyClusterConfigWithData(void) {
/* Set the specified node 'n' as master for this node.
* If this node is currently a master, it is turned into a slave. */
void clusterSetMaster(clusterNode *n) {
- redisAssert(n != myself);
- redisAssert(myself->numslots == 0);
+ serverAssert(n != myself);
+ serverAssert(myself->numslots == 0);
if (nodeIsMaster(myself)) {
- myself->flags &= ~REDIS_NODE_MASTER;
- myself->flags |= REDIS_NODE_SLAVE;
+ myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO);
+ myself->flags |= CLUSTER_NODE_SLAVE;
clusterCloseAllSlots();
} else {
if (myself->slaveof)
@@ -3441,27 +3943,27 @@ struct redisNodeFlags {
};
static struct redisNodeFlags redisNodeFlagsTable[] = {
- {REDIS_NODE_MYSELF, "myself,"},
- {REDIS_NODE_MASTER, "master,"},
- {REDIS_NODE_SLAVE, "slave,"},
- {REDIS_NODE_PFAIL, "fail?,"},
- {REDIS_NODE_FAIL, "fail,"},
- {REDIS_NODE_HANDSHAKE, "handshake,"},
- {REDIS_NODE_NOADDR, "noaddr,"}
+ {CLUSTER_NODE_MYSELF, "myself,"},
+ {CLUSTER_NODE_MASTER, "master,"},
+ {CLUSTER_NODE_SLAVE, "slave,"},
+ {CLUSTER_NODE_PFAIL, "fail?,"},
+ {CLUSTER_NODE_FAIL, "fail,"},
+ {CLUSTER_NODE_HANDSHAKE, "handshake,"},
+ {CLUSTER_NODE_NOADDR, "noaddr,"},
+ {CLUSTER_NODE_NOFAILOVER, "nofailover,"}
};
/* Concatenate the comma separated list of node flags to the given SDS
* string 'ci'. */
-sds representRedisNodeFlags(sds ci, uint16_t flags) {
- if (flags == 0) {
- ci = sdscat(ci,"noflags,");
- } else {
- int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags);
- for (i = 0; i < size; i++) {
- struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i;
- if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name);
- }
+sds representClusterNodeFlags(sds ci, uint16_t flags) {
+ size_t orig_len = sdslen(ci);
+ int i, size = sizeof(redisNodeFlagsTable)/sizeof(struct redisNodeFlags);
+ for (i = 0; i < size; i++) {
+ struct redisNodeFlags *nodeflag = redisNodeFlagsTable + i;
+ if (flags & nodeflag->flag) ci = sdscat(ci, nodeflag->name);
}
+ /* If no flag was added, add the "noflags" special flag. */
+ if (sdslen(ci) == orig_len) ci = sdscat(ci,"noflags,");
sdsIncrLen(ci,-1); /* Remove trailing comma. */
return ci;
}
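
As a usage note, a node that is both myself and a master renders as "myself,master", and a zero flags value falls back to "noflags". A minimal check, assuming the sds string library bundled in deps/:

    sds s1 = representClusterNodeFlags(sdsempty(),
                                       CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER);
    sds s2 = representClusterNodeFlags(sdsempty(), 0);
    printf("%s\n%s\n", s1, s2);   /* -> "myself,master" then "noflags" */
    sdsfree(s1);
    sdsfree(s2);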
@@ -3475,13 +3977,14 @@ sds clusterGenNodeDescription(clusterNode *node) {
sds ci;
/* Node coordinates */
- ci = sdscatprintf(sdsempty(),"%.40s %s:%d ",
+ ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ",
node->name,
node->ip,
- node->port);
+ node->port,
+ node->cport);
/* Flags */
- ci = representRedisNodeFlags(ci, node->flags);
+ ci = representClusterNodeFlags(ci, node->flags);
/* Slave of... or just "-" */
if (node->slaveof)
@@ -3489,24 +3992,24 @@ sds clusterGenNodeDescription(clusterNode *node) {
else
ci = sdscatlen(ci," - ",3);
- /* Latency from the POV of this node, link status */
+ /* Latency from the POV of this node, config epoch, link status */
ci = sdscatprintf(ci,"%lld %lld %llu %s",
(long long) node->ping_sent,
(long long) node->pong_received,
(unsigned long long) node->configEpoch,
- (node->link || node->flags & REDIS_NODE_MYSELF) ?
+ (node->link || node->flags & CLUSTER_NODE_MYSELF) ?
"connected" : "disconnected");
/* Slots served by this instance */
start = -1;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit;
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
- if (start != -1 && (!bit || j == REDIS_CLUSTER_SLOTS-1)) {
- if (bit && j == REDIS_CLUSTER_SLOTS-1) j++;
+ if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
+ if (bit && j == CLUSTER_SLOTS-1) j++;
if (start == j-1) {
ci = sdscatprintf(ci," %d",start);
@@ -3520,8 +4023,8 @@ sds clusterGenNodeDescription(clusterNode *node) {
/* Just for MYSELF node we also dump info about slots that
* we are migrating to other instances or importing from other
* instances. */
- if (node->flags & REDIS_NODE_MYSELF) {
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ if (node->flags & CLUSTER_NODE_MYSELF) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (server.cluster->migrating_slots_to[j]) {
ci = sdscatprintf(ci," [%d->-%.40s]",j,
server.cluster->migrating_slots_to[j]->name);
@@ -3569,11 +4072,27 @@ sds clusterGenNodesDescription(int filter) {
* CLUSTER command
* -------------------------------------------------------------------------- */
-int getSlotOrReply(redisClient *c, robj *o) {
+const char *clusterGetMessageTypeString(int type) {
+ switch(type) {
+ case CLUSTERMSG_TYPE_PING: return "ping";
+ case CLUSTERMSG_TYPE_PONG: return "pong";
+ case CLUSTERMSG_TYPE_MEET: return "meet";
+ case CLUSTERMSG_TYPE_FAIL: return "fail";
+ case CLUSTERMSG_TYPE_PUBLISH: return "publish";
+ case CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST: return "auth-req";
+ case CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK: return "auth-ack";
+ case CLUSTERMSG_TYPE_UPDATE: return "update";
+ case CLUSTERMSG_TYPE_MFSTART: return "mfstart";
+ case CLUSTERMSG_TYPE_MODULE: return "module";
+ }
+ return "unknown";
+}
+
+int getSlotOrReply(client *c, robj *o) {
long long slot;
- if (getLongLongFromObject(o,&slot) != REDIS_OK ||
- slot < 0 || slot >= REDIS_CLUSTER_SLOTS)
+ if (getLongLongFromObject(o,&slot) != C_OK ||
+ slot < 0 || slot >= CLUSTER_SLOTS)
{
addReplyError(c,"Invalid or out of range slot");
return -1;
@@ -3581,13 +4100,15 @@ int getSlotOrReply(redisClient *c, robj *o) {
return (int) slot;
}
-void clusterReplyMultiBulkSlots(redisClient *c) {
+void clusterReplyMultiBulkSlots(client *c) {
/* Format: 1) 1) start slot
* 2) end slot
* 3) 1) master IP
* 2) master port
+ * 3) node ID
* 4) 1) replica IP
* 2) replica port
+ * 3) node ID
* ... continued until done
*/
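
With the node ID now included as the third element, a reply for a single range with one replica could look as follows (addresses and IDs are invented for illustration):

    1) 1) (integer) 0
       2) (integer) 5460
       3) 1) "127.0.0.1"
          2) (integer) 7000
          3) "e7d1eecce10fd6bb5eb35b9f99a514335d9ba9ca"
       4) 1) "127.0.0.1"
          2) (integer) 7003
          3) "6ec23923021cf3ffec47632106199cb7f496ce01"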
@@ -3604,17 +4125,17 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
* master) and masters not serving any slot. */
if (!nodeIsMaster(node) || node->numslots == 0) continue;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit, i;
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
- if (start != -1 && (!bit || j == REDIS_CLUSTER_SLOTS-1)) {
+ if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
int nested_elements = 3; /* slots (2) + master addr (1). */
void *nested_replylen = addDeferredMultiBulkLength(c);
- if (bit && j == REDIS_CLUSTER_SLOTS-1) j++;
+ if (bit && j == CLUSTER_SLOTS-1) j++;
            /* If slot exists in output map, add to its list.
* else, create a new output map for this slot */
@@ -3628,18 +4149,20 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
start = -1;
/* First node reply position is always the master */
- addReplyMultiBulkLen(c, 2);
+ addReplyMultiBulkLen(c, 3);
addReplyBulkCString(c, node->ip);
addReplyLongLong(c, node->port);
+ addReplyBulkCBuffer(c, node->name, CLUSTER_NAMELEN);
/* Remaining nodes in reply are replicas for slot range */
for (i = 0; i < node->numslaves; i++) {
/* This loop is copy/pasted from clusterGenNodeDescription()
* with modifications for per-slot node aggregation */
if (nodeFailed(node->slaves[i])) continue;
- addReplyMultiBulkLen(c, 2);
+ addReplyMultiBulkLen(c, 3);
addReplyBulkCString(c, node->slaves[i]->ip);
addReplyLongLong(c, node->slaves[i]->port);
+ addReplyBulkCBuffer(c, node->slaves[i]->name, CLUSTER_NAMELEN);
nested_elements++;
}
setDeferredMultiBulkLength(c, nested_replylen, nested_elements);
@@ -3651,22 +4174,60 @@ void clusterReplyMultiBulkSlots(redisClient *c) {
setDeferredMultiBulkLength(c, slot_replylen, num_masters);
}
-void clusterCommand(redisClient *c) {
+void clusterCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- if (!strcasecmp(c->argv[1]->ptr,"meet") && c->argc == 4) {
- long long port;
-
- if (getLongLongFromObject(c->argv[3], &port) != REDIS_OK) {
- addReplyErrorFormat(c,"Invalid TCP port specified: %s",
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"ADDSLOTS <slot> [slot ...] -- Assign slots to current node.",
+"BUMPEPOCH -- Advance the cluster config epoch.",
+"COUNT-failure-reports <node-id> -- Return number of failure reports for <node-id>.",
+"COUNTKEYSINSLOT <slot> - Return the number of keys in <slot>.",
+"DELSLOTS <slot> [slot ...] -- Delete slots information from current node.",
+"FAILOVER [force|takeover] -- Promote current slave node to being a master.",
+"FORGET <node-id> -- Remove a node from the cluster.",
+"GETKEYSINSLOT <slot> <count> -- Return key names stored by current node in a slot.",
+"FLUSHSLOTS -- Delete current node own slots information.",
+"INFO - Return onformation about the cluster.",
+"KEYSLOT <key> -- Return the hash slot for <key>.",
+"MEET <ip> <port> [bus-port] -- Connect nodes into a working cluster.",
+"MYID -- Return the node id.",
+"NODES -- Return cluster configuration seen by node. Output format:",
+" <id> <ip:port> <flags> <master> <pings> <pongs> <epoch> <link> <slot> ... <slot>",
+"REPLICATE <node-id> -- Configure current node as slave to <node-id>.",
+"RESET [hard|soft] -- Reset current node (default: soft).",
+"SET-config-epoch <epoch> - Set config epoch of current node.",
+"SETSLOT <slot> (importing|migrating|stable|node <node-id>) -- Set slot state.",
+"SLAVES <node-id> -- Return <node-id> slaves.",
+"SLOTS -- Return information about slots range mappings. Each range is made of:",
+" start, end, master and replicas IP addresses, ports and ids",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"meet") && (c->argc == 4 || c->argc == 5)) {
+ /* CLUSTER MEET <ip> <port> [cport] */
+ long long port, cport;
+
+ if (getLongLongFromObject(c->argv[3], &port) != C_OK) {
+ addReplyErrorFormat(c,"Invalid TCP base port specified: %s",
(char*)c->argv[3]->ptr);
return;
}
- if (clusterStartHandshake(c->argv[2]->ptr,port) == 0 &&
+ if (c->argc == 5) {
+ if (getLongLongFromObject(c->argv[4], &cport) != C_OK) {
+ addReplyErrorFormat(c,"Invalid TCP bus port specified: %s",
+ (char*)c->argv[4]->ptr);
+ return;
+ }
+ } else {
+ cport = port + CLUSTER_PORT_INCR;
+ }
+
+ if (clusterStartHandshake(c->argv[2]->ptr,port,cport) == 0 &&
errno == EINVAL)
{
addReplyErrorFormat(c,"Invalid node address specified: %s:%s",
@@ -3679,9 +4240,12 @@ void clusterCommand(redisClient *c) {
robj *o;
sds ci = clusterGenNodesDescription(0);
- o = createObject(REDIS_STRING,ci);
+ o = createObject(OBJ_STRING,ci);
addReplyBulk(c,o);
decrRefCount(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) {
+ /* CLUSTER MYID */
+ addReplyBulkCBuffer(c,myself->name, CLUSTER_NAMELEN);
} else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) {
/* CLUSTER SLOTS */
clusterReplyMultiBulkSlots(c);
@@ -3700,10 +4264,10 @@ void clusterCommand(redisClient *c) {
/* CLUSTER ADDSLOTS <slot> [slot] ... */
/* CLUSTER DELSLOTS <slot> [slot] ... */
int j, slot;
- unsigned char *slots = zmalloc(REDIS_CLUSTER_SLOTS);
+ unsigned char *slots = zmalloc(CLUSTER_SLOTS);
int del = !strcasecmp(c->argv[1]->ptr,"delslots");
- memset(slots,0,REDIS_CLUSTER_SLOTS);
+ memset(slots,0,CLUSTER_SLOTS);
/* Check that all the arguments are parseable and that all the
* slots are not already busy. */
for (j = 2; j < c->argc; j++) {
@@ -3727,7 +4291,7 @@ void clusterCommand(redisClient *c) {
return;
}
}
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
if (slots[j]) {
int retval;
@@ -3738,7 +4302,7 @@ void clusterCommand(redisClient *c) {
retval = del ? clusterDelSlot(j) :
clusterAddSlot(myself,j);
- redisAssertWithInfo(c,NULL,retval == REDIS_OK);
+ serverAssertWithInfo(c,NULL,retval == C_OK);
}
}
zfree(slots);
@@ -3752,6 +4316,11 @@ void clusterCommand(redisClient *c) {
int slot;
clusterNode *n;
+ if (nodeIsSlave(myself)) {
+ addReplyError(c,"Please use SETSLOT only with masters.");
+ return;
+ }
+
if ((slot = getSlotOrReply(c,c->argv[2])) == -1) return;
if (!strcasecmp(c->argv[3]->ptr,"migrating") && c->argc == 5) {
@@ -3773,7 +4342,7 @@ void clusterCommand(redisClient *c) {
}
if ((n = clusterLookupNode(c->argv[4]->ptr)) == NULL) {
addReplyErrorFormat(c,"I don't know about node %s",
- (char*)c->argv[3]->ptr);
+ (char*)c->argv[4]->ptr);
return;
}
server.cluster->importing_slots_from[slot] = n;
@@ -3821,17 +4390,9 @@ void clusterCommand(redisClient *c) {
* failover happens at the same time we close the slot, the
 * configEpoch collision resolution will fix it by assigning
* a different epoch to each node. */
- uint64_t maxEpoch = clusterGetMaxEpoch();
-
- if (myself->configEpoch == 0 ||
- myself->configEpoch != maxEpoch)
- {
- server.cluster->currentEpoch++;
- myself->configEpoch = server.cluster->currentEpoch;
- clusterDoBeforeSleep(CLUSTER_TODO_FSYNC_CONFIG);
- redisLog(REDIS_WARNING,
- "configEpoch set to %llu after importing slot %d",
- (unsigned long long) myself->configEpoch, slot);
+ if (clusterBumpConfigEpochWithoutConsensus() == C_OK) {
+ serverLog(LL_WARNING,
+ "configEpoch updated after importing slot %d", slot);
}
server.cluster->importing_slots_from[slot] = NULL;
}
@@ -3839,11 +4400,18 @@ void clusterCommand(redisClient *c) {
clusterAddSlot(n,slot);
} else {
addReplyError(c,
- "Invalid CLUSTER SETSLOT action or number of arguments");
+ "Invalid CLUSTER SETSLOT action or number of arguments. Try CLUSTER HELP");
return;
}
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"bumpepoch") && c->argc == 2) {
+ /* CLUSTER BUMPEPOCH */
+ int retval = clusterBumpConfigEpochWithoutConsensus();
+ sds reply = sdscatprintf(sdsempty(),"+%s %llu\r\n",
+ (retval == C_OK) ? "BUMPED" : "STILL",
+ (unsigned long long) myself->configEpoch);
+ addReplySds(c,reply);
} else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) {
/* CLUSTER INFO */
char *statestr[] = {"ok","fail","needhelp"};
@@ -3851,7 +4419,7 @@ void clusterCommand(redisClient *c) {
uint64_t myepoch;
int j;
- for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
+ for (j = 0; j < CLUSTER_SLOTS; j++) {
clusterNode *n = server.cluster->slots[j];
if (n == NULL) continue;
@@ -3878,8 +4446,6 @@ void clusterCommand(redisClient *c) {
"cluster_size:%d\r\n"
"cluster_current_epoch:%llu\r\n"
"cluster_my_epoch:%llu\r\n"
- "cluster_stats_messages_sent:%lld\r\n"
- "cluster_stats_messages_received:%lld\r\n"
, statestr[server.cluster->state],
slots_assigned,
slots_ok,
@@ -3888,11 +4454,40 @@ void clusterCommand(redisClient *c) {
dictSize(server.cluster->nodes),
server.cluster->size,
(unsigned long long) server.cluster->currentEpoch,
- (unsigned long long) myepoch,
- server.cluster->stats_bus_messages_sent,
- server.cluster->stats_bus_messages_received
+ (unsigned long long) myepoch
);
- addReplyBulkSds(c, info);
+
+ /* Show stats about messages sent and received. */
+ long long tot_msg_sent = 0;
+ long long tot_msg_received = 0;
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_sent[i] == 0) continue;
+ tot_msg_sent += server.cluster->stats_bus_messages_sent[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_sent:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_sent[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_sent:%lld\r\n", tot_msg_sent);
+
+ for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
+ if (server.cluster->stats_bus_messages_received[i] == 0) continue;
+ tot_msg_received += server.cluster->stats_bus_messages_received[i];
+ info = sdscatprintf(info,
+ "cluster_stats_messages_%s_received:%lld\r\n",
+ clusterGetMessageTypeString(i),
+ server.cluster->stats_bus_messages_received[i]);
+ }
+ info = sdscatprintf(info,
+ "cluster_stats_messages_received:%lld\r\n", tot_msg_received);
+
+ /* Produce the reply protocol. */
+ addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
+ (unsigned long)sdslen(info)));
+ addReplySds(c,info);
+ addReply(c,shared.crlf);
} else if (!strcasecmp(c->argv[1]->ptr,"saveconfig") && c->argc == 2) {
int retval = clusterSaveConfig(1);
@@ -3910,9 +4505,9 @@ void clusterCommand(redisClient *c) {
/* CLUSTER COUNTKEYSINSLOT <slot> */
long long slot;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
return;
- if (slot < 0 || slot >= REDIS_CLUSTER_SLOTS) {
+ if (slot < 0 || slot >= CLUSTER_SLOTS) {
addReplyError(c,"Invalid slot");
return;
}
@@ -3923,20 +4518,28 @@ void clusterCommand(redisClient *c) {
unsigned int numkeys, j;
robj **keys;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK)
return;
if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL)
- != REDIS_OK)
+ != C_OK)
return;
- if (slot < 0 || slot >= REDIS_CLUSTER_SLOTS || maxkeys < 0) {
+ if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) {
addReplyError(c,"Invalid slot or number of keys");
return;
}
+ /* Avoid allocating more than needed in case of a large COUNT argument
+ * and a smaller actual number of keys. */
+ unsigned int keys_in_slot = countKeysInSlot(slot);
+ if (maxkeys > keys_in_slot) maxkeys = keys_in_slot;
+
keys = zmalloc(sizeof(robj*)*maxkeys);
numkeys = getKeysInSlot(slot, keys, maxkeys);
addReplyMultiBulkLen(c,numkeys);
- for (j = 0; j < numkeys; j++) addReplyBulk(c,keys[j]);
+ for (j = 0; j < numkeys; j++) {
+ addReplyBulk(c,keys[j]);
+ decrRefCount(keys[j]);
+ }
zfree(keys);
} else if (!strcasecmp(c->argv[1]->ptr,"forget") && c->argc == 3) {
/* CLUSTER FORGET <NODE ID> */
@@ -3974,7 +4577,7 @@ void clusterCommand(redisClient *c) {
}
/* Can't replicate a slave. */
- if (n->slaveof != NULL) {
+ if (nodeIsSlave(n)) {
addReplyError(c,"I can only replicate a master, not a slave.");
return;
}
@@ -4016,44 +4619,72 @@ void clusterCommand(redisClient *c) {
addReplyBulkCString(c,ni);
sdsfree(ni);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"count-failure-reports") &&
+ c->argc == 3)
+ {
+ /* CLUSTER COUNT-FAILURE-REPORTS <NODE ID> */
+ clusterNode *n = clusterLookupNode(c->argv[2]->ptr);
+
+ if (!n) {
+ addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr);
+ return;
+ } else {
+ addReplyLongLong(c,clusterNodeFailureReportsCount(n));
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"failover") &&
(c->argc == 2 || c->argc == 3))
{
- /* CLUSTER FAILOVER [FORCE] */
- int force = 0;
+ /* CLUSTER FAILOVER [FORCE|TAKEOVER] */
+ int force = 0, takeover = 0;
if (c->argc == 3) {
if (!strcasecmp(c->argv[2]->ptr,"force")) {
force = 1;
+ } else if (!strcasecmp(c->argv[2]->ptr,"takeover")) {
+ takeover = 1;
+ force = 1; /* Takeover also implies force. */
} else {
addReply(c,shared.syntaxerr);
return;
}
}
+ /* Check preconditions. */
if (nodeIsMaster(myself)) {
addReplyError(c,"You should send CLUSTER FAILOVER to a slave");
return;
+ } else if (myself->slaveof == NULL) {
+ addReplyError(c,"I'm a slave but my master is unknown to me");
+ return;
} else if (!force &&
- (myself->slaveof == NULL || nodeFailed(myself->slaveof) ||
- myself->slaveof->link == NULL))
+ (nodeFailed(myself->slaveof) ||
+ myself->slaveof->link == NULL))
{
addReplyError(c,"Master is down or failed, "
"please use CLUSTER FAILOVER FORCE");
return;
}
resetManualFailover();
- server.cluster->mf_end = mstime() + REDIS_CLUSTER_MF_TIMEOUT;
-
- /* If this is a forced failover, we don't need to talk with our master
- * to agree about the offset. We just failover taking over it without
- * coordination. */
- if (force) {
+ server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
+
+ if (takeover) {
+ /* A takeover does not perform any initial check. It just
+ * generates a new configuration epoch for this node without
+ * consensus, claims the master's slots, and broadcasts the new
+ * configuration. */
+ serverLog(LL_WARNING,"Taking over the master (user request).");
+ clusterBumpConfigEpochWithoutConsensus();
+ clusterFailoverReplaceYourMaster();
+ } else if (force) {
+ /* If this is a forced failover, we don't need to talk with our
+ * master to agree about the offset. We just failover taking over
+ * it without coordination. */
+ serverLog(LL_WARNING,"Forced failover user request accepted.");
server.cluster->mf_can_start = 1;
} else {
+ serverLog(LL_WARNING,"Manual failover user request accepted.");
clusterSendMFStart(myself->slaveof);
}
- redisLog(REDIS_WARNING,"Manual failover user request accepted.");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"set-config-epoch") && c->argc == 3)
{
@@ -4066,7 +4697,7 @@ void clusterCommand(redisClient *c) {
* resolution system which is too slow when a big cluster is created. */
long long epoch;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&epoch,NULL) != C_OK)
return;
if (epoch < 0) {
@@ -4078,7 +4709,7 @@ void clusterCommand(redisClient *c) {
addReplyError(c,"Node config epoch is already non-zero");
} else {
myself->configEpoch = epoch;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"configEpoch set to %llu via CLUSTER SET-CONFIG-EPOCH",
(unsigned long long) myself->configEpoch);
@@ -4119,7 +4750,8 @@ void clusterCommand(redisClient *c) {
clusterReset(hard);
addReply(c,shared.ok);
} else {
- addReplyError(c,"Wrong CLUSTER subcommand or number of arguments");
+ addReplySubcommandSyntaxError(c);
+ return;
}
}
@@ -4136,8 +4768,8 @@ void createDumpPayload(rio *payload, robj *o) {
    /* Serialize the object in an RDB-like format. It consists of an object type
* byte followed by the serialized object. This is understood by RESTORE. */
rioInitWithBuffer(payload,sdsempty());
- redisAssert(rdbSaveObjectType(payload,o));
- redisAssert(rdbSaveObject(payload,o));
+ serverAssert(rdbSaveObjectType(payload,o));
+ serverAssert(rdbSaveObject(payload,o));
    /* Write the footer; this is what it looks like:
     * ----------------+---------------------+---------------+
     * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 |
     * ----------------+---------------------+---------------+
     * RDB version and CRC are both in little endian.
     */
*/
/* RDB version */
- buf[0] = REDIS_RDB_VERSION & 0xff;
- buf[1] = (REDIS_RDB_VERSION >> 8) & 0xff;
+ buf[0] = RDB_VERSION & 0xff;
+ buf[1] = (RDB_VERSION >> 8) & 0xff;
payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2);
/* CRC64 */
@@ -4160,7 +4792,7 @@ void createDumpPayload(rio *payload, robj *o) {
/* Verify that the RDB version of the dump payload matches the one of this Redis
* instance and that the checksum is ok.
- * If the DUMP payload looks valid REDIS_OK is returned, otherwise REDIS_ERR
+ * If the DUMP payload looks valid C_OK is returned, otherwise C_ERR
* is returned. */
int verifyDumpPayload(unsigned char *p, size_t len) {
unsigned char *footer;
@@ -4168,23 +4800,23 @@ int verifyDumpPayload(unsigned char *p, size_t len) {
uint64_t crc;
/* At least 2 bytes of RDB version and 8 of CRC64 should be present. */
- if (len < 10) return REDIS_ERR;
+ if (len < 10) return C_ERR;
footer = p+(len-10);
/* Verify RDB version */
rdbver = (footer[1] << 8) | footer[0];
- if (rdbver != REDIS_RDB_VERSION) return REDIS_ERR;
+ if (rdbver > RDB_VERSION) return C_ERR;
/* Verify CRC64 */
crc = crc64(0,p,len-8);
memrev64ifbe(&crc);
- return (memcmp(&crc,footer+2,8) == 0) ? REDIS_OK : REDIS_ERR;
+ return (memcmp(&crc,footer+2,8) == 0) ? C_OK : C_ERR;
}
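
For reference, a standalone sketch of the 10-byte footer that createDumpPayload() writes and the function above verifies. RDB_VERSION 9 and a little-endian host are assumptions of the example, not facts from the diff:

    #include <stdint.h>
    #include <string.h>

    /* Appends the DUMP footer: 2 bytes of RDB version (little endian)
     * followed by 8 bytes of CRC64 computed over payload+version bytes.
     * On a big endian host the CRC bytes would need swapping first, as
     * memrev64ifbe() does in the real code. */
    static void appendDumpFooter(unsigned char *buf, size_t payload_len,
                                 uint16_t rdbver, uint64_t crc) {
        buf[payload_len]   = rdbver & 0xff;        /* e.g. 0x09 for version 9 */
        buf[payload_len+1] = (rdbver >> 8) & 0xff; /* 0x00 */
        memcpy(buf+payload_len+2, &crc, 8);
    }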
/* DUMP keyname
* DUMP is actually not used by Redis Cluster but it is the obvious
* complement of RESTORE and can be useful for different applications. */
-void dumpCommand(redisClient *c) {
+void dumpCommand(client *c) {
robj *o, *dumpobj;
rio payload;
@@ -4198,23 +4830,47 @@ void dumpCommand(redisClient *c) {
createDumpPayload(&payload,o);
/* Transfer to the client */
- dumpobj = createObject(REDIS_STRING,payload.io.buffer.ptr);
+ dumpobj = createObject(OBJ_STRING,payload.io.buffer.ptr);
addReplyBulk(c,dumpobj);
decrRefCount(dumpobj);
return;
}
/* RESTORE key ttl serialized-value [REPLACE] */
-void restoreCommand(redisClient *c) {
- long long ttl;
+void restoreCommand(client *c) {
+ long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1;
rio payload;
- int j, type, replace = 0;
+ int j, type, replace = 0, absttl = 0;
robj *obj;
/* Parse additional options */
for (j = 4; j < c->argc; j++) {
+ int additional = c->argc-j-1;
if (!strcasecmp(c->argv[j]->ptr,"replace")) {
replace = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"absttl")) {
+ absttl = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"idletime") && additional >= 1 &&
+ lfu_freq == -1)
+ {
+ if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL)
+ != C_OK) return;
+ if (lru_idle < 0) {
+ addReplyError(c,"Invalid IDLETIME value, must be >= 0");
+ return;
+ }
+ lru_clock = LRU_CLOCK();
+ j++; /* Consume additional arg. */
+ } else if (!strcasecmp(c->argv[j]->ptr,"freq") && additional >= 1 &&
+ lru_idle == -1)
+ {
+ if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL)
+ != C_OK) return;
+ if (lfu_freq < 0 || lfu_freq > 255) {
+ addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255");
+ return;
+ }
+ j++; /* Consume additional arg. */
} else {
addReply(c,shared.syntaxerr);
return;
@@ -4228,7 +4884,7 @@ void restoreCommand(redisClient *c) {
}
/* Check if the TTL value makes sense */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != REDIS_OK) {
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != C_OK) {
return;
} else if (ttl < 0) {
addReplyError(c,"Invalid TTL value, must be >= 0");
@@ -4236,7 +4892,7 @@ void restoreCommand(redisClient *c) {
}
/* Verify RDB version and data checksum. */
- if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == REDIS_ERR)
+ if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr)) == C_ERR)
{
addReplyError(c,"DUMP payload version or checksum are wrong");
return;
@@ -4255,7 +4911,11 @@ void restoreCommand(redisClient *c) {
/* Create the key and set the TTL if any */
dbAdd(c->db,c->argv[1],obj);
- if (ttl) setExpire(c->db,c->argv[1],mstime()+ttl);
+ if (ttl) {
+ if (!absttl) ttl+=mstime();
+ setExpire(c,c->db,c->argv[1],ttl);
+ }
+ objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock);
signalModifiedKey(c->db,c->argv[1]);
addReply(c,shared.ok);
server.dirty++;
@@ -4272,11 +4932,12 @@ void restoreCommand(redisClient *c) {
typedef struct migrateCachedSocket {
int fd;
+ long last_dbid;
time_t last_use_time;
} migrateCachedSocket;
-/* Return a TCP socket connected with the target instance, possibly returning
- * a cached one.
+/* Return a migrateCachedSocket containing a TCP socket connected with the
+ * target instance, possibly returning a cached one.
*
 * This function is responsible for sending errors to the client if a
 * connection can't be established. In this case NULL is returned.
@@ -4286,7 +4947,7 @@ typedef struct migrateCachedSocket {
* If the caller detects an error while using the socket, migrateCloseSocket()
* should be called so that the connection will be created from scratch
* the next time. */
-int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
+migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) {
int fd;
sds name = sdsempty();
migrateCachedSocket *cs;
@@ -4299,7 +4960,7 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
if (cs) {
sdsfree(name);
cs->last_use_time = server.unixtime;
- return cs->fd;
+ return cs;
}
/* No cached socket, create one. */
@@ -4313,13 +4974,13 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
}
/* Create the socket */
- fd = anetTcpNonBlockBindConnect(server.neterr,c->argv[1]->ptr,
- atoi(c->argv[2]->ptr),REDIS_BIND_ADDR);
+ fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr,
+ atoi(c->argv[2]->ptr));
if (fd == -1) {
sdsfree(name);
addReplyErrorFormat(c,"Can't connect to target node: %s",
server.neterr);
- return -1;
+ return NULL;
}
anetEnableTcpNoDelay(server.neterr,fd);
@@ -4329,15 +4990,16 @@ int migrateGetSocket(redisClient *c, robj *host, robj *port, long timeout) {
addReplySds(c,
sdsnew("-IOERR error or timeout connecting to the client\r\n"));
close(fd);
- return -1;
+ return NULL;
}
/* Add to the cache and return it to the caller. */
cs = zmalloc(sizeof(*cs));
cs->fd = fd;
+ cs->last_dbid = -1;
cs->last_use_time = server.unixtime;
dictAdd(server.migrate_cached_sockets,name,cs);
- return fd;
+ return cs;
}
/* Free a migrate cached connection. */
@@ -4376,28 +5038,54 @@ void migrateCloseTimedoutSockets(void) {
dictReleaseIterator(di);
}
-/* MIGRATE host port key dbid timeout [COPY | REPLACE] */
-void migrateCommand(redisClient *c) {
- int fd, copy, replace, j;
+/* MIGRATE host port key dbid timeout [COPY | REPLACE | AUTH password]
+ *
+ * Or, in the multiple keys form:
+ *
+ * MIGRATE host port "" dbid timeout [COPY | REPLACE | AUTH password] KEYS key1
+ * key2 ... keyN */
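
For example (host, port and key names invented), the two forms look like this:

    MIGRATE 203.0.113.10 6379 mykey 0 5000 REPLACE
    MIGRATE 203.0.113.10 6379 "" 0 5000 REPLACE KEYS user:1 user:2 user:3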
+void migrateCommand(client *c) {
+ migrateCachedSocket *cs;
+ int copy = 0, replace = 0, j;
+ char *password = NULL;
long timeout;
long dbid;
- long long ttl, expireat;
- robj *o;
+ robj **ov = NULL; /* Objects to migrate. */
+ robj **kv = NULL; /* Key names. */
+ robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */
rio cmd, payload;
- int retry_num = 0;
+ int may_retry = 1;
+ int write_error = 0;
+ int argv_rewritten = 0;
-try_again:
- /* Initialization */
- copy = 0;
- replace = 0;
- ttl = 0;
+ /* To support the KEYS option we need the following additional state. */
+ int first_key = 3; /* Argument index of the first key. */
+ int num_keys = 1; /* By default only migrate the 'key' argument. */
/* Parse additional options */
for (j = 6; j < c->argc; j++) {
+ int moreargs = j < c->argc-1;
if (!strcasecmp(c->argv[j]->ptr,"copy")) {
copy = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"replace")) {
replace = 1;
+ } else if (!strcasecmp(c->argv[j]->ptr,"auth")) {
+ if (!moreargs) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ j++;
+ password = c->argv[j]->ptr;
+ } else if (!strcasecmp(c->argv[j]->ptr,"keys")) {
+ if (sdslen(c->argv[3]->ptr) != 0) {
+ addReplyError(c,
+ "When using MIGRATE KEYS option, the key argument"
+ " must be set to the empty string");
+ return;
+ }
+ first_key = j+1;
+ num_keys = c->argc - j - 1;
+ break; /* All the remaining args are keys. */
} else {
addReply(c,shared.syntaxerr);
return;
@@ -4405,57 +5093,107 @@ try_again:
}
/* Sanity check */
- if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != REDIS_OK)
- return;
- if (getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != C_OK ||
+ getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != C_OK)
+ {
return;
+ }
if (timeout <= 0) timeout = 1000;
- /* Check if the key is here. If not we reply with success as there is
- * nothing to migrate (for instance the key expired in the meantime), but
- * we include such information in the reply string. */
- if ((o = lookupKeyWrite(c->db,c->argv[3])) == NULL) {
+ /* Check if the keys are here. If at least one key exists, migrate it;
+ * otherwise, if all the keys are missing, reply with "NOKEY" to signal
+ * the caller there was nothing to migrate. We don't return an error in
+ * this case, since often this is due to a normal condition like the key
+ * expiring in the meantime. */
+ ov = zrealloc(ov,sizeof(robj*)*num_keys);
+ kv = zrealloc(kv,sizeof(robj*)*num_keys);
+ int oi = 0;
+
+ for (j = 0; j < num_keys; j++) {
+ if ((ov[oi] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) {
+ kv[oi] = c->argv[first_key+j];
+ oi++;
+ }
+ }
+ num_keys = oi;
+ if (num_keys == 0) {
+ zfree(ov); zfree(kv);
addReplySds(c,sdsnew("+NOKEY\r\n"));
return;
}
+try_again:
+ write_error = 0;
+
/* Connect */
- fd = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
- if (fd == -1) return; /* error sent to the client by migrateGetSocket() */
+ cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
+ if (cs == NULL) {
+ zfree(ov); zfree(kv);
+ return; /* error sent to the client by migrateGetSocket() */
+ }
- /* Create RESTORE payload and generate the protocol to call the command. */
rioInitWithBuffer(&cmd,sdsempty());
- redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
- redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
-
- expireat = getExpire(c->db,c->argv[3]);
- if (expireat != -1) {
- ttl = expireat-mstime();
- if (ttl < 1) ttl = 1;
- }
- redisAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));
- if (server.cluster_enabled)
- redisAssertWithInfo(c,NULL,
- rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
- else
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
- redisAssertWithInfo(c,NULL,sdsEncodedObject(c->argv[3]));
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,c->argv[3]->ptr,
- sdslen(c->argv[3]->ptr)));
- redisAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
-
- /* Emit the payload argument, that is the serialized object using
- * the DUMP format. */
- createDumpPayload(&payload,o);
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,payload.io.buffer.ptr,
- sdslen(payload.io.buffer.ptr)));
- sdsfree(payload.io.buffer.ptr);
- /* Add the REPLACE option to the RESTORE command if it was specified
- * as a MIGRATE option. */
- if (replace)
- redisAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7));
+ /* Authentication */
+ if (password) {
+ serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"AUTH",4));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,password,
+ sdslen(password)));
+ }
+
+ /* Send the SELECT command if the current DB is not already selected. */
+ int select = cs->last_dbid != dbid; /* Should we emit SELECT? */
+ if (select) {
+ serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6));
+ serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid));
+ }
+
+ int expired = 0; /* Number of keys that we'll find already expired.
+ Note that serializing large keys may take some time
+ so certain keys that were found non-expired by the
+ earlier lookupKeyRead() call may have expired by the
+ time we serialize them. */
+
+ /* Create RESTORE payload and generate the protocol to call the command. */
+ for (j = 0; j < num_keys; j++) {
+ long long ttl = 0;
+ long long expireat = getExpire(c->db,kv[j]);
+
+ if (expireat != -1) {
+ ttl = expireat-mstime();
+ if (ttl < 0) {
+ expired++;
+ continue;
+ }
+ if (ttl < 1) ttl = 1;
+ }
+ serverAssertWithInfo(c,NULL,
+ rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));
+
+ if (server.cluster_enabled)
+ serverAssertWithInfo(c,NULL,
+ rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
+ else
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
+ serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j]));
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr,
+ sdslen(kv[j]->ptr)));
+ serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));
+
+ /* Emit the payload argument, that is the serialized object using
+ * the DUMP format. */
+ createDumpPayload(&payload,ov[j]);
+ serverAssertWithInfo(c,NULL,
+ rioWriteBulkString(&cmd,payload.io.buffer.ptr,
+ sdslen(payload.io.buffer.ptr)));
+ sdsfree(payload.io.buffer.ptr);
+
+ /* Add the REPLACE option to the RESTORE command if it was specified
+ * as a MIGRATE option. */
+ if (replace)
+ serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7));
+ }
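
For a single non-expiring key, the loop above produces wire protocol along these lines (the SELECT appears only when the cached socket was last used on a different db; "mykey" and the elided payload are illustrative):

    *2\r\n$6\r\nSELECT\r\n$1\r\n0\r\n
    *4\r\n$14\r\nRESTORE-ASKING\r\n$5\r\nmykey\r\n$1\r\n0\r\n$<len>\r\n<dump payload>\r\n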
/* Transfer the query to the other node in 64K chunks. */
errno = 0;
@@ -4466,60 +5204,158 @@ try_again:
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
- nwritten = syncWrite(fd,buf+pos,towrite,timeout);
- if (nwritten != (signed)towrite) goto socket_wr_err;
+ nwritten = syncWrite(cs->fd,buf+pos,towrite,timeout);
+ if (nwritten != (signed)towrite) {
+ write_error = 1;
+ goto socket_err;
+ }
pos += nwritten;
}
}
- /* Read back the reply. */
- {
- char buf1[1024];
- char buf2[1024];
-
- /* Read the two replies */
- if (syncReadLine(fd, buf1, sizeof(buf1), timeout) <= 0)
- goto socket_rd_err;
- if (syncReadLine(fd, buf2, sizeof(buf2), timeout) <= 0)
- goto socket_rd_err;
- if (buf1[0] == '-' || buf2[0] == '-') {
- addReplyErrorFormat(c,"Target instance replied with error: %s",
- (buf1[0] == '-') ? buf1+1 : buf2+1);
+ char buf0[1024]; /* Auth reply. */
+ char buf1[1024]; /* Select reply. */
+ char buf2[1024]; /* Restore reply. */
+
+ /* Read the AUTH reply if needed. */
+ if (password && syncReadLine(cs->fd, buf0, sizeof(buf0), timeout) <= 0)
+ goto socket_err;
+
+ /* Read the SELECT reply if needed. */
+ if (select && syncReadLine(cs->fd, buf1, sizeof(buf1), timeout) <= 0)
+ goto socket_err;
+
+ /* Read the RESTORE replies. */
+ int error_from_target = 0;
+ int socket_error = 0;
+ int del_idx = 1; /* Index of the key argument for the replicated DEL op. */
+
+ /* Allocate the new argument vector that will replace the current command,
+ * to propagate the MIGRATE as a DEL command (if no COPY option was given).
+ * We allocate num_keys+1 because the additional argument is for "DEL"
+ * command name itself. */
+ if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1));
+
+ for (j = 0; j < num_keys-expired; j++) {
+ if (syncReadLine(cs->fd, buf2, sizeof(buf2), timeout) <= 0) {
+ socket_error = 1;
+ break;
+ }
+ if ((password && buf0[0] == '-') ||
+ (select && buf1[0] == '-') ||
+ buf2[0] == '-')
+ {
+ /* On error assume that last_dbid is no longer valid. */
+ if (!error_from_target) {
+ cs->last_dbid = -1;
+ char *errbuf;
+ if (password && buf0[0] == '-') errbuf = buf0;
+ else if (select && buf1[0] == '-') errbuf = buf1;
+ else errbuf = buf2;
+
+ error_from_target = 1;
+ addReplyErrorFormat(c,"Target instance replied with error: %s",
+ errbuf+1);
+ }
} else {
- robj *aux;
-
if (!copy) {
/* No COPY option: remove the local key, signal the change. */
- dbDelete(c->db,c->argv[3]);
- signalModifiedKey(c->db,c->argv[3]);
+ dbDelete(c->db,kv[j]);
+ signalModifiedKey(c->db,kv[j]);
+ server.dirty++;
+
+ /* Populate the argument vector to replace the old one. */
+ newargv[del_idx++] = kv[j];
+ incrRefCount(kv[j]);
}
- addReply(c,shared.ok);
- server.dirty++;
+ }
+ }
- /* Translate MIGRATE as DEL for replication/AOF. */
- aux = createStringObject("DEL",3);
- rewriteClientCommandVector(c,2,aux,c->argv[3]);
- decrRefCount(aux);
+ /* On socket error, if we want to retry, do it now before rewriting the
+ * command vector. We only retry if we are sure nothing was processed
+ * and we failed to read the first reply (j == 0 test). */
+ if (!error_from_target && socket_error && j == 0 && may_retry &&
+ errno != ETIMEDOUT)
+ {
+ goto socket_err; /* A retry is guaranteed because of tested conditions.*/
+ }
+
+ /* On socket errors, close the migration socket now, while we still have
+ * the original host/port in the ARGV. Later the original command may be
+ * rewritten as a DEL and by then it will be too late. */
+ if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]);
+
+ if (!copy) {
+ /* Translate MIGRATE as DEL for replication/AOF. Note that we do
+ * this only for the keys for which we received an acknowledgement
+ * from the receiving Redis server, by using the del_idx index. */
+ if (del_idx > 1) {
+ newargv[0] = createStringObject("DEL",3);
+ /* Note that the following call takes ownership of newargv. */
+ replaceClientCommandVector(c,del_idx,newargv);
+ argv_rewritten = 1;
+ } else {
+ /* No key transfer acknowledged, no need to rewrite as DEL. */
+ zfree(newargv);
}
+ newargv = NULL; /* Make it safe to call zfree() on it in the future. */
}
- sdsfree(cmd.io.buffer.ptr);
- return;
+ /* If we are here and a socket error happened, we don't want to retry.
+ * Just signal the problem to the client, but only do it if we did not
+ * already queue a different error reported by the destination server. */
+ if (!error_from_target && socket_error) {
+ may_retry = 0;
+ goto socket_err;
+ }
+
+ if (!error_from_target) {
+ /* Success! Update the last_dbid in migrateCachedSocket, so that we can
+ * avoid SELECT the next time if the target DB is the same. Reply +OK.
+ *
+ * Note: If we reached this point, even if socket_error is true
+ * still the SELECT command succeeded (otherwise the code jumps to
+ * the socket_err label). */
+ cs->last_dbid = dbid;
+ addReply(c,shared.ok);
+ } else {
+ /* On error the reply was already sent in the loop above, and
+ * last_dbid was set to -1 to force a SELECT the next time. */
+ }
-socket_wr_err:
sdsfree(cmd.io.buffer.ptr);
- migrateCloseSocket(c->argv[1],c->argv[2]);
- if (errno != ETIMEDOUT && retry_num++ == 0) goto try_again;
- addReplySds(c,
- sdsnew("-IOERR error or timeout writing to target instance\r\n"));
+ zfree(ov); zfree(kv); zfree(newargv);
return;
-socket_rd_err:
+/* On socket errors we try to close the cached socket and try again.
+ * It is very common for the cached socket to get closed; if simply reopening
+ * it works, there is no reason to report the error to the caller. */
+socket_err:
+ /* Cleanup we want to perform in both the retry and no retry case.
+ * Note: Closing the migrate socket will also force SELECT next time. */
sdsfree(cmd.io.buffer.ptr);
- migrateCloseSocket(c->argv[1],c->argv[2]);
- if (errno != ETIMEDOUT && retry_num++ == 0) goto try_again;
+
+ /* If the command was rewritten as DEL and there was a socket error,
+ * we already closed the socket earlier. While migrateCloseSocket()
+ * is idempotent, the host/port arguments are now gone, so don't do it
+ * again. */
+ if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]);
+ zfree(newargv);
+ newargv = NULL; /* This will get reallocated on retry. */
+
+ /* Retry only if it's not a timeout and we never attempted a retry
+ * (or the code jumping here did not set may_retry to zero). */
+ if (errno != ETIMEDOUT && may_retry) {
+ may_retry = 0;
+ goto try_again;
+ }
+
+ /* Cleanup we want to do if no retry is attempted. */
+ zfree(ov); zfree(kv);
addReplySds(c,
- sdsnew("-IOERR error or timeout reading from target node\r\n"));
+ sdscatprintf(sdsempty(),
+ "-IOERR error or timeout %s to target instance\r\n",
+ write_error ? "writing" : "reading"));
return;
}
@@ -4531,30 +5367,30 @@ socket_rd_err:
* The client should issue ASKING before to actually send the command to
* the target instance. See the Redis Cluster specification for more
* information. */
-void askingCommand(redisClient *c) {
+void askingCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- c->flags |= REDIS_ASKING;
+ c->flags |= CLIENT_ASKING;
addReply(c,shared.ok);
}
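From the client side, an -ASK redirection is honored by sending ASKING to the
node named in the error and then repeating the command on that same
connection. A sketch using the bundled hiredis client (host, port and key are
hypothetical; parsing of the -ASK error is omitted):

    #include <stdio.h>
    #include <hiredis/hiredis.h>

    /* Repeat a GET on the importing node; without the preceding ASKING
     * the node would answer with -MOVED back to the migrating node. */
    static redisReply *getFollowingAsk(redisContext *target, const char *key) {
        redisReply *r = redisCommand(target, "ASKING");
        if (r) freeReplyObject(r);
        return redisCommand(target, "GET %s", key);
    }

    int main(void) {
        redisContext *target = redisConnect("127.0.0.1", 7001);
        if (!target || target->err) return 1;
        redisReply *r = getFollowingAsk(target, "mykey");
        if (r) {
            printf("%s\n", r->str ? r->str : "(non-string reply)");
            freeReplyObject(r);
        }
        redisFree(target);
        return 0;
    }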
/* The READONLY command is used by clients to enter the read-only mode.
 * In this mode slaves will not redirect clients as long as clients access
 * keys served by the slave's master using read-only commands.
-void readonlyCommand(redisClient *c) {
+void readonlyCommand(client *c) {
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
}
- c->flags |= REDIS_READONLY;
+ c->flags |= CLIENT_READONLY;
addReply(c,shared.ok);
}
/* The READWRITE command just clears the READONLY command state. */
-void readwriteCommand(redisClient *c) {
- c->flags &= ~REDIS_READONLY;
+void readwriteCommand(client *c) {
+ c->flags &= ~CLIENT_READONLY;
addReply(c,shared.ok);
}
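A matching client-side sketch for the read-only mode, again using hiredis
(addresses and key hypothetical): after READONLY, reads for slots served by
this replica's master are answered locally instead of triggering -MOVED, and
READWRITE restores the default behavior.

    #include <stdio.h>
    #include <hiredis/hiredis.h>

    int main(void) {
        redisContext *replica = redisConnect("127.0.0.1", 7002);
        if (!replica || replica->err) return 1;
        redisReply *r = redisCommand(replica, "READONLY");
        if (r) freeReplyObject(r);
        r = redisCommand(replica, "GET mykey"); /* served by the replica */
        if (r) {
            printf("%s\n", r->str ? r->str : "(non-string reply)");
            freeReplyObject(r);
        }
        redisFree(replica);
        return 0;
    }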
@@ -4568,21 +5404,29 @@ void readwriteCommand(redisClient *c) {
* On success the function returns the node that is able to serve the request.
 * If the node is not 'myself' a redirection must be performed. The kind of
* redirection is specified setting the integer passed by reference
- * 'error_code', which will be set to REDIS_CLUSTER_REDIR_ASK or
- * REDIS_CLUSTER_REDIR_MOVED.
+ * 'error_code', which will be set to CLUSTER_REDIR_ASK or
+ * CLUSTER_REDIR_MOVED.
*
- * When the node is 'myself' 'error_code' is set to REDIS_CLUSTER_REDIR_NONE.
+ * When the node is 'myself' 'error_code' is set to CLUSTER_REDIR_NONE.
*
* If the command fails NULL is returned, and the reason of the failure is
* provided via 'error_code', which will be set to:
*
- * REDIS_CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that
+ * CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that
* don't belong to the same hash slot.
*
- * REDIS_CLUSTER_REDIR_UNSTABLE if the request contains mutliple keys
+ * CLUSTER_REDIR_UNSTABLE if the request contains multiple keys
* belonging to the same slot, but the slot is not stable (in migration or
- * importing state, likely because a resharding is in progress). */
-clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
+ * importing state, likely because a resharding is in progress).
+ *
+ * CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is
+ * not bound to any node. In this case the cluster global state should
+ * already be "down", but it is fragile to rely on the global state being
+ * up to date, so we also handle the case here.
+ *
+ * CLUSTER_REDIR_DOWN_STATE if the cluster is down but the user attempts to
+ * execute a command that addresses one or more keys. */
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
clusterNode *n = NULL;
robj *firstkey = NULL;
int multiple_keys = 0;
@@ -4591,14 +5435,14 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0;
/* Set error code optimistically for the base case. */
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_NONE;
+ if (error_code) *error_code = CLUSTER_REDIR_NONE;
/* We handle all the cases as if they were EXEC commands, so we have
* a common code path for everything */
if (cmd->proc == execCommand) {
- /* If REDIS_MULTI flag is not set EXEC is just going to return an
+ /* If CLIENT_MULTI flag is not set EXEC is just going to return an
* error. */
- if (!(c->flags & REDIS_MULTI)) return myself;
+ if (!(c->flags & CLIENT_MULTI)) return myself;
ms = &c->mstate;
} else {
/* In order to have a single codepath create a fake Multi State
@@ -4635,7 +5479,18 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
firstkey = thiskey;
slot = thisslot;
n = server.cluster->slots[slot];
- redisAssertWithInfo(c,firstkey,n != NULL);
+
+ /* Error: If a slot is not served, we are in "cluster down"
+ * state. However the state is yet to be updated, so this was
+ * not trapped earlier in processCommand(). Report the same
+ * error to the client. */
+ if (n == NULL) {
+ getKeysFreeResult(keyindex);
+ if (error_code)
+ *error_code = CLUSTER_REDIR_DOWN_UNBOUND;
+ return NULL;
+ }
+
/* If we are migrating or importing this slot, we need to check
* if we have all the keys in the request (the only way we
* can safely serve the request, otherwise we return a TRYAGAIN
@@ -4656,7 +5511,7 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Error: multiple keys from different slots. */
getKeysFreeResult(keyindex);
if (error_code)
- *error_code = REDIS_CLUSTER_REDIR_CROSS_SLOT;
+ *error_code = CLUSTER_REDIR_CROSS_SLOT;
return NULL;
} else {
/* Flag this request as one with multiple different
@@ -4677,19 +5532,28 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
}
/* No key at all in command? then we can serve the request
- * without redirections or errors. */
+ * without redirections or errors in all cases. */
if (n == NULL) return myself;
+ /* Cluster is globally down but we got keys? We can't serve the request. */
+ if (server.cluster->state != CLUSTER_OK) {
+ if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
+ return NULL;
+ }
+
/* Return the hashslot by reference. */
if (hashslot) *hashslot = slot;
- /* This request is about a slot we are migrating into another instance?
- * Then if we have all the keys. */
+ /* MIGRATE always works in the context of the local node if the slot
+ * is open (migrating or importing state). We need to be able to freely
+ * move keys among instances in this case. */
+ if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand)
+ return myself;
/* If we don't have all the keys and we are migrating the slot, send
* an ASK redirection. */
if (migrating_slot && missing_keys) {
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_ASK;
+ if (error_code) *error_code = CLUSTER_REDIR_ASK;
return server.cluster->migrating_slots_to[slot];
}
@@ -4698,10 +5562,10 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
* involves multiple keys and we don't have them all, the only option is
* to send a TRYAGAIN error. */
if (importing_slot &&
- (c->flags & REDIS_ASKING || cmd->flags & REDIS_CMD_ASKING))
+ (c->flags & CLIENT_ASKING || cmd->flags & CMD_ASKING))
{
if (multiple_keys && missing_keys) {
- if (error_code) *error_code = REDIS_CLUSTER_REDIR_UNSTABLE;
+ if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE;
return NULL;
} else {
return myself;
@@ -4711,8 +5575,9 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Handle the read-only client case reading from a slave: if this
 * node is a slave and the request is about a hash slot our master
* is serving, we can reply without redirection. */
- if (c->flags & REDIS_READONLY &&
- cmd->flags & REDIS_CMD_READONLY &&
+ if (c->flags & CLIENT_READONLY &&
+ (cmd->flags & CMD_READONLY || cmd->proc == evalCommand ||
+ cmd->proc == evalShaCommand) &&
nodeIsSlave(myself) &&
myself->slaveof == n)
{
@@ -4721,6 +5586,92 @@ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **arg
/* Base case: just return the right node. However if this node is not
 * myself, set error_code to MOVED since we need to issue a redirection. */
- if (n != myself && error_code) *error_code = REDIS_CLUSTER_REDIR_MOVED;
+ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED;
return n;
}
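All of the slot reasoning above rests on how a key is mapped to one of the
16384 slots. A self-contained sketch mirroring keyHashSlot() (the bitwise
CRC below stands in for the table-driven implementation in src/crc16.c):

    #include <stdint.h>
    #include <stdio.h>

    /* CRC16-XMODEM (poly 0x1021, init 0), bit-by-bit variant. */
    static uint16_t crc16(const char *buf, int len) {
        uint16_t crc = 0;
        for (int i = 0; i < len; i++) {
            crc ^= (uint16_t)((unsigned char)buf[i]) << 8;
            for (int b = 0; b < 8; b++)
                crc = (crc & 0x8000) ? (uint16_t)((crc << 1) ^ 0x1021)
                                     : (uint16_t)(crc << 1);
        }
        return crc;
    }

    /* Slot of a key, honoring {hash tags}: if the key contains a non-empty
     * {...} section, only that section is hashed, so that related keys can
     * be forced into the same slot. */
    static unsigned int keyHashSlot(const char *key, int keylen) {
        int s, e;
        for (s = 0; s < keylen; s++) if (key[s] == '{') break;
        if (s == keylen) return crc16(key, keylen) & 16383;
        for (e = s + 1; e < keylen; e++) if (key[e] == '}') break;
        if (e == keylen || e == s + 1) return crc16(key, keylen) & 16383;
        return crc16(key + s + 1, e - s - 1) & 16383;
    }

    int main(void) {
        /* Both keys hash the tag "user1000", so they share a slot. */
        printf("%u\n", keyHashSlot("{user1000}.following", 20));
        printf("%u\n", keyHashSlot("{user1000}.followers", 20));
        return 0;
    }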
+
+/* Send the client the right redirection code, according to error_code
+ * that should be set to one of CLUSTER_REDIR_* macros.
+ *
+ * If CLUSTER_REDIR_ASK or CLUSTER_REDIR_MOVED error codes
+ * are used, then the node 'n' should not be NULL, but should be the
+ * node we want to mention in the redirection. Moreover hashslot should
+ * be set to the hash slot that caused the redirection. */
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) {
+ if (error_code == CLUSTER_REDIR_CROSS_SLOT) {
+ addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
+ } else if (error_code == CLUSTER_REDIR_UNSTABLE) {
+ * The request spans multiple keys in the same slot,
+ * but the slot is not "stable" currently as there is
+ * a migration or import in progress. */
+ addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
+ } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
+ } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
+ addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
+ } else if (error_code == CLUSTER_REDIR_MOVED ||
+ error_code == CLUSTER_REDIR_ASK)
+ {
+ addReplySds(c,sdscatprintf(sdsempty(),
+ "-%s %d %s:%d\r\n",
+ (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
+ hashslot,n->ip,n->port));
+ } else {
+ serverPanic("getNodeByQuery() unknown error.");
+ }
+}
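The MOVED/ASK payload is "<SLOT> <ip>:<port>", which a cluster-aware client
has to parse before reconnecting. A hypothetical helper (not part of Redis),
sized for NET_IP_STR_LEN-style 46-byte address buffers:

    #include <stdio.h>

    static int parseRedirect(const char *err, int *slot, char ip[46], int *port) {
        char kind[8]; /* "MOVED" or "ASK" */
        return sscanf(err, "%7s %d %45[^:]:%d", kind, slot, ip, port) == 4;
    }

    int main(void) {
        int slot, port; char ip[46];
        if (parseRedirect("MOVED 3999 127.0.0.1:6381", &slot, ip, &port))
            printf("slot=%d -> %s:%d\n", slot, ip, port);
        return 0;
    }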
+
+/* This function is called by the function processing clients incrementally
+ * to detect timeouts, in order to handle the following case:
+ *
+ * 1) A client blocks with BLPOP or similar blocking operation.
+ * 2) The master migrates the hash slot elsewhere or turns into a slave.
+ * 3) The client may remain blocked forever (or up to the max timeout time)
+ * waiting for a key change that will never happen.
+ *
+ * If the client is found to be blocked on a hash slot this node no
+ * longer handles, the client is sent a redirection error, and the function
+ * returns 1. Otherwise 0 is returned and no operation is performed. */
+int clusterRedirectBlockedClientIfNeeded(client *c) {
+ if (c->flags & CLIENT_BLOCKED &&
+ (c->btype == BLOCKED_LIST ||
+ c->btype == BLOCKED_ZSET ||
+ c->btype == BLOCKED_STREAM))
+ {
+ dictEntry *de;
+ dictIterator *di;
+
+ /* If the cluster is down, unblock the client with the right error. */
+ if (server.cluster->state == CLUSTER_FAIL) {
+ clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
+ return 1;
+ }
+
+ /* All keys must belong to the same slot, so check first key only. */
+ di = dictGetIterator(c->bpop.keys);
+ if ((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr));
+ clusterNode *node = server.cluster->slots[slot];
+
+ /* We send an error and unblock the client if:
+ * 1) The slot is unassigned, emitting a cluster down error.
+ * 2) The slot is not handled by this node, nor being imported. */
+ if (node != myself &&
+ server.cluster->importing_slots_from[slot] == NULL)
+ {
+ if (node == NULL) {
+ clusterRedirectClient(c,NULL,0,
+ CLUSTER_REDIR_DOWN_UNBOUND);
+ } else {
+ clusterRedirectClient(c,node,slot,
+ CLUSTER_REDIR_MOVED);
+ }
+ dictReleaseIterator(di);
+ return 1;
+ }
+ }
+ dictReleaseIterator(di);
+ }
+ return 0;
+}
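From the client's point of view this means even a blocking command can fail
with a redirection. A hiredis sketch (addresses and key hypothetical) of the
unblock-and-retry obligation:

    #include <stdio.h>
    #include <string.h>
    #include <hiredis/hiredis.h>

    int main(void) {
        redisContext *ctx = redisConnect("127.0.0.1", 7000);
        if (!ctx || ctx->err) return 1;
        redisReply *r = redisCommand(ctx, "BLPOP myqueue 0");
        if (r && r->type == REDIS_REPLY_ERROR &&
            (!strncmp(r->str, "MOVED", 5) ||
             !strncmp(r->str, "CLUSTERDOWN", 11)))
        {
            /* The slot moved (or the cluster failed) while we were
             * blocked: reconnect to the proper node and re-issue BLPOP. */
            printf("unblocked: %s\n", r->str);
        }
        if (r) freeReplyObject(r);
        redisFree(ctx);
        return 0;
    }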
diff --git a/src/cluster.h b/src/cluster.h
index ef5caf0d6..6f9954d24 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -1,35 +1,39 @@
-#ifndef __REDIS_CLUSTER_H
-#define __REDIS_CLUSTER_H
+#ifndef __CLUSTER_H
+#define __CLUSTER_H
/*-----------------------------------------------------------------------------
* Redis cluster data structures, defines, exported API.
*----------------------------------------------------------------------------*/
-#define REDIS_CLUSTER_SLOTS 16384
-#define REDIS_CLUSTER_OK 0 /* Everything looks ok */
-#define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */
-#define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */
-#define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
+#define CLUSTER_SLOTS 16384
+#define CLUSTER_OK 0 /* Everything looks ok */
+#define CLUSTER_FAIL 1 /* The cluster can't work */
+#define CLUSTER_NAMELEN 40 /* sha1 hex length */
+#define CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
/* The following defines are amount of time, sometimes expressed as
 * multipliers of the node timeout value (when ending with MULT). */
-#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15000
-#define REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
-#define REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1
-#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
-#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
-#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
-#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
-#define REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER 1
-#define REDIS_CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
-#define REDIS_CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
+#define CLUSTER_DEFAULT_NODE_TIMEOUT 15000
+#define CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
+#define CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1
+#define CLUSTER_DEFAULT_SLAVE_NO_FAILOVER 0 /* Failover by default. */
+#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
+#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
+#define CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
+#define CLUSTER_FAILOVER_DELAY 5 /* Seconds */
+#define CLUSTER_DEFAULT_MIGRATION_BARRIER 1
+#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
+#define CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
+#define CLUSTER_SLAVE_MIGRATION_DELAY 5000 /* Delay for slave migration. */
/* Redirection errors returned by getNodeByQuery(). */
-#define REDIS_CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
-#define REDIS_CLUSTER_REDIR_CROSS_SLOT 1 /* Keys in different slots. */
-#define REDIS_CLUSTER_REDIR_UNSTABLE 2 /* Keys in slot resharding. */
-#define REDIS_CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
-#define REDIS_CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define CLUSTER_REDIR_NONE 0 /* Node can serve the request. */
+#define CLUSTER_REDIR_CROSS_SLOT 1 /* -CROSSSLOT request. */
+#define CLUSTER_REDIR_UNSTABLE 2 /* -TRYAGAIN redirection required */
+#define CLUSTER_REDIR_ASK 3 /* -ASK redirection required. */
+#define CLUSTER_REDIR_MOVED 4 /* -MOVED redirection required. */
+#define CLUSTER_REDIR_DOWN_STATE 5 /* -CLUSTERDOWN, global state. */
+#define CLUSTER_REDIR_DOWN_UNBOUND 6 /* -CLUSTERDOWN, unbound slot. */
struct clusterNode;
@@ -43,32 +47,58 @@ typedef struct clusterLink {
} clusterLink;
/* Cluster node flags and macros. */
-#define REDIS_NODE_MASTER 1 /* The node is a master */
-#define REDIS_NODE_SLAVE 2 /* The node is a slave */
-#define REDIS_NODE_PFAIL 4 /* Failure? Need acknowledge */
-#define REDIS_NODE_FAIL 8 /* The node is believed to be malfunctioning */
-#define REDIS_NODE_MYSELF 16 /* This node is myself */
-#define REDIS_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
-#define REDIS_NODE_NOADDR 64 /* We don't know the address of this node */
-#define REDIS_NODE_MEET 128 /* Send a MEET message to this node */
-#define REDIS_NODE_PROMOTED 256 /* Master was a slave promoted by failover */
-#define REDIS_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
-
-#define nodeIsMaster(n) ((n)->flags & REDIS_NODE_MASTER)
-#define nodeIsSlave(n) ((n)->flags & REDIS_NODE_SLAVE)
-#define nodeInHandshake(n) ((n)->flags & REDIS_NODE_HANDSHAKE)
-#define nodeHasAddr(n) (!((n)->flags & REDIS_NODE_NOADDR))
-#define nodeWithoutAddr(n) ((n)->flags & REDIS_NODE_NOADDR)
-#define nodeTimedOut(n) ((n)->flags & REDIS_NODE_PFAIL)
-#define nodeFailed(n) ((n)->flags & REDIS_NODE_FAIL)
+#define CLUSTER_NODE_MASTER 1 /* The node is a master */
+#define CLUSTER_NODE_SLAVE 2 /* The node is a slave */
+#define CLUSTER_NODE_PFAIL 4 /* Failure? Need acknowledge */
+#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */
+#define CLUSTER_NODE_MYSELF 16 /* This node is myself */
+#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
+#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
+#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
+#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */
+#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */
+#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
+
+#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)
+#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE)
+#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE)
+#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR))
+#define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR)
+#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL)
+#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
+#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER)
/* Reasons why a slave is not able to failover. */
-#define REDIS_CLUSTER_CANT_FAILOVER_NONE 0
-#define REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE 1
-#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
-#define REDIS_CLUSTER_CANT_FAILOVER_EXPIRED 3
-#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
-#define REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */
+#define CLUSTER_CANT_FAILOVER_NONE 0
+#define CLUSTER_CANT_FAILOVER_DATA_AGE 1
+#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
+#define CLUSTER_CANT_FAILOVER_EXPIRED 3
+#define CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
+#define CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */
+
+/* clusterState todo_before_sleep flags. */
+#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
+#define CLUSTER_TODO_UPDATE_STATE (1<<1)
+#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
+#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
+
+/* Message types.
+ *
+ * Note that the PING, PONG and MEET messages are actually the same exact
+ * kind of packet. PONG is the reply to ping, in the exact format as a PING,
+ * while MEET is a special PING that forces the receiver to add the sender
+ * as a node (if it is not already in the list). */
+#define CLUSTERMSG_TYPE_PING 0 /* Ping */
+#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
+#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
+#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
+#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
+#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */
+#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */
+#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */
+#define CLUSTERMSG_TYPE_MODULE 9 /* Module cluster API message. */
+#define CLUSTERMSG_TYPE_COUNT 10 /* Total number of message types. */
/* This structure represent elements of node->fail_reports. */
typedef struct clusterNodeFailReport {
@@ -78,22 +108,27 @@ typedef struct clusterNodeFailReport {
typedef struct clusterNode {
mstime_t ctime; /* Node object creation time. */
- char name[REDIS_CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
- int flags; /* REDIS_NODE_... */
+ char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
+ int flags; /* CLUSTER_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
- unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* slots handled by this node */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */
- struct clusterNode *slaveof; /* pointer to the master node */
+ struct clusterNode *slaveof; /* pointer to the master node. Note that it
+ may be NULL even if the node is a slave
+ if we don't have the master node in our
+ tables. */
mstime_t ping_sent; /* Unix time we sent latest ping */
mstime_t pong_received; /* Unix time we received the pong */
mstime_t fail_time; /* Unix time when FAIL flag was set */
mstime_t voted_time; /* Last time we voted for a slave of this master */
mstime_t repl_offset_time; /* Unix time we received offset for this node */
+ mstime_t orphaned_time; /* Starting time of orphaned master condition */
long long repl_offset; /* Last known repl offset for this node. */
- char ip[REDIS_IP_STR_LEN]; /* Latest known IP address of this node */
- int port; /* Latest known port of this node */
+ char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */
+ int port; /* Latest known clients port of this node */
+ int cport; /* Latest known cluster port of this node. */
clusterLink *link; /* TCP/IP link with this node */
list *fail_reports; /* List of nodes signaling this as failing */
} clusterNode;
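The slots[CLUSTER_SLOTS/8] field packs one bit per hash slot. A
self-contained sketch of the bit-level accessors such a bitmap needs
(names hypothetical; cluster.c has equivalent helpers):

    #include <stdio.h>
    #include <string.h>

    #define CLUSTER_SLOTS 16384

    static void slotBitSet(unsigned char *slots, int slot) {
        slots[slot >> 3] |= 1 << (slot & 7);
    }

    static int slotBitTest(unsigned char *slots, int slot) {
        return (slots[slot >> 3] & (1 << (slot & 7))) != 0;
    }

    int main(void) {
        unsigned char slots[CLUSTER_SLOTS / 8];
        memset(slots, 0, sizeof(slots));
        slotBitSet(slots, 4096);
        printf("%d %d\n", slotBitTest(slots, 4096), slotBitTest(slots, 4097));
        return 0;
    }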
@@ -101,14 +136,15 @@ typedef struct clusterNode {
typedef struct clusterState {
clusterNode *myself; /* This node */
uint64_t currentEpoch;
- int state; /* REDIS_CLUSTER_OK, REDIS_CLUSTER_FAIL, ... */
+ int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */
int size; /* Num of master nodes with at least one slot */
dict *nodes; /* Hash table of name -> clusterNode structures */
dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */
- clusterNode *migrating_slots_to[REDIS_CLUSTER_SLOTS];
- clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
- clusterNode *slots[REDIS_CLUSTER_SLOTS];
- zskiplist *slots_to_keys;
+ clusterNode *migrating_slots_to[CLUSTER_SLOTS];
+ clusterNode *importing_slots_from[CLUSTER_SLOTS];
+ clusterNode *slots[CLUSTER_SLOTS];
+ uint64_t slots_keys_count[CLUSTER_SLOTS];
+ rax *slots_to_keys;
/* The following fields are used to take the slave state on elections. */
mstime_t failover_auth_time; /* Time of previous or next election. */
int failover_auth_count; /* Number of votes received so far. */
@@ -130,65 +166,52 @@ typedef struct clusterState {
 /* The following fields are used by masters to take state on elections. */
uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */
int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
- long long stats_bus_messages_sent; /* Num of msg sent via cluster bus. */
- long long stats_bus_messages_received; /* Num of msg rcvd via cluster bus.*/
+ /* Messages received and sent by type. */
+ long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT];
+ long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT];
+ long long stats_pfail_nodes; /* Number of nodes in PFAIL status,
+ excluding nodes without address. */
} clusterState;
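Splitting the two scalar counters into per-type arrays keeps the aggregate
recoverable. A hypothetical helper (not in this patch, assuming the usual
server.h/cluster.h includes) summing the sent counters back into one total:

    #include "server.h"
    #include "cluster.h"

    static long long clusterTotalBusMessagesSent(clusterState *cs) {
        long long tot = 0;
        for (int j = 0; j < CLUSTERMSG_TYPE_COUNT; j++)
            tot += cs->stats_bus_messages_sent[j];
        return tot;
    }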
-/* clusterState todo_before_sleep flags. */
-#define CLUSTER_TODO_HANDLE_FAILOVER (1<<0)
-#define CLUSTER_TODO_UPDATE_STATE (1<<1)
-#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
-#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
-
/* Redis cluster messages header */
-/* Note that the PING, PONG and MEET messages are actually the same exact
- * kind of packet. PONG is the reply to ping, in the exact format as a PING,
- * while MEET is a special PING that forces the receiver to add the sender
- * as a node (if it is not already in the list). */
-#define CLUSTERMSG_TYPE_PING 0 /* Ping */
-#define CLUSTERMSG_TYPE_PONG 1 /* Pong (reply to Ping) */
-#define CLUSTERMSG_TYPE_MEET 2 /* Meet "let's join" message */
-#define CLUSTERMSG_TYPE_FAIL 3 /* Mark node xxx as failing */
-#define CLUSTERMSG_TYPE_PUBLISH 4 /* Pub/Sub Publish propagation */
-#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
-#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6 /* Yes, you have my vote */
-#define CLUSTERMSG_TYPE_UPDATE 7 /* Another node slots configuration */
-#define CLUSTERMSG_TYPE_MFSTART 8 /* Pause clients for manual failover */
-
/* Initially we don't know our "name", but we'll find it once we connect
* to the first node, using the getsockname() function. Then we'll use this
* address for all the next messages. */
typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
+ char nodename[CLUSTER_NAMELEN];
uint32_t ping_sent;
uint32_t pong_received;
- char ip[REDIS_IP_STR_LEN]; /* IP address last time it was seen */
- uint16_t port; /* port last time it was seen */
+ char ip[NET_IP_STR_LEN]; /* IP address last time it was seen */
+ uint16_t port; /* base port last time it was seen */
+ uint16_t cport; /* cluster port last time it was seen */
uint16_t flags; /* node->flags copy */
- uint16_t notused1; /* Some room for future improvements. */
- uint32_t notused2;
+ uint32_t notused1;
} clusterMsgDataGossip;
typedef struct {
- char nodename[REDIS_CLUSTER_NAMELEN];
+ char nodename[CLUSTER_NAMELEN];
} clusterMsgDataFail;
typedef struct {
uint32_t channel_len;
uint32_t message_len;
- /* We can't reclare bulk_data as bulk_data[] since this structure is
- * nested. The 8 bytes are removed from the count during the message
- * length computation. */
- unsigned char bulk_data[8];
+ unsigned char bulk_data[8]; /* 8 bytes, just a placeholder. */
} clusterMsgDataPublish;
typedef struct {
uint64_t configEpoch; /* Config epoch of the specified instance. */
- char nodename[REDIS_CLUSTER_NAMELEN]; /* Name of the slots owner. */
- unsigned char slots[REDIS_CLUSTER_SLOTS/8]; /* Slots bitmap. */
+ char nodename[CLUSTER_NAMELEN]; /* Name of the slots owner. */
+ unsigned char slots[CLUSTER_SLOTS/8]; /* Slots bitmap. */
} clusterMsgDataUpdate;
+typedef struct {
+ uint64_t module_id; /* ID of the sender module. */
+ uint32_t len; /* Length of the message payload. */
+ uint8_t type; /* Type from 0 to 255. */
+ unsigned char bulk_data[3]; /* 3 bytes, just a placeholder. */
+} clusterMsgModule;
+
union clusterMsgData {
/* PING, MEET and PONG */
struct {
@@ -210,15 +233,20 @@ union clusterMsgData {
struct {
clusterMsgDataUpdate nodecfg;
} update;
+
+ /* MODULE */
+ struct {
+ clusterMsgModule msg;
+ } module;
};
-#define CLUSTER_PROTO_VER 0 /* Cluster bus protocol version. */
+#define CLUSTER_PROTO_VER 1 /* Cluster bus protocol version. */
typedef struct {
- char sig[4]; /* Siganture "RCmb" (Redis Cluster message bus). */
+ char sig[4]; /* Signature "RCmb" (Redis Cluster message bus). */
uint32_t totlen; /* Total length of this message */
- uint16_t ver; /* Protocol version, currently set to 0. */
- uint16_t notused0; /* 2 bytes not used. */
+ uint16_t ver; /* Protocol version, currently set to 1. */
+ uint16_t port; /* TCP base port number. */
uint16_t type; /* Message type */
uint16_t count; /* Only used for some kind of messages. */
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
@@ -227,12 +255,13 @@ typedef struct {
slave. */
uint64_t offset; /* Master replication offset if node is a master or
processed replication offset if node is a slave. */
- char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */
- unsigned char myslots[REDIS_CLUSTER_SLOTS/8];
- char slaveof[REDIS_CLUSTER_NAMELEN];
- char notused1[32]; /* 32 bytes reserved for future usage. */
- uint16_t port; /* Sender TCP base port */
- uint16_t flags; /* Sender node flags */
+ char sender[CLUSTER_NAMELEN]; /* Name of the sender node */
+ unsigned char myslots[CLUSTER_SLOTS/8];
+ char slaveof[CLUSTER_NAMELEN];
+ char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */
+ char notused1[34]; /* 34 bytes reserved for future usage. */
+ uint16_t cport; /* Sender TCP cluster bus port */
+ uint16_t flags; /* Sender node flags */
unsigned char state; /* Cluster state from the POV of the sender */
unsigned char mflags[3]; /* Message flags: CLUSTERMSG_FLAG[012]_... */
union clusterMsgData data;
@@ -247,6 +276,8 @@ typedef struct {
master is up. */
/* ---------------------- API exported outside cluster.c -------------------- */
-clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask);
+int clusterRedirectBlockedClientIfNeeded(client *c);
+void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code);
-#endif /* __REDIS_CLUSTER_H */
+#endif /* __CLUSTER_H */
diff --git a/src/config.c b/src/config.c
index 8255a56b7..7bd9592b2 100644
--- a/src/config.c
+++ b/src/config.c
@@ -28,16 +28,34 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include <fcntl.h>
#include <sys/stat.h>
-static struct {
- const char *name;
- const int value;
-} validSyslogFacilities[] = {
+/*-----------------------------------------------------------------------------
+ * Config file name-value maps.
+ *----------------------------------------------------------------------------*/
+
+typedef struct configEnum {
+ const char *name;
+ const int val;
+} configEnum;
+
+configEnum maxmemory_policy_enum[] = {
+ {"volatile-lru", MAXMEMORY_VOLATILE_LRU},
+ {"volatile-lfu", MAXMEMORY_VOLATILE_LFU},
+ {"volatile-random",MAXMEMORY_VOLATILE_RANDOM},
+ {"volatile-ttl",MAXMEMORY_VOLATILE_TTL},
+ {"allkeys-lru",MAXMEMORY_ALLKEYS_LRU},
+ {"allkeys-lfu",MAXMEMORY_ALLKEYS_LFU},
+ {"allkeys-random",MAXMEMORY_ALLKEYS_RANDOM},
+ {"noeviction",MAXMEMORY_NO_EVICTION},
+ {NULL, 0}
+};
+
+configEnum syslog_facility_enum[] = {
{"user", LOG_USER},
{"local0", LOG_LOCAL0},
{"local1", LOG_LOCAL1},
@@ -50,17 +68,73 @@ static struct {
{NULL, 0}
};
-clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT] = {
+configEnum loglevel_enum[] = {
+ {"debug", LL_DEBUG},
+ {"verbose", LL_VERBOSE},
+ {"notice", LL_NOTICE},
+ {"warning", LL_WARNING},
+ {NULL,0}
+};
+
+configEnum supervised_mode_enum[] = {
+ {"upstart", SUPERVISED_UPSTART},
+ {"systemd", SUPERVISED_SYSTEMD},
+ {"auto", SUPERVISED_AUTODETECT},
+ {"no", SUPERVISED_NONE},
+ {NULL, 0}
+};
+
+configEnum aof_fsync_enum[] = {
+ {"everysec", AOF_FSYNC_EVERYSEC},
+ {"always", AOF_FSYNC_ALWAYS},
+ {"no", AOF_FSYNC_NO},
+ {NULL, 0}
+};
+
+/* Output buffer limits presets. */
+clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
{0, 0, 0}, /* normal */
{1024*1024*256, 1024*1024*64, 60}, /* slave */
{1024*1024*32, 1024*1024*8, 60} /* pubsub */
};
/*-----------------------------------------------------------------------------
- * Config file parsing
+ * Enum access functions
*----------------------------------------------------------------------------*/
-int supervisedToMode(const char *str);
+/* Get enum value from name. If there is no match INT_MIN is returned. */
+int configEnumGetValue(configEnum *ce, char *name) {
+ while(ce->name != NULL) {
+ if (!strcasecmp(ce->name,name)) return ce->val;
+ ce++;
+ }
+ return INT_MIN;
+}
+
+/* Get enum name from value. If no match is found NULL is returned. */
+const char *configEnumGetName(configEnum *ce, int val) {
+ while(ce->name != NULL) {
+ if (ce->val == val) return ce->name;
+ ce++;
+ }
+ return NULL;
+}
+
+/* Wrapper for configEnumGetName() returning "unknown" instead of NULL if
+ * there is no match. */
+const char *configEnumGetNameOrUnknown(configEnum *ce, int val) {
+ const char *name = configEnumGetName(ce,val);
+ return name ? name : "unknown";
+}
+
+/* Used for INFO generation. */
+const char *evictPolicyToString(void) {
+ return configEnumGetNameOrUnknown(maxmemory_policy_enum,server.maxmemory_policy);
+}
+
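A short usage sketch for the enum accessors above (hypothetical helper, not
in this patch, assuming config.c scope where loglevel_enum, server and
INT_MIN are visible):

    static int setLogLevelByName(char *name) {
        int level = configEnumGetValue(loglevel_enum, name);
        if (level == INT_MIN) return 0; /* unknown name: caller rejects */
        server.verbosity = level;
        return 1;
    }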
+/*-----------------------------------------------------------------------------
+ * Config file parsing
+ *----------------------------------------------------------------------------*/
int yesnotoi(char *s) {
if (!strcasecmp(s,"yes")) return 1;
@@ -81,6 +155,20 @@ void resetServerSaveParams(void) {
server.saveparamslen = 0;
}
+void queueLoadModule(sds path, sds *argv, int argc) {
+ int i;
+ struct moduleLoadQueueEntry *loadmod;
+
+ loadmod = zmalloc(sizeof(struct moduleLoadQueueEntry));
+ loadmod->argv = zmalloc(sizeof(robj*)*argc);
+ loadmod->path = sdsnew(path);
+ loadmod->argc = argc;
+ for (i = 0; i < argc; i++) {
+ loadmod->argv[i] = createRawStringObject(argv[i],sdslen(argv[i]));
+ }
+ listAddNodeTail(server.loadmodule_queue,loadmod);
+}
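queueLoadModule() is fed by config lines of this shape (path and arguments
hypothetical); the queued modules are loaded later, at server startup, by
draining server.loadmodule_queue:

    loadmodule /path/to/mymodule.so firstarg secondarg

Here path is "/path/to/mymodule.so", argv holds the two trailing words, and
argc is 2.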
+
void loadServerConfigFromString(char *config) {
char *err = NULL;
int linenum = 0, totlines, i;
@@ -124,6 +212,10 @@ void loadServerConfigFromString(char *config) {
if (server.tcpkeepalive < 0) {
err = "Invalid tcp-keepalive value"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"protected-mode") && argc == 2) {
+ if ((server.protected_mode = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"port") && argc == 2) {
server.port = atoi(argv[1]);
if (server.port < 0 || server.port > 65535) {
@@ -137,7 +229,7 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"bind") && argc >= 2) {
int j, addresses = argc-1;
- if (addresses > REDIS_BINDADDR_MAX) {
+ if (addresses > CONFIG_BINDADDR_MAX) {
err = "Too many bind addresses specified"; goto loaderr;
}
for (j = 0; j < addresses; j++)
@@ -164,17 +256,15 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"dir") && argc == 2) {
if (chdir(argv[1]) == -1) {
- redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
+ serverLog(LL_WARNING,"Can't chdir to '%s': %s",
argv[1], strerror(errno));
exit(1);
}
} else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
- if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
- else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
- else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
- else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
- else {
- err = "Invalid log level. Must be one of debug, notice, warning";
+ server.verbosity = configEnumGetValue(loglevel_enum,argv[1]);
+ if (server.verbosity == INT_MIN) {
+ err = "Invalid log level. "
+ "Must be one of debug, verbose, notice, warning";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
@@ -193,6 +283,10 @@ void loadServerConfigFromString(char *config) {
}
fclose(logfp);
}
+ } else if (!strcasecmp(argv[0],"always-show-logo") && argc == 2) {
+ if ((server.always_show_logo = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"syslog-enabled") && argc == 2) {
if ((server.syslog_enabled = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@@ -201,16 +295,9 @@ void loadServerConfigFromString(char *config) {
if (server.syslog_ident) zfree(server.syslog_ident);
server.syslog_ident = zstrdup(argv[1]);
} else if (!strcasecmp(argv[0],"syslog-facility") && argc == 2) {
- int i;
-
- for (i = 0; validSyslogFacilities[i].name; i++) {
- if (!strcasecmp(validSyslogFacilities[i].name, argv[1])) {
- server.syslog_facility = validSyslogFacilities[i].value;
- break;
- }
- }
-
- if (!validSyslogFacilities[i].name) {
+ server.syslog_facility =
+ configEnumGetValue(syslog_facility_enum,argv[1]);
+ if (server.syslog_facility == INT_MIN) {
err = "Invalid log facility. Must be one of USER or between LOCAL0-LOCAL7";
goto loaderr;
}
@@ -229,19 +316,9 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
server.maxmemory = memtoll(argv[1],NULL);
} else if (!strcasecmp(argv[0],"maxmemory-policy") && argc == 2) {
- if (!strcasecmp(argv[1],"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(argv[1],"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(argv[1],"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(argv[1],"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(argv[1],"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(argv[1],"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
+ server.maxmemory_policy =
+ configEnumGetValue(maxmemory_policy_enum,argv[1]);
+ if (server.maxmemory_policy == INT_MIN) {
err = "Invalid maxmemory policy";
goto loaderr;
}
@@ -251,11 +328,27 @@ void loadServerConfigFromString(char *config) {
err = "maxmemory-samples must be 1 or greater";
goto loaderr;
}
+ } else if ((!strcasecmp(argv[0],"proto-max-bulk-len")) && argc == 2) {
+ server.proto_max_bulk_len = memtoll(argv[1],NULL);
+ } else if ((!strcasecmp(argv[0],"client-query-buffer-limit")) && argc == 2) {
+ server.client_max_querybuf_len = memtoll(argv[1],NULL);
+ } else if (!strcasecmp(argv[0],"lfu-log-factor") && argc == 2) {
+ server.lfu_log_factor = atoi(argv[1]);
+ if (server.lfu_log_factor < 0) {
+ err = "lfu-log-factor must be 0 or greater";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lfu-decay-time") && argc == 2) {
+ server.lfu_decay_time = atoi(argv[1]);
+ if (server.lfu_decay_time < 0) {
+ err = "lfu-decay-time must be 0 or greater";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
slaveof_linenum = linenum;
server.masterhost = sdsnew(argv[1]);
server.masterport = atoi(argv[2]);
- server.repl_state = REDIS_REPL_CONNECT;
+ server.repl_state = REPL_STATE_CONNECT;
} else if (!strcasecmp(argv[0],"repl-ping-slave-period") && argc == 2) {
server.repl_ping_slave_period = atoi(argv[1]);
if (server.repl_ping_slave_period <= 0) {
@@ -296,7 +389,8 @@ void loadServerConfigFromString(char *config) {
goto loaderr;
}
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
- server.masterauth = zstrdup(argv[1]);
+ zfree(server.masterauth);
+ server.masterauth = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"slave-serve-stale-data") && argc == 2) {
if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
@@ -317,21 +411,50 @@ void loadServerConfigFromString(char *config) {
if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-eviction") && argc == 2) {
+ if ((server.lazyfree_lazy_eviction = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-expire") && argc == 2) {
+ if ((server.lazyfree_lazy_expire = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"lazyfree-lazy-server-del") && argc == 2){
+ if ((server.lazyfree_lazy_server_del = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"slave-lazy-flush") && argc == 2) {
+ if ((server.repl_slave_lazy_flush = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"activedefrag") && argc == 2) {
+ if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
+ if (server.active_defrag_enabled) {
+#ifndef HAVE_DEFRAG
+ err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr;
+#endif
+ }
} else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
if ((server.daemonize = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"dynamic-hz") && argc == 2) {
+ if ((server.dynamic_hz = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"hz") && argc == 2) {
- server.hz = atoi(argv[1]);
- if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
- if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
+ server.config_hz = atoi(argv[1]);
+ if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ;
+ if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ;
} else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
int yes;
if ((yes = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
- server.aof_state = yes ? REDIS_AOF_ON : REDIS_AOF_OFF;
+ server.aof_state = yes ? AOF_ON : AOF_OFF;
} else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
if (!pathIsBaseName(argv[1])) {
err = "appendfilename can't be a path, just a filename";
@@ -345,13 +468,8 @@ void loadServerConfigFromString(char *config) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
} else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
- if (!strcasecmp(argv[1],"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(argv[1],"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else if (!strcasecmp(argv[1],"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else {
+ server.aof_fsync = configEnumGetValue(aof_fsync_enum,argv[1]);
+ if (server.aof_fsync == INT_MIN) {
err = "argument must be 'no', 'always' or 'everysec'";
goto loaderr;
}
@@ -374,16 +492,27 @@ void loadServerConfigFromString(char *config) {
yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"rdb-save-incremental-fsync") &&
+ argc == 2)
+ {
+ if ((server.rdb_save_incremental_fsync =
+ yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) {
if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) {
err = "argument must be 'yes' or 'no'"; goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"aof-use-rdb-preamble") && argc == 2) {
+ if ((server.aof_use_rdb_preamble = yesnotoi(argv[1])) == -1) {
+ err = "argument must be 'yes' or 'no'"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
- if (strlen(argv[1]) > REDIS_AUTHPASS_MAX_LEN) {
- err = "Password is longer than REDIS_AUTHPASS_MAX_LEN";
+ if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) {
+ err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
goto loaderr;
}
- server.requirepass = zstrdup(argv[1]);
+ server.requirepass = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
zfree(server.pidfile);
server.pidfile = zstrdup(argv[1]);
@@ -394,10 +523,52 @@ void loadServerConfigFromString(char *config) {
}
zfree(server.rdb_filename);
server.rdb_filename = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"active-defrag-threshold-lower") && argc == 2) {
+ server.active_defrag_threshold_lower = atoi(argv[1]);
+ if (server.active_defrag_threshold_lower < 0 ||
+ server.active_defrag_threshold_lower > 1000) {
+ err = "active-defrag-threshold-lower must be between 0 and 1000";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-threshold-upper") && argc == 2) {
+ server.active_defrag_threshold_upper = atoi(argv[1]);
+ if (server.active_defrag_threshold_upper < 0 ||
+ server.active_defrag_threshold_upper > 1000) {
+ err = "active-defrag-threshold-upper must be between 0 and 1000";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-ignore-bytes") && argc == 2) {
+ server.active_defrag_ignore_bytes = memtoll(argv[1], NULL);
+ if (server.active_defrag_ignore_bytes <= 0) {
+ err = "active-defrag-ignore-bytes must be above 0";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-cycle-min") && argc == 2) {
+ server.active_defrag_cycle_min = atoi(argv[1]);
+ if (server.active_defrag_cycle_min < 1 || server.active_defrag_cycle_min > 99) {
+ err = "active-defrag-cycle-min must be between 1 and 99";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-cycle-max") && argc == 2) {
+ server.active_defrag_cycle_max = atoi(argv[1]);
+ if (server.active_defrag_cycle_max < 1 || server.active_defrag_cycle_max > 99) {
+ err = "active-defrag-cycle-max must be between 1 and 99";
+ goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"active-defrag-max-scan-fields") && argc == 2) {
+ server.active_defrag_max_scan_fields = strtoll(argv[1],NULL,10);
+ if (server.active_defrag_max_scan_fields < 1) {
+ err = "active-defrag-max-scan-fields must be positive";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) {
server.hash_max_ziplist_entries = memtoll(argv[1], NULL);
} else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) {
server.hash_max_ziplist_value = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"stream-node-max-bytes") && argc == 2) {
+ server.stream_node_max_bytes = memtoll(argv[1], NULL);
+ } else if (!strcasecmp(argv[0],"stream-node-max-entries") && argc == 2) {
+ server.stream_node_max_entries = atoi(argv[1]);
} else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){
/* DEAD OPTION */
} else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) {
@@ -426,7 +597,7 @@ void loadServerConfigFromString(char *config) {
/* If the target command name is the empty string we just
* remove it from the command table. */
retval = dictDelete(server.commands, argv[1]);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
/* Otherwise we re-add the command under a different name. */
if (sdslen(argv[2]) != 0) {
@@ -445,6 +616,25 @@ void loadServerConfigFromString(char *config) {
} else if (!strcasecmp(argv[0],"cluster-config-file") && argc == 2) {
zfree(server.cluster_configfile);
server.cluster_configfile = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"cluster-announce-ip") && argc == 2) {
+ zfree(server.cluster_announce_ip);
+ server.cluster_announce_ip = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"cluster-announce-port") && argc == 2) {
+ server.cluster_announce_port = atoi(argv[1]);
+ if (server.cluster_announce_port < 0 ||
+ server.cluster_announce_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
+ } else if (!strcasecmp(argv[0],"cluster-announce-bus-port") &&
+ argc == 2)
+ {
+ server.cluster_announce_bus_port = atoi(argv[1]);
+ if (server.cluster_announce_bus_port < 0 ||
+ server.cluster_announce_bus_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"cluster-require-full-coverage") &&
argc == 2)
{
@@ -473,6 +663,14 @@ void loadServerConfigFromString(char *config) {
err = "cluster slave validity factor must be zero or positive";
goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"cluster-slave-no-failover") &&
+ argc == 2)
+ {
+ server.cluster_slave_no_failover = yesnotoi(argv[1]);
+ if (server.cluster_slave_no_failover == -1) {
+ err = "argument must be 'yes' or 'no'";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) {
server.lua_time_limit = strtoll(argv[1],NULL,10);
} else if (!strcasecmp(argv[0],"slowlog-log-slower-than") &&
@@ -496,8 +694,9 @@ void loadServerConfigFromString(char *config) {
unsigned long long hard, soft;
int soft_seconds;
- if (class == -1) {
- err = "Unrecognized client limit class";
+ if (class == -1 || class == CLIENT_TYPE_MASTER) {
+ err = "Unrecognized client limit class: the user specified "
+ "an invalid one, or 'master' which has no buffer limits.";
goto loaderr;
}
hard = memtoll(argv[2],NULL);
@@ -517,6 +716,16 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"slave-priority") && argc == 2) {
server.slave_priority = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"slave-announce-ip") && argc == 2) {
+ zfree(server.slave_announce_ip);
+ server.slave_announce_ip = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"slave-announce-port") && argc == 2) {
+ server.slave_announce_port = atoi(argv[1]);
+ if (server.slave_announce_port < 0 ||
+ server.slave_announce_port > 65535)
+ {
+ err = "Invalid port"; goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"min-slaves-to-write") && argc == 2) {
server.repl_min_slaves_to_write = atoi(argv[1]);
if (server.repl_min_slaves_to_write < 0) {
@@ -536,14 +745,16 @@ void loadServerConfigFromString(char *config) {
}
server.notify_keyspace_events = flags;
} else if (!strcasecmp(argv[0],"supervised") && argc == 2) {
- int mode = supervisedToMode(argv[1]);
+ server.supervised_mode =
+ configEnumGetValue(supervised_mode_enum,argv[1]);
- if (mode == -1) {
+ if (server.supervised_mode == INT_MIN) {
err = "Invalid option for 'supervised'. "
"Allowed values: 'upstart', 'systemd', 'auto', or 'no'";
goto loaderr;
}
- server.supervised_mode = mode;
+ } else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) {
+ queueLoadModule(argv[1],&argv[2],argc-2);
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@@ -589,7 +800,7 @@ loaderr:
* just load a string. */
void loadServerConfig(char *filename, char *options) {
sds config = sdsempty();
- char buf[REDIS_CONFIGLINE_MAX+1];
+ char buf[CONFIG_MAX_LINE+1];
/* Load the file content */
if (filename) {
@@ -599,12 +810,12 @@ void loadServerConfig(char *filename, char *options) {
fp = stdin;
} else {
if ((fp = fopen(filename,"r")) == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Fatal error, can't open config file '%s'", filename);
exit(1);
}
}
- while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL)
+ while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL)
config = sdscat(config,buf);
if (fp != stdin) fclose(fp);
}
@@ -621,41 +832,68 @@ void loadServerConfig(char *filename, char *options) {
* CONFIG SET implementation
*----------------------------------------------------------------------------*/
-void configSetCommand(redisClient *c) {
+#define config_set_bool_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int yn = yesnotoi(o->ptr); \
+ if (yn == -1) goto badfmt; \
+ _var = yn;
+
+#define config_set_numerical_field(_name,_var,min,max) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ if (getLongLongFromObject(o,&ll) == C_ERR) goto badfmt; \
+ if (min != LLONG_MIN && ll < min) goto badfmt; \
+ if (max != LLONG_MAX && ll > max) goto badfmt; \
+ _var = ll;
+
+#define config_set_memory_field(_name,_var) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ ll = memtoll(o->ptr,&err); \
+ if (err || ll < 0) goto badfmt; \
+ _var = ll;
+
+#define config_set_enum_field(_name,_var,_enumvar) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) { \
+ int enumval = configEnumGetValue(_enumvar,o->ptr); \
+ if (enumval == INT_MIN) goto badfmt; \
+ _var = enumval;
+
+#define config_set_special_field(_name) \
+ } else if (!strcasecmp(c->argv[2]->ptr,_name)) {
+
+#define config_set_else } else
+
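Each config_set_* macro both closes the previous brace and opens an
"else if", which is why the chain below can start with a uniform "if (0)".
A self-contained toy showing the same trick (names hypothetical):

    #include <stdio.h>
    #include <string.h>

    #define SET_INT_FIELD(_name,_var) \
        } else if (!strcmp(name,_name)) { \
            _var = value;

    int main(void) {
        const char *name = "bar";
        int value = 7, foo = 0, bar = 0;

        if (0) { /* starts the else-if chain */
        SET_INT_FIELD("foo", foo)
        SET_INT_FIELD("bar", bar)
        } else {
            printf("unknown field\n");
        }
        printf("foo=%d bar=%d\n", foo, bar); /* foo=0 bar=7 */
        return 0;
    }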
+void configSetCommand(client *c) {
robj *o;
long long ll;
- redisAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
- redisAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
+ int err;
+ serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
+ serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
- if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
+ if (0) { /* this starts the config_set macros else-if chain. */
+
+ /* Special fields that can't be handled with general macros. */
+ config_set_special_field("dbfilename") {
if (!pathIsBaseName(o->ptr)) {
addReplyError(c, "dbfilename can't be a path, just a filename");
return;
}
zfree(server.rdb_filename);
server.rdb_filename = zstrdup(o->ptr);
- } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
- if (sdslen(o->ptr) > REDIS_AUTHPASS_MAX_LEN) goto badfmt;
+ } config_set_special_field("requirepass") {
+ if (sdslen(o->ptr) > CONFIG_AUTHPASS_MAX_LEN) goto badfmt;
zfree(server.requirepass);
server.requirepass = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
+ } config_set_special_field("masterauth") {
zfree(server.masterauth);
server.masterauth = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.maxmemory = ll;
- if (server.maxmemory) {
- if (server.maxmemory < zmalloc_used_memory()) {
- redisLog(REDIS_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
- }
- freeMemoryIfNeeded();
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxclients")) {
+ } config_set_special_field("cluster-announce-ip") {
+ zfree(server.cluster_announce_ip);
+ server.cluster_announce_ip = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
+ } config_set_special_field("maxclients") {
int orig_value = server.maxclients;
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 1) goto badfmt;
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll < 1) goto badfmt;
/* Try to check if the OS is capable of supporting so many FDs. */
server.maxclients = ll;
@@ -667,10 +905,10 @@ void configSetCommand(redisClient *c) {
return;
}
if ((unsigned int) aeGetSetSize(server.el) <
- server.maxclients + REDIS_EVENTLOOP_FDSET_INCR)
+ server.maxclients + CONFIG_FDSET_INCR)
{
if (aeResizeSetSize(server.el,
- server.maxclients + REDIS_EVENTLOOP_FDSET_INCR) == AE_ERR)
+ server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
{
addReplyError(c,"The event loop API used by Redis is not able to handle the specified number of clients");
server.maxclients = orig_value;
@@ -678,84 +916,20 @@ void configSetCommand(redisClient *c) {
}
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hz")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hz = ll;
- if (server.hz < REDIS_MIN_HZ) server.hz = REDIS_MIN_HZ;
- if (server.hz > REDIS_MAX_HZ) server.hz = REDIS_MAX_HZ;
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-policy")) {
- if (!strcasecmp(o->ptr,"volatile-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU;
- } else if (!strcasecmp(o->ptr,"volatile-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_RANDOM;
- } else if (!strcasecmp(o->ptr,"volatile-ttl")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_TTL;
- } else if (!strcasecmp(o->ptr,"allkeys-lru")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_LRU;
- } else if (!strcasecmp(o->ptr,"allkeys-random")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_ALLKEYS_RANDOM;
- } else if (!strcasecmp(o->ptr,"noeviction")) {
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory-samples")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.maxmemory_samples = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > LONG_MAX) goto badfmt;
- server.maxidletime = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"tcp-keepalive")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0 || ll > INT_MAX) goto badfmt;
- server.tcpkeepalive = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
- if (!strcasecmp(o->ptr,"no")) {
- server.aof_fsync = AOF_FSYNC_NO;
- } else if (!strcasecmp(o->ptr,"everysec")) {
- server.aof_fsync = AOF_FSYNC_EVERYSEC;
- } else if (!strcasecmp(o->ptr,"always")) {
- server.aof_fsync = AOF_FSYNC_ALWAYS;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_no_fsync_on_rewrite = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
+ } config_set_special_field("appendonly") {
int enable = yesnotoi(o->ptr);
if (enable == -1) goto badfmt;
- if (enable == 0 && server.aof_state != REDIS_AOF_OFF) {
+ if (enable == 0 && server.aof_state != AOF_OFF) {
stopAppendOnly();
- } else if (enable && server.aof_state == REDIS_AOF_OFF) {
- if (startAppendOnly() == REDIS_ERR) {
+ } else if (enable && server.aof_state == AOF_OFF) {
+ if (startAppendOnly() == C_ERR) {
addReplyError(c,
"Unable to turn on AOF. Check server logs.");
return;
}
}
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-percentage")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_perc = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"auto-aof-rewrite-min-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.aof_rewrite_min_size = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"aof-rewrite-incremental-fsync")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_rewrite_incremental_fsync = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"aof-load-truncated")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.aof_load_truncated = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
+ } config_set_special_field("save") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -789,70 +963,12 @@ void configSetCommand(redisClient *c) {
appendServerSaveParams(seconds, changes);
}
sdsfreesplitres(v,vlen);
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-serve-stale-data")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_serve_stale_data = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-read-only")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_slave_ro = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"dir")) {
+ } config_set_special_field("dir") {
if (chdir((char*)o->ptr) == -1) {
addReplyErrorFormat(c,"Changing directory: %s", strerror(errno));
return;
}
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hash-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hash_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-max-ziplist-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_max_ziplist_size = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"list-compress-depth")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.list_compress_depth = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"set-max-intset-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.set_max_intset_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-entries")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_entries = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.zset_max_ziplist_value = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.hll_sparse_max_bytes = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.lua_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-log-slower-than")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR) goto badfmt;
- server.slowlog_log_slower_than = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slowlog-max-len")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.slowlog_max_len = (unsigned)ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"latency-monitor-threshold")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.latency_monitor_threshold = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"loglevel")) {
- if (!strcasecmp(o->ptr,"warning")) {
- server.verbosity = REDIS_WARNING;
- } else if (!strcasecmp(o->ptr,"notice")) {
- server.verbosity = REDIS_NOTICE;
- } else if (!strcasecmp(o->ptr,"verbose")) {
- server.verbosity = REDIS_VERBOSE;
- } else if (!strcasecmp(o->ptr,"debug")) {
- server.verbosity = REDIS_DEBUG;
- } else {
- goto badfmt;
- }
- } else if (!strcasecmp(c->argv[2]->ptr,"client-output-buffer-limit")) {
+ } config_set_special_field("client-output-buffer-limit") {
int vlen, j;
sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
@@ -866,17 +982,17 @@ void configSetCommand(redisClient *c) {
* whole configuration string or accept it all, even if a single
* error in a single client class is present. */
for (j = 0; j < vlen; j++) {
- char *eptr;
long val;
if ((j % 4) == 0) {
- if (getClientTypeByName(v[j]) == -1) {
+ int class = getClientTypeByName(v[j]);
+ if (class == -1 || class == CLIENT_TYPE_MASTER) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
} else {
- val = strtoll(v[j], &eptr, 10);
- if (eptr[0] != '\0' || val < 0) {
+ val = memtoll(v[j], &err);
+ if (err || val < 0) {
sdsfreesplitres(v,vlen);
goto badfmt;
}
@@ -898,89 +1014,203 @@ void configSetCommand(redisClient *c) {
server.client_obuf_limits[class].soft_limit_seconds = soft_seconds;
}
sdsfreesplitres(v,vlen);
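Editor's note: the switch from strtoll() to memtoll() above means the hard/soft limit values of client-output-buffer-limit can now carry memory-unit suffixes, and the new getClientTypeByName() check explicitly rejects the master client class. Below is a self-contained sketch of unit-suffix parsing in the spirit of memtoll() (the real helper in util.c handles more suffixes and error cases; this minimal version is an illustration only):

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

/* Minimal memtoll()-like parser: digits plus an optional kb/mb/gb suffix. */
static long long parse_memory(const char *p, int *err) {
    char *eptr;
    long long val = strtoll(p, &eptr, 10);
    *err = 0;
    if (eptr == p) { *err = 1; return 0; }           /* no digits at all */
    if (*eptr == '\0') return val;                   /* plain byte count */
    if (!strcasecmp(eptr, "kb")) return val * 1024LL;
    if (!strcasecmp(eptr, "mb")) return val * 1024LL * 1024LL;
    if (!strcasecmp(eptr, "gb")) return val * 1024LL * 1024LL * 1024LL;
    *err = 1;                                        /* unknown suffix */
    return 0;
}

int main(void) {
    int err;
    /* With memtoll() in place, CONFIG SET client-output-buffer-limit can
     * accept "slave 256mb 64mb 60" instead of raw byte counts only. */
    printf("%lld\n", parse_memory("256mb", &err));   /* 268435456 */
    return 0;
}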
- } else if (!strcasecmp(c->argv[2]->ptr,"stop-writes-on-bgsave-error")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.stop_writes_on_bgsave_err = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-ping-slave-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_ping_slave_period = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- server.repl_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-size")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt;
- resizeReplicationBacklog(ll);
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-backlog-ttl")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- server.repl_backlog_time_limit = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"watchdog-period")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt;
- if (ll)
- enableWatchdog(ll);
- else
- disableWatchdog();
- } else if (!strcasecmp(c->argv[2]->ptr,"rdbcompression")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.rdb_compression = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"notify-keyspace-events")) {
+ } config_set_special_field("notify-keyspace-events") {
int flags = keyspaceEventsStringToFlags(o->ptr);
if (flags == -1) goto badfmt;
server.notify_keyspace_events = flags;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-disable-tcp-nodelay")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_disable_tcp_nodelay = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-diskless-sync")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.repl_diskless_sync = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"repl-diskless-sync-delay")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_diskless_sync_delay = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"slave-priority")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.slave_priority = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-to-write")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_to_write = ll;
+ } config_set_special_field("slave-announce-ip") {
+ zfree(server.slave_announce_ip);
+ server.slave_announce_ip = ((char*)o->ptr)[0] ? zstrdup(o->ptr) : NULL;
+
+ /* Boolean fields.
+ * config_set_bool_field(name,var). */
+ } config_set_bool_field(
+ "rdbcompression", server.rdb_compression) {
+ } config_set_bool_field(
+ "repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay) {
+ } config_set_bool_field(
+ "repl-diskless-sync",server.repl_diskless_sync) {
+ } config_set_bool_field(
+ "cluster-require-full-coverage",server.cluster_require_full_coverage) {
+ } config_set_bool_field(
+ "cluster-slave-no-failover",server.cluster_slave_no_failover) {
+ } config_set_bool_field(
+ "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
+ } config_set_bool_field(
+ "rdb-save-incremental-fsync",server.rdb_save_incremental_fsync) {
+ } config_set_bool_field(
+ "aof-load-truncated",server.aof_load_truncated) {
+ } config_set_bool_field(
+ "aof-use-rdb-preamble",server.aof_use_rdb_preamble) {
+ } config_set_bool_field(
+ "slave-serve-stale-data",server.repl_serve_stale_data) {
+ } config_set_bool_field(
+ "slave-read-only",server.repl_slave_ro) {
+ } config_set_bool_field(
+ "activerehashing",server.activerehashing) {
+ } config_set_bool_field(
+ "activedefrag",server.active_defrag_enabled) {
+#ifndef HAVE_DEFRAG
+ if (server.active_defrag_enabled) {
+ server.active_defrag_enabled = 0;
+ addReplyError(c,
+ "-DISABLED Active defragmentation cannot be enabled: it "
+ "requires a Redis server compiled with a modified Jemalloc "
+ "like the one shipped by default with the Redis source "
+ "distribution");
+ return;
+ }
+#endif
+ } config_set_bool_field(
+ "protected-mode",server.protected_mode) {
+ } config_set_bool_field(
+ "stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err) {
+ } config_set_bool_field(
+ "lazyfree-lazy-eviction",server.lazyfree_lazy_eviction) {
+ } config_set_bool_field(
+ "lazyfree-lazy-expire",server.lazyfree_lazy_expire) {
+ } config_set_bool_field(
+ "lazyfree-lazy-server-del",server.lazyfree_lazy_server_del) {
+ } config_set_bool_field(
+ "slave-lazy-flush",server.repl_slave_lazy_flush) {
+ } config_set_bool_field(
+ "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) {
+ } config_set_bool_field(
+ "dynamic-hz",server.dynamic_hz) {
+
+ /* Numerical fields.
+ * config_set_numerical_field(name,var,min,max) */
+ } config_set_numerical_field(
+ "tcp-keepalive",server.tcpkeepalive,0,INT_MAX) {
+ } config_set_numerical_field(
+ "maxmemory-samples",server.maxmemory_samples,1,INT_MAX) {
+ } config_set_numerical_field(
+ "lfu-log-factor",server.lfu_log_factor,0,INT_MAX) {
+ } config_set_numerical_field(
+ "lfu-decay-time",server.lfu_decay_time,0,INT_MAX) {
+ } config_set_numerical_field(
+ "timeout",server.maxidletime,0,INT_MAX) {
+ } config_set_numerical_field(
+ "active-defrag-threshold-lower",server.active_defrag_threshold_lower,0,1000) {
+ } config_set_numerical_field(
+ "active-defrag-threshold-upper",server.active_defrag_threshold_upper,0,1000) {
+ } config_set_memory_field(
+ "active-defrag-ignore-bytes",server.active_defrag_ignore_bytes) {
+ } config_set_numerical_field(
+ "active-defrag-cycle-min",server.active_defrag_cycle_min,1,99) {
+ } config_set_numerical_field(
+ "active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) {
+ } config_set_numerical_field(
+ "active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LONG_MAX) {
+ } config_set_numerical_field(
+ "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,INT_MAX){
+ } config_set_numerical_field(
+ "hash-max-ziplist-entries",server.hash_max_ziplist_entries,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "hash-max-ziplist-value",server.hash_max_ziplist_value,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "stream-node-max-bytes",server.stream_node_max_bytes,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "stream-node-max-entries",server.stream_node_max_entries,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "list-max-ziplist-size",server.list_max_ziplist_size,INT_MIN,INT_MAX) {
+ } config_set_numerical_field(
+ "list-compress-depth",server.list_compress_depth,0,INT_MAX) {
+ } config_set_numerical_field(
+ "set-max-intset-entries",server.set_max_intset_entries,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-entries",server.zset_max_ziplist_entries,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "zset-max-ziplist-value",server.zset_max_ziplist_value,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "hll-sparse-max-bytes",server.hll_sparse_max_bytes,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "lua-time-limit",server.lua_time_limit,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-log-slower-than",server.slowlog_log_slower_than,-1,LLONG_MAX) {
+ } config_set_numerical_field(
+ "slowlog-max-len",ll,0,LONG_MAX) {
+      /* Cast to unsigned long. */
+ server.slowlog_max_len = (unsigned long)ll;
+ } config_set_numerical_field(
+ "latency-monitor-threshold",server.latency_monitor_threshold,0,LLONG_MAX){
+ } config_set_numerical_field(
+ "repl-ping-slave-period",server.repl_ping_slave_period,1,INT_MAX) {
+ } config_set_numerical_field(
+ "repl-timeout",server.repl_timeout,1,INT_MAX) {
+ } config_set_numerical_field(
+ "repl-backlog-ttl",server.repl_backlog_time_limit,0,LONG_MAX) {
+ } config_set_numerical_field(
+ "repl-diskless-sync-delay",server.repl_diskless_sync_delay,0,INT_MAX) {
+ } config_set_numerical_field(
+ "slave-priority",server.slave_priority,0,INT_MAX) {
+ } config_set_numerical_field(
+ "slave-announce-port",server.slave_announce_port,0,65535) {
+ } config_set_numerical_field(
+ "min-slaves-to-write",server.repl_min_slaves_to_write,0,INT_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"min-slaves-max-lag")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.repl_min_slaves_max_lag = ll;
+ } config_set_numerical_field(
+ "min-slaves-max-lag",server.repl_min_slaves_max_lag,0,INT_MAX) {
refreshGoodSlavesCount();
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-require-full-coverage")) {
- int yn = yesnotoi(o->ptr);
-
- if (yn == -1) goto badfmt;
- server.cluster_require_full_coverage = yn;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-node-timeout")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll <= 0) goto badfmt;
- server.cluster_node_timeout = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-migration-barrier")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_migration_barrier = ll;
- } else if (!strcasecmp(c->argv[2]->ptr,"cluster-slave-validity-factor")) {
- if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
- ll < 0) goto badfmt;
- server.cluster_slave_validity_factor = ll;
- } else {
+ } config_set_numerical_field(
+ "cluster-node-timeout",server.cluster_node_timeout,0,LLONG_MAX) {
+ } config_set_numerical_field(
+ "cluster-announce-port",server.cluster_announce_port,0,65535) {
+ } config_set_numerical_field(
+ "cluster-announce-bus-port",server.cluster_announce_bus_port,0,65535) {
+ } config_set_numerical_field(
+ "cluster-migration-barrier",server.cluster_migration_barrier,0,INT_MAX){
+ } config_set_numerical_field(
+ "cluster-slave-validity-factor",server.cluster_slave_validity_factor,0,INT_MAX) {
+ } config_set_numerical_field(
+ "hz",server.config_hz,0,INT_MAX) {
+    /* Hz is more a hint from the user, so we accept values out of range
+ * but cap them to reasonable values. */
+ if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ;
+ if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ;
+ } config_set_numerical_field(
+ "watchdog-period",ll,0,INT_MAX) {
+ if (ll)
+ enableWatchdog(ll);
+ else
+ disableWatchdog();
+
+ /* Memory fields.
+ * config_set_memory_field(name,var) */
+ } config_set_memory_field("maxmemory",server.maxmemory) {
+ if (server.maxmemory) {
+ if (server.maxmemory < zmalloc_used_memory()) {
+ serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
+ }
+ freeMemoryIfNeeded();
+ }
+ } config_set_memory_field(
+ "proto-max-bulk-len",server.proto_max_bulk_len) {
+ } config_set_memory_field(
+ "client-query-buffer-limit",server.client_max_querybuf_len) {
+ } config_set_memory_field("repl-backlog-size",ll) {
+ resizeReplicationBacklog(ll);
+ } config_set_memory_field("auto-aof-rewrite-min-size",ll) {
+ server.aof_rewrite_min_size = ll;
+
+ /* Enumeration fields.
+ * config_set_enum_field(name,var,enum_var) */
+ } config_set_enum_field(
+ "loglevel",server.verbosity,loglevel_enum) {
+ } config_set_enum_field(
+ "maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum) {
+ } config_set_enum_field(
+ "appendfsync",server.aof_fsync,aof_fsync_enum) {
+
+    /* Everything else is an error... */
+ } config_set_else {
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
(char*)c->argv[2]->ptr);
return;
}
+
+ /* On success we just return a generic OK for all the options. */
addReply(c,shared.ok);
return;
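Editor's note: taken together, the rewritten configSetCommand() replaces the old hand-rolled strcasecmp() ladder with config_set_*_field() macros, so each settable option reads like one row of a table: special fields keep an explicit body, while boolean, numerical, memory and enum fields are fully described by the macro arguments. The macro definitions sit earlier in config.c and are not part of this hunk, so the sketch below is a simplified reconstruction of the idea with plain call sites rather than the exact "} config_set_xxx_field(...) {" splicing used above:

#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <limits.h>

static struct {
    int rdb_compression;
    int tcpkeepalive;
} server;

static int yesnotoi(const char *s) {          /* mirrors the Redis helper */
    if (!strcasecmp(s, "yes")) return 1;
    if (!strcasecmp(s, "no")) return 0;
    return -1;
}

/* Each macro expands to one complete "else if" arm, so adding an option
 * becomes a one-line, declaration-like entry in setOption(). */
#define set_bool_field(_name, _var) \
    else if (!strcasecmp(name, _name)) { \
        int yn = yesnotoi(value); \
        if (yn == -1) goto badfmt; \
        _var = yn; \
    }

#define set_numerical_field(_name, _var, _min, _max) \
    else if (!strcasecmp(name, _name)) { \
        long long ll = strtoll(value, NULL, 10); \
        if (ll < (_min) || ll > (_max)) goto badfmt; \
        _var = (int)ll; \
    }

static int setOption(const char *name, const char *value) {
    if (0) {}  /* anchors the chain, as "if (0) {" does in configSetCommand() */
    set_bool_field("rdbcompression", server.rdb_compression)
    set_numerical_field("tcp-keepalive", server.tcpkeepalive, 0, INT_MAX)
    else {
        fprintf(stderr, "Unsupported CONFIG parameter: %s\n", name);
        return -1;
    }
    return 0;
badfmt:
    fprintf(stderr, "Invalid argument '%s' for CONFIG SET '%s'\n", value, name);
    return -1;
}

int main(void) {
    setOption("rdbcompression", "yes");
    setOption("tcp-keepalive", "300");
    printf("compression=%d keepalive=%d\n",
           server.rdb_compression, server.tcpkeepalive);
    return 0;
}

The payoff is visible in the hunk above: adding a new tunable becomes a two-line change (one SET entry, one GET entry) instead of a bespoke else-if block per option.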
@@ -995,7 +1225,7 @@ badfmt: /* Bad format errors */
*----------------------------------------------------------------------------*/
#define config_get_string_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,_var ? _var : ""); \
matches++; \
@@ -1003,7 +1233,7 @@ badfmt: /* Bad format errors */
} while(0);
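Editor's note: two details are worth noting in these CONFIG GET macros. The do { ... } while(0) wrapper makes each expansion a single statement that nests safely under unbraced if/else, and the third stringmatch() argument flipping from 0 to 1 turns on the nocase flag, making CONFIG GET patterns case-insensitive. A generic illustration of the wrapper idiom (not the Redis macro itself):

#include <stdio.h>

#define REPORT(name, value) do { \
    printf("%s\n", name); \
    printf("%d\n", value); \
} while(0)

int main(void) {
    int verbose = 1;
    if (verbose)
        REPORT("maxmemory-samples", 5);  /* expands to one statement, so
                                            the unbraced if stays valid */
    else
        printf("quiet\n");
    return 0;
}

Without the wrapper, the two printf() statements would expand as separate statements and the else above would fail to attach to the if.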
#define config_get_bool_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,_var ? "yes" : "no"); \
matches++; \
@@ -1011,7 +1241,7 @@ badfmt: /* Bad format errors */
} while(0);
#define config_get_numerical_field(_name,_var) do { \
- if (stringmatch(pattern,_name,0)) { \
+ if (stringmatch(pattern,_name,1)) { \
ll2string(buf,sizeof(buf),_var); \
addReplyBulkCString(c,_name); \
addReplyBulkCString(c,buf); \
@@ -1019,68 +1249,46 @@ badfmt: /* Bad format errors */
} \
} while(0);
-char *maxmemoryToString() {
- char *s;
- switch(server.maxmemory_policy) {
- case REDIS_MAXMEMORY_VOLATILE_LRU: s = "volatile-lru"; break;
- case REDIS_MAXMEMORY_VOLATILE_TTL: s = "volatile-ttl"; break;
- case REDIS_MAXMEMORY_VOLATILE_RANDOM: s = "volatile-random"; break;
- case REDIS_MAXMEMORY_ALLKEYS_LRU: s = "allkeys-lru"; break;
- case REDIS_MAXMEMORY_ALLKEYS_RANDOM: s = "allkeys-random"; break;
- case REDIS_MAXMEMORY_NO_EVICTION: s = "noeviction"; break;
- default: s = "unknown"; break;
- }
- return s;
-}
-
-int supervisedToMode(const char *str) {
- int mode;
- if (!strcasecmp(str,"upstart")) {
- mode = REDIS_SUPERVISED_UPSTART;
- } else if (!strcasecmp(str,"systemd")) {
- mode = REDIS_SUPERVISED_SYSTEMD;
- } else if (!strcasecmp(str,"auto")) {
- mode = REDIS_SUPERVISED_AUTODETECT;
- } else if (!strcasecmp(str,"no")) {
- mode = REDIS_SUPERVISED_NONE;
- } else {
- mode = -1;
- }
- return mode;
-}
+#define config_get_enum_field(_name,_var,_enumvar) do { \
+ if (stringmatch(pattern,_name,1)) { \
+ addReplyBulkCString(c,_name); \
+ addReplyBulkCString(c,configEnumGetNameOrUnknown(_enumvar,_var)); \
+ matches++; \
+ } \
+} while(0);
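Editor's note: the config_get_enum_field() macro added above relies on the configEnum tables this refactor introduces; they are what make the removed converters (supervisedToString() below, maxmemoryToString() and supervisedToMode() above) redundant. The sketch below reconstructs the mechanism from its call sites in this diff (configEnumGetValue() for SET, configEnumGetNameOrUnknown() for GET); the actual definitions live elsewhere in config.c, and the enum values shown are illustrative:

#include <stdio.h>
#include <strings.h>
#include <limits.h>

typedef struct configEnum {
    const char *name;
    int val;
} configEnum;

/* NULL-terminated name/value table, e.g. for maxmemory-policy. */
static configEnum maxmemory_policy_enum[] = {
    {"volatile-lru", 0},
    {"allkeys-lru", 1},
    {"noeviction", 2},        /* values are placeholders, not Redis' */
    {NULL, 0}
};

static int configEnumGetValue(configEnum *ce, const char *name) {
    while (ce->name != NULL) {
        if (!strcasecmp(ce->name, name)) return ce->val;
        ce++;
    }
    return INT_MIN;  /* no match */
}

static const char *configEnumGetNameOrUnknown(configEnum *ce, int val) {
    while (ce->name != NULL) {
        if (ce->val == val) return ce->name;
        ce++;
    }
    return "unknown";
}

int main(void) {
    printf("%d\n", configEnumGetValue(maxmemory_policy_enum, "noeviction"));
    printf("%s\n", configEnumGetNameOrUnknown(maxmemory_policy_enum, 1));
    return 0;
}

One table per option now serves CONFIG SET, CONFIG GET and CONFIG REWRITE, instead of three hand-maintained converters that could drift apart.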
-char *supervisedToString(void) {
- char *s;
- switch(server.supervised_mode) {
- case REDIS_SUPERVISED_UPSTART: s = "upstart"; break;
- case REDIS_SUPERVISED_SYSTEMD: s = "systemd"; break;
- case REDIS_SUPERVISED_AUTODETECT: s = "auto"; break;
- case REDIS_SUPERVISED_NONE: s = "no"; break;
- default: s = "no"; break;
- }
- return s;
-}
-void configGetCommand(redisClient *c) {
+void configGetCommand(client *c) {
robj *o = c->argv[2];
void *replylen = addDeferredMultiBulkLength(c);
char *pattern = o->ptr;
char buf[128];
int matches = 0;
- redisAssertWithInfo(c,o,sdsEncodedObject(o));
+ serverAssertWithInfo(c,o,sdsEncodedObject(o));
/* String values */
config_get_string_field("dbfilename",server.rdb_filename);
config_get_string_field("requirepass",server.requirepass);
config_get_string_field("masterauth",server.masterauth);
+ config_get_string_field("cluster-announce-ip",server.cluster_announce_ip);
config_get_string_field("unixsocket",server.unixsocket);
config_get_string_field("logfile",server.logfile);
config_get_string_field("pidfile",server.pidfile);
+ config_get_string_field("slave-announce-ip",server.slave_announce_ip);
/* Numerical values */
config_get_numerical_field("maxmemory",server.maxmemory);
+ config_get_numerical_field("proto-max-bulk-len",server.proto_max_bulk_len);
+ config_get_numerical_field("client-query-buffer-limit",server.client_max_querybuf_len);
config_get_numerical_field("maxmemory-samples",server.maxmemory_samples);
+ config_get_numerical_field("lfu-log-factor",server.lfu_log_factor);
+ config_get_numerical_field("lfu-decay-time",server.lfu_decay_time);
config_get_numerical_field("timeout",server.maxidletime);
- config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
+ config_get_numerical_field("active-defrag-threshold-lower",server.active_defrag_threshold_lower);
+ config_get_numerical_field("active-defrag-threshold-upper",server.active_defrag_threshold_upper);
+ config_get_numerical_field("active-defrag-ignore-bytes",server.active_defrag_ignore_bytes);
+ config_get_numerical_field("active-defrag-cycle-min",server.active_defrag_cycle_min);
+ config_get_numerical_field("active-defrag-cycle-max",server.active_defrag_cycle_max);
+ config_get_numerical_field("active-defrag-max-scan-fields",server.active_defrag_max_scan_fields);
config_get_numerical_field("auto-aof-rewrite-percentage",
server.aof_rewrite_perc);
config_get_numerical_field("auto-aof-rewrite-min-size",
@@ -1089,6 +1297,10 @@ void configGetCommand(redisClient *c) {
server.hash_max_ziplist_entries);
config_get_numerical_field("hash-max-ziplist-value",
server.hash_max_ziplist_value);
+ config_get_numerical_field("stream-node-max-bytes",
+ server.stream_node_max_bytes);
+ config_get_numerical_field("stream-node-max-entries",
+ server.stream_node_max_entries);
config_get_numerical_field("list-max-ziplist-size",
server.list_max_ziplist_size);
config_get_numerical_field("list-compress-depth",
@@ -1109,6 +1321,8 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("slowlog-max-len",
server.slowlog_max_len);
config_get_numerical_field("port",server.port);
+ config_get_numerical_field("cluster-announce-port",server.cluster_announce_port);
+ config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port);
config_get_numerical_field("tcp-backlog",server.tcp_backlog);
config_get_numerical_field("databases",server.dbnum);
config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period);
@@ -1118,17 +1332,21 @@ void configGetCommand(redisClient *c) {
config_get_numerical_field("maxclients",server.maxclients);
config_get_numerical_field("watchdog-period",server.watchdog_period);
config_get_numerical_field("slave-priority",server.slave_priority);
+ config_get_numerical_field("slave-announce-port",server.slave_announce_port);
config_get_numerical_field("min-slaves-to-write",server.repl_min_slaves_to_write);
config_get_numerical_field("min-slaves-max-lag",server.repl_min_slaves_max_lag);
- config_get_numerical_field("hz",server.hz);
+ config_get_numerical_field("hz",server.config_hz);
config_get_numerical_field("cluster-node-timeout",server.cluster_node_timeout);
config_get_numerical_field("cluster-migration-barrier",server.cluster_migration_barrier);
config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor);
config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay);
+ config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
/* Bool (yes/no) values */
config_get_bool_field("cluster-require-full-coverage",
server.cluster_require_full_coverage);
+ config_get_bool_field("cluster-slave-no-failover",
+ server.cluster_slave_no_failover);
config_get_bool_field("no-appendfsync-on-rewrite",
server.aof_no_fsync_on_rewrite);
config_get_bool_field("slave-serve-stale-data",
@@ -1141,23 +1359,51 @@ void configGetCommand(redisClient *c) {
config_get_bool_field("rdbcompression", server.rdb_compression);
config_get_bool_field("rdbchecksum", server.rdb_checksum);
config_get_bool_field("activerehashing", server.activerehashing);
+ config_get_bool_field("activedefrag", server.active_defrag_enabled);
+ config_get_bool_field("protected-mode", server.protected_mode);
config_get_bool_field("repl-disable-tcp-nodelay",
server.repl_disable_tcp_nodelay);
config_get_bool_field("repl-diskless-sync",
server.repl_diskless_sync);
config_get_bool_field("aof-rewrite-incremental-fsync",
server.aof_rewrite_incremental_fsync);
+ config_get_bool_field("rdb-save-incremental-fsync",
+ server.rdb_save_incremental_fsync);
config_get_bool_field("aof-load-truncated",
server.aof_load_truncated);
+ config_get_bool_field("aof-use-rdb-preamble",
+ server.aof_use_rdb_preamble);
+ config_get_bool_field("lazyfree-lazy-eviction",
+ server.lazyfree_lazy_eviction);
+ config_get_bool_field("lazyfree-lazy-expire",
+ server.lazyfree_lazy_expire);
+ config_get_bool_field("lazyfree-lazy-server-del",
+ server.lazyfree_lazy_server_del);
+ config_get_bool_field("slave-lazy-flush",
+ server.repl_slave_lazy_flush);
+ config_get_bool_field("dynamic-hz",
+ server.dynamic_hz);
+
+ /* Enum values */
+ config_get_enum_field("maxmemory-policy",
+ server.maxmemory_policy,maxmemory_policy_enum);
+ config_get_enum_field("loglevel",
+ server.verbosity,loglevel_enum);
+ config_get_enum_field("supervised",
+ server.supervised_mode,supervised_mode_enum);
+ config_get_enum_field("appendfsync",
+ server.aof_fsync,aof_fsync_enum);
+ config_get_enum_field("syslog-facility",
+ server.syslog_facility,syslog_facility_enum);
/* Everything we can't handle with macros follows. */
- if (stringmatch(pattern,"appendonly",0)) {
+ if (stringmatch(pattern,"appendonly",1)) {
addReplyBulkCString(c,"appendonly");
- addReplyBulkCString(c,server.aof_state == REDIS_AOF_OFF ? "no" : "yes");
+ addReplyBulkCString(c,server.aof_state == AOF_OFF ? "no" : "yes");
matches++;
}
- if (stringmatch(pattern,"dir",0)) {
+ if (stringmatch(pattern,"dir",1)) {
char buf[1024];
if (getcwd(buf,sizeof(buf)) == NULL)
@@ -1167,25 +1413,7 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"maxmemory-policy",0)) {
- addReplyBulkCString(c,"maxmemory-policy");
- addReplyBulkCString(c,maxmemoryToString());
- matches++;
- }
- if (stringmatch(pattern,"appendfsync",0)) {
- char *policy;
-
- switch(server.aof_fsync) {
- case AOF_FSYNC_NO: policy = "no"; break;
- case AOF_FSYNC_EVERYSEC: policy = "everysec"; break;
- case AOF_FSYNC_ALWAYS: policy = "always"; break;
- default: policy = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"appendfsync");
- addReplyBulkCString(c,policy);
- matches++;
- }
- if (stringmatch(pattern,"save",0)) {
+ if (stringmatch(pattern,"save",1)) {
sds buf = sdsempty();
int j;
@@ -1201,36 +1429,17 @@ void configGetCommand(redisClient *c) {
sdsfree(buf);
matches++;
}
- if (stringmatch(pattern,"loglevel",0)) {
- char *s;
-
- switch(server.verbosity) {
- case REDIS_WARNING: s = "warning"; break;
- case REDIS_VERBOSE: s = "verbose"; break;
- case REDIS_NOTICE: s = "notice"; break;
- case REDIS_DEBUG: s = "debug"; break;
- default: s = "unknown"; break; /* too harmless to panic */
- }
- addReplyBulkCString(c,"loglevel");
- addReplyBulkCString(c,s);
- matches++;
- }
- if (stringmatch(pattern,"supervised",0)) {
- addReplyBulkCString(c,"supervised");
- addReplyBulkCString(c,supervisedToString());
- matches++;
- }
- if (stringmatch(pattern,"client-output-buffer-limit",0)) {
+ if (stringmatch(pattern,"client-output-buffer-limit",1)) {
sds buf = sdsempty();
int j;
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++) {
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
buf = sdscatprintf(buf,"%s %llu %llu %ld",
getClientTypeName(j),
server.client_obuf_limits[j].hard_limit_bytes,
server.client_obuf_limits[j].soft_limit_bytes,
(long) server.client_obuf_limits[j].soft_limit_seconds);
- if (j != REDIS_CLIENT_TYPE_COUNT-1)
+ if (j != CLIENT_TYPE_OBUF_COUNT-1)
buf = sdscatlen(buf," ",1);
}
addReplyBulkCString(c,"client-output-buffer-limit");
@@ -1238,14 +1447,14 @@ void configGetCommand(redisClient *c) {
sdsfree(buf);
matches++;
}
- if (stringmatch(pattern,"unixsocketperm",0)) {
+ if (stringmatch(pattern,"unixsocketperm",1)) {
char buf[32];
snprintf(buf,sizeof(buf),"%o",server.unixsocketperm);
addReplyBulkCString(c,"unixsocketperm");
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"slaveof",0)) {
+ if (stringmatch(pattern,"slaveof",1)) {
char buf[256];
addReplyBulkCString(c,"slaveof");
@@ -1257,8 +1466,8 @@ void configGetCommand(redisClient *c) {
addReplyBulkCString(c,buf);
matches++;
}
- if (stringmatch(pattern,"notify-keyspace-events",0)) {
- robj *flagsobj = createObject(REDIS_STRING,
+ if (stringmatch(pattern,"notify-keyspace-events",1)) {
+ robj *flagsobj = createObject(OBJ_STRING,
keyspaceEventsFlagsToString(server.notify_keyspace_events));
addReplyBulkCString(c,"notify-keyspace-events");
@@ -1266,7 +1475,7 @@ void configGetCommand(redisClient *c) {
decrRefCount(flagsobj);
matches++;
}
- if (stringmatch(pattern,"bind",0)) {
+ if (stringmatch(pattern,"bind",1)) {
sds aux = sdsjoin(server.bindaddr,server.bindaddr_count," ");
addReplyBulkCString(c,"bind");
@@ -1286,7 +1495,7 @@ void configGetCommand(redisClient *c) {
/* We use the following dictionary type to store where a configuration
* option is mentioned in the old configuration file, so it's
* like "maxmemory" -> list of line numbers (first line is zero). */
-unsigned int dictSdsCaseHash(const void *key);
+uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void dictSdsDestructor(void *privdata, void *val);
void dictListDestructor(void *privdata, void *val);
@@ -1344,7 +1553,7 @@ void rewriteConfigAddLineNumberToOption(struct rewriteConfigState *state, sds op
* This is useful as only unused lines of processed options will be blanked
* in the config file, while options the rewrite process does not understand
* remain untouched. */
-void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, char *option) {
+void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option) {
sds opt = sdsnew(option);
if (dictAdd(state->rewritten,opt,NULL) != DICT_OK) sdsfree(opt);
@@ -1358,7 +1567,7 @@ void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, char *option
struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
FILE *fp = fopen(path,"r");
struct rewriteConfigState *state = zmalloc(sizeof(*state));
- char buf[REDIS_CONFIGLINE_MAX+1];
+ char buf[CONFIG_MAX_LINE+1];
int linenum = -1;
if (fp == NULL && errno != ENOENT) return NULL;
@@ -1371,7 +1580,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
if (fp == NULL) return state;
/* Read the old file line by line, populate the state. */
- while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
+ while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL) {
int argc;
sds *argv;
sds line = sdstrim(sdsnew(buf),"\r\n\t ");
@@ -1428,7 +1637,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
*
* "line" is either used, or freed, so the caller does not need to free it
* in any way. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force) {
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force) {
sds o = sdsnew(option);
list *l = dictFetchValue(state->option_to_line,o);
@@ -1539,45 +1748,26 @@ void rewriteConfigOctalOption(struct rewriteConfigState *state, char *option, in
rewriteConfigRewriteLine(state,option,line,force);
}
-/* Rewrite an enumeration option, after the "value" every enum/value pair
- * is specified, terminated by NULL. After NULL the default value is
- * specified. See how the function is used for more information. */
-void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, ...) {
- va_list ap;
- char *enum_name, *matching_name = NULL;
- int enum_val, def_val, force;
+/* Rewrite an enumeration option. As usual it takes the state and option name,
+ * and in addition the enumeration array and the default value for the
+ * option. */
+void rewriteConfigEnumOption(struct rewriteConfigState *state, char *option, int value, configEnum *ce, int defval) {
sds line;
+ const char *name = configEnumGetNameOrUnknown(ce,value);
+ int force = value != defval;
- va_start(ap, value);
- while(1) {
- enum_name = va_arg(ap,char*);
- enum_val = va_arg(ap,int);
- if (enum_name == NULL) {
- def_val = enum_val;
- break;
- }
- if (value == enum_val) matching_name = enum_name;
- }
- va_end(ap);
-
- force = value != def_val;
- line = sdscatprintf(sdsempty(),"%s %s",option,matching_name);
+ line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
/* Rewrite the syslog-facility option. */
void rewriteConfigSyslogfacilityOption(struct rewriteConfigState *state) {
- int value = server.syslog_facility, j;
+ int value = server.syslog_facility;
int force = value != LOG_LOCAL0;
- char *name = NULL, *option = "syslog-facility";
+ const char *name = NULL, *option = "syslog-facility";
sds line;
- for (j = 0; validSyslogFacilities[j].name; j++) {
- if (validSyslogFacilities[j].value == value) {
- name = (char*) validSyslogFacilities[j].name;
- break;
- }
- }
+ name = configEnumGetNameOrUnknown(syslog_facility_enum,value);
line = sdscatprintf(sdsempty(),"%s %s",option,name);
rewriteConfigRewriteLine(state,option,line,force);
}
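Editor's note: rewriteConfigEnumOption() now takes a configEnum table plus a default value instead of a NULL-terminated va_list of name/value pairs, removing an error-prone varargs protocol. The force flag it computes is the heart of CONFIG REWRITE's minimalism: a line is pushed into the file only when the current value differs from the compiled-in default. A simplified model of that decision (names are illustrative; in the real code a non-forced option is still rewritten in place when the old file already mentions it):

#include <stdio.h>

static void rewrite_enum_line(const char *option, const char *name,
                              int value, int defval) {
    int force = (value != defval);
    if (force)
        printf("%s %s\n", option, name);  /* make sure the line appears */
    /* Non-forced options are omitted here for brevity. */
}

int main(void) {
    rewrite_enum_line("loglevel", "debug", 0, 2);   /* differs: forced */
    rewrite_enum_line("loglevel", "notice", 2, 2);  /* default: omitted */
    return 0;
}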
@@ -1646,7 +1836,7 @@ void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state
int j;
char *option = "client-output-buffer-limit";
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++) {
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) {
int force = (server.client_obuf_limits[j].hard_limit_bytes !=
clientBufferLimitsDefaults[j].hard_limit_bytes) ||
(server.client_obuf_limits[j].soft_limit_bytes !=
@@ -1737,7 +1927,7 @@ void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) {
/* Don't blank lines about options the rewrite process
             * doesn't understand. */
if (dictFind(state->rewritten,option) == NULL) {
- redisLog(REDIS_DEBUG,"Not rewritten option: %s", option);
+ serverLog(LL_DEBUG,"Not rewritten option: %s", option);
continue;
}
@@ -1830,97 +2020,105 @@ int rewriteConfig(char *path) {
* the rewrite state. */
rewriteConfigYesNoOption(state,"daemonize",server.daemonize,0);
- rewriteConfigStringOption(state,"pidfile",server.pidfile,REDIS_DEFAULT_PID_FILE);
- rewriteConfigNumericalOption(state,"port",server.port,REDIS_SERVERPORT);
- rewriteConfigNumericalOption(state,"tcp-backlog",server.tcp_backlog,REDIS_TCP_BACKLOG);
+ rewriteConfigStringOption(state,"pidfile",server.pidfile,CONFIG_DEFAULT_PID_FILE);
+ rewriteConfigNumericalOption(state,"port",server.port,CONFIG_DEFAULT_SERVER_PORT);
+ rewriteConfigNumericalOption(state,"cluster-announce-port",server.cluster_announce_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT);
+ rewriteConfigNumericalOption(state,"cluster-announce-bus-port",server.cluster_announce_bus_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT);
+ rewriteConfigNumericalOption(state,"tcp-backlog",server.tcp_backlog,CONFIG_DEFAULT_TCP_BACKLOG);
rewriteConfigBindOption(state);
rewriteConfigStringOption(state,"unixsocket",server.unixsocket,NULL);
- rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,REDIS_DEFAULT_UNIX_SOCKET_PERM);
- rewriteConfigNumericalOption(state,"timeout",server.maxidletime,REDIS_MAXIDLETIME);
- rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,REDIS_DEFAULT_TCP_KEEPALIVE);
- rewriteConfigEnumOption(state,"loglevel",server.verbosity,
- "debug", REDIS_DEBUG,
- "verbose", REDIS_VERBOSE,
- "notice", REDIS_NOTICE,
- "warning", REDIS_WARNING,
- NULL, REDIS_DEFAULT_VERBOSITY);
- rewriteConfigStringOption(state,"logfile",server.logfile,REDIS_DEFAULT_LOGFILE);
- rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,REDIS_DEFAULT_SYSLOG_ENABLED);
- rewriteConfigStringOption(state,"syslog-ident",server.syslog_ident,REDIS_DEFAULT_SYSLOG_IDENT);
+ rewriteConfigOctalOption(state,"unixsocketperm",server.unixsocketperm,CONFIG_DEFAULT_UNIX_SOCKET_PERM);
+ rewriteConfigNumericalOption(state,"timeout",server.maxidletime,CONFIG_DEFAULT_CLIENT_TIMEOUT);
+ rewriteConfigNumericalOption(state,"tcp-keepalive",server.tcpkeepalive,CONFIG_DEFAULT_TCP_KEEPALIVE);
+ rewriteConfigNumericalOption(state,"slave-announce-port",server.slave_announce_port,CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT);
+ rewriteConfigEnumOption(state,"loglevel",server.verbosity,loglevel_enum,CONFIG_DEFAULT_VERBOSITY);
+ rewriteConfigStringOption(state,"logfile",server.logfile,CONFIG_DEFAULT_LOGFILE);
+ rewriteConfigYesNoOption(state,"syslog-enabled",server.syslog_enabled,CONFIG_DEFAULT_SYSLOG_ENABLED);
+ rewriteConfigStringOption(state,"syslog-ident",server.syslog_ident,CONFIG_DEFAULT_SYSLOG_IDENT);
rewriteConfigSyslogfacilityOption(state);
rewriteConfigSaveOption(state);
- rewriteConfigNumericalOption(state,"databases",server.dbnum,REDIS_DEFAULT_DBNUM);
- rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR);
- rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,REDIS_DEFAULT_RDB_COMPRESSION);
- rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,REDIS_DEFAULT_RDB_CHECKSUM);
- rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,REDIS_DEFAULT_RDB_FILENAME);
+ rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM);
+ rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR);
+ rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION);
+ rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM);
+ rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,CONFIG_DEFAULT_RDB_FILENAME);
rewriteConfigDirOption(state);
rewriteConfigSlaveofOption(state);
+ rewriteConfigStringOption(state,"slave-announce-ip",server.slave_announce_ip,CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP);
rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL);
- rewriteConfigYesNoOption(state,"slave-serve-stale-data",server.repl_serve_stale_data,REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA);
- rewriteConfigYesNoOption(state,"slave-read-only",server.repl_slave_ro,REDIS_DEFAULT_SLAVE_READ_ONLY);
- rewriteConfigNumericalOption(state,"repl-ping-slave-period",server.repl_ping_slave_period,REDIS_REPL_PING_SLAVE_PERIOD);
- rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,REDIS_REPL_TIMEOUT);
- rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,REDIS_DEFAULT_REPL_BACKLOG_SIZE);
- rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT);
- rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY);
- rewriteConfigYesNoOption(state,"repl-diskless-sync",server.repl_diskless_sync,REDIS_DEFAULT_REPL_DISKLESS_SYNC);
- rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,REDIS_DEFAULT_REPL_DISKLESS_SYNC_DELAY);
- rewriteConfigNumericalOption(state,"slave-priority",server.slave_priority,REDIS_DEFAULT_SLAVE_PRIORITY);
- rewriteConfigNumericalOption(state,"min-slaves-to-write",server.repl_min_slaves_to_write,REDIS_DEFAULT_MIN_SLAVES_TO_WRITE);
- rewriteConfigNumericalOption(state,"min-slaves-max-lag",server.repl_min_slaves_max_lag,REDIS_DEFAULT_MIN_SLAVES_MAX_LAG);
+ rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL);
+ rewriteConfigYesNoOption(state,"slave-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA);
+ rewriteConfigYesNoOption(state,"slave-read-only",server.repl_slave_ro,CONFIG_DEFAULT_SLAVE_READ_ONLY);
+ rewriteConfigNumericalOption(state,"repl-ping-slave-period",server.repl_ping_slave_period,CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD);
+ rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT);
+ rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE);
+ rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT);
+ rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY);
+ rewriteConfigYesNoOption(state,"repl-diskless-sync",server.repl_diskless_sync,CONFIG_DEFAULT_REPL_DISKLESS_SYNC);
+ rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY);
+ rewriteConfigNumericalOption(state,"slave-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY);
+ rewriteConfigNumericalOption(state,"min-slaves-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE);
+ rewriteConfigNumericalOption(state,"min-slaves-max-lag",server.repl_min_slaves_max_lag,CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG);
rewriteConfigStringOption(state,"requirepass",server.requirepass,NULL);
- rewriteConfigNumericalOption(state,"maxclients",server.maxclients,REDIS_MAX_CLIENTS);
- rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,REDIS_DEFAULT_MAXMEMORY);
- rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,
- "volatile-lru", REDIS_MAXMEMORY_VOLATILE_LRU,
- "allkeys-lru", REDIS_MAXMEMORY_ALLKEYS_LRU,
- "volatile-random", REDIS_MAXMEMORY_VOLATILE_RANDOM,
- "allkeys-random", REDIS_MAXMEMORY_ALLKEYS_RANDOM,
- "volatile-ttl", REDIS_MAXMEMORY_VOLATILE_TTL,
- "noeviction", REDIS_MAXMEMORY_NO_EVICTION,
- NULL, REDIS_DEFAULT_MAXMEMORY_POLICY);
- rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,REDIS_DEFAULT_MAXMEMORY_SAMPLES);
- rewriteConfigYesNoOption(state,"appendonly",server.aof_state != REDIS_AOF_OFF,0);
- rewriteConfigStringOption(state,"appendfilename",server.aof_filename,REDIS_DEFAULT_AOF_FILENAME);
- rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,
- "everysec", AOF_FSYNC_EVERYSEC,
- "always", AOF_FSYNC_ALWAYS,
- "no", AOF_FSYNC_NO,
- NULL, REDIS_DEFAULT_AOF_FSYNC);
- rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE);
- rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,REDIS_AOF_REWRITE_PERC);
- rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,REDIS_AOF_REWRITE_MIN_SIZE);
- rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,REDIS_LUA_TIME_LIMIT);
+ rewriteConfigNumericalOption(state,"maxclients",server.maxclients,CONFIG_DEFAULT_MAX_CLIENTS);
+ rewriteConfigBytesOption(state,"maxmemory",server.maxmemory,CONFIG_DEFAULT_MAXMEMORY);
+ rewriteConfigBytesOption(state,"proto-max-bulk-len",server.proto_max_bulk_len,CONFIG_DEFAULT_PROTO_MAX_BULK_LEN);
+ rewriteConfigBytesOption(state,"client-query-buffer-limit",server.client_max_querybuf_len,PROTO_MAX_QUERYBUF_LEN);
+ rewriteConfigEnumOption(state,"maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum,CONFIG_DEFAULT_MAXMEMORY_POLICY);
+ rewriteConfigNumericalOption(state,"maxmemory-samples",server.maxmemory_samples,CONFIG_DEFAULT_MAXMEMORY_SAMPLES);
+ rewriteConfigNumericalOption(state,"lfu-log-factor",server.lfu_log_factor,CONFIG_DEFAULT_LFU_LOG_FACTOR);
+ rewriteConfigNumericalOption(state,"lfu-decay-time",server.lfu_decay_time,CONFIG_DEFAULT_LFU_DECAY_TIME);
+ rewriteConfigNumericalOption(state,"active-defrag-threshold-lower",server.active_defrag_threshold_lower,CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER);
+ rewriteConfigNumericalOption(state,"active-defrag-threshold-upper",server.active_defrag_threshold_upper,CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER);
+ rewriteConfigBytesOption(state,"active-defrag-ignore-bytes",server.active_defrag_ignore_bytes,CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES);
+ rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN);
+ rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX);
+ rewriteConfigNumericalOption(state,"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS);
+ rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0);
+ rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME);
+ rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC);
+ rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE);
+ rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC);
+ rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,AOF_REWRITE_MIN_SIZE);
+ rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,LUA_SCRIPT_TIME_LIMIT);
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
- rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
- rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE);
- rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT);
- rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER);
- rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY);
- rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,REDIS_SLOWLOG_LOG_SLOWER_THAN);
- rewriteConfigNumericalOption(state,"latency-monitor-threshold",server.latency_monitor_threshold,REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD);
- rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,REDIS_SLOWLOG_MAX_LEN);
+ rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
+ rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE);
+ rewriteConfigYesNoOption(state,"cluster-slave-no-failover",server.cluster_slave_no_failover,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER);
+ rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT);
+ rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER);
+ rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY);
+ rewriteConfigNumericalOption(state,"slowlog-log-slower-than",server.slowlog_log_slower_than,CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN);
+ rewriteConfigNumericalOption(state,"latency-monitor-threshold",server.latency_monitor_threshold,CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD);
+ rewriteConfigNumericalOption(state,"slowlog-max-len",server.slowlog_max_len,CONFIG_DEFAULT_SLOWLOG_MAX_LEN);
rewriteConfigNotifykeyspaceeventsOption(state);
- rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,REDIS_HASH_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,REDIS_HASH_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,REDIS_LIST_MAX_ZIPLIST_SIZE);
- rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,REDIS_LIST_COMPRESS_DEPTH);
- rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES);
- rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES);
- rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE);
- rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES);
- rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING);
+ rewriteConfigNumericalOption(state,"hash-max-ziplist-entries",server.hash_max_ziplist_entries,OBJ_HASH_MAX_ZIPLIST_ENTRIES);
+ rewriteConfigNumericalOption(state,"hash-max-ziplist-value",server.hash_max_ziplist_value,OBJ_HASH_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"stream-node-max-bytes",server.stream_node_max_bytes,OBJ_STREAM_NODE_MAX_BYTES);
+ rewriteConfigNumericalOption(state,"stream-node-max-entries",server.stream_node_max_entries,OBJ_STREAM_NODE_MAX_ENTRIES);
+ rewriteConfigNumericalOption(state,"list-max-ziplist-size",server.list_max_ziplist_size,OBJ_LIST_MAX_ZIPLIST_SIZE);
+ rewriteConfigNumericalOption(state,"list-compress-depth",server.list_compress_depth,OBJ_LIST_COMPRESS_DEPTH);
+ rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,OBJ_SET_MAX_INTSET_ENTRIES);
+ rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,OBJ_ZSET_MAX_ZIPLIST_ENTRIES);
+ rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,OBJ_ZSET_MAX_ZIPLIST_VALUE);
+ rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES);
+ rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING);
+ rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG);
+ rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE);
rewriteConfigClientoutputbufferlimitOption(state);
- rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ);
- rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
- rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,REDIS_DEFAULT_AOF_LOAD_TRUNCATED);
- rewriteConfigEnumOption(state,"supervised",server.supervised_mode,
- "upstart", REDIS_SUPERVISED_UPSTART,
- "systemd", REDIS_SUPERVISED_SYSTEMD,
- "auto", REDIS_SUPERVISED_AUTODETECT,
- "no", REDIS_SUPERVISED_NONE,
- NULL, REDIS_SUPERVISED_NONE);
+ rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ);
+ rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC);
+ rewriteConfigYesNoOption(state,"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC);
+ rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED);
+ rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE);
+ rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-eviction",server.lazyfree_lazy_eviction,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE);
+ rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL);
+ rewriteConfigYesNoOption(state,"slave-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH);
+ rewriteConfigYesNoOption(state,"dynamic-hz",server.dynamic_hz,CONFIG_DEFAULT_DYNAMIC_HZ);
+
+ /* Rewrite Sentinel config if in Sentinel mode. */
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
/* Step 3: remove all the orphaned lines in the old file, that is, lines
@@ -1942,38 +2140,44 @@ int rewriteConfig(char *path) {
* CONFIG command entry point
*----------------------------------------------------------------------------*/
-void configCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"set")) {
- if (c->argc != 4) goto badarity;
+void configCommand(client *c) {
+ /* Only allow CONFIG GET while loading. */
+ if (server.loading && strcasecmp(c->argv[1]->ptr,"get")) {
+ addReplyError(c,"Only CONFIG GET is allowed during loading");
+ return;
+ }
+
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"GET <pattern> -- Return parameters matching the glob-like <pattern> and their values.",
+"SET <parameter> <value> -- Set parameter to value.",
+"RESETSTAT -- Reset statistics reported by INFO.",
+"REWRITE -- Rewrite the configuration file.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"set") && c->argc == 4) {
configSetCommand(c);
- } else if (!strcasecmp(c->argv[1]->ptr,"get")) {
- if (c->argc != 3) goto badarity;
+ } else if (!strcasecmp(c->argv[1]->ptr,"get") && c->argc == 3) {
configGetCommand(c);
- } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) {
- if (c->argc != 2) goto badarity;
+ } else if (!strcasecmp(c->argv[1]->ptr,"resetstat") && c->argc == 2) {
resetServerStats();
resetCommandTableStats();
addReply(c,shared.ok);
- } else if (!strcasecmp(c->argv[1]->ptr,"rewrite")) {
- if (c->argc != 2) goto badarity;
+ } else if (!strcasecmp(c->argv[1]->ptr,"rewrite") && c->argc == 2) {
if (server.configfile == NULL) {
addReplyError(c,"The server is running without a config file");
return;
}
if (rewriteConfig(server.configfile) == -1) {
- redisLog(REDIS_WARNING,"CONFIG REWRITE failed: %s", strerror(errno));
+ serverLog(LL_WARNING,"CONFIG REWRITE failed: %s", strerror(errno));
addReplyErrorFormat(c,"Rewriting config file: %s", strerror(errno));
} else {
- redisLog(REDIS_WARNING,"CONFIG REWRITE executed with success.");
+ serverLog(LL_WARNING,"CONFIG REWRITE executed with success.");
addReply(c,shared.ok);
}
} else {
- addReplyError(c,
- "CONFIG subcommand must be one of GET, SET, RESETSTAT, REWRITE");
+ addReplySubcommandSyntaxError(c);
+ return;
}
- return;
-
-badarity:
- addReplyErrorFormat(c,"Wrong number of arguments for CONFIG %s",
- (char*) c->argv[1]->ptr);
}
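Editor's note: the new configCommand() folds the arity check into each subcommand test and routes every failure through addReplySubcommandSyntaxError(), which also removes the old badarity label. A generic, self-contained sketch of that dispatch shape (argv/argc stand in for the parsed client request):

#include <stdio.h>
#include <strings.h>

static int dispatch(int argc, char **argv) {
    if (argc == 2 && !strcasecmp(argv[1], "help")) {
        puts("GET <pattern> / SET <parameter> <value> / RESETSTAT / REWRITE");
    } else if (argc == 4 && !strcasecmp(argv[1], "set")) {
        printf("set %s = %s\n", argv[2], argv[3]);
    } else if (argc == 3 && !strcasecmp(argv[1], "get")) {
        printf("get %s\n", argv[2]);
    } else {
        /* One branch now covers both unknown subcommands and wrong arity,
         * mirroring addReplySubcommandSyntaxError(). */
        fprintf(stderr, "Unknown subcommand or wrong number of arguments\n");
        return -1;
    }
    return 0;
}

int main(void) {
    char *req[] = {"config", "get", "maxmemory"};
    return dispatch(3, req);
}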
diff --git a/src/config.h b/src/config.h
index 2b5004baa..ee3ad508e 100644
--- a/src/config.h
+++ b/src/config.h
@@ -34,6 +34,11 @@
#include <AvailabilityMacros.h>
#endif
+#ifdef __linux__
+#include <linux/version.h>
+#include <features.h>
+#endif
+
/* Define redis_fstat to fstat or fstat64() */
#if defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
#define redis_fstat fstat64
@@ -57,7 +62,7 @@
#endif
/* Test for backtrace() */
-#if defined(__APPLE__) || defined(__linux__)
+#if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__))
#define HAVE_BACKTRACE 1
#endif
@@ -82,18 +87,16 @@
#endif
#endif
-/* Define aof_fsync to fdatasync() in Linux and fsync() for all the rest */
+/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#ifdef __linux__
-#define aof_fsync fdatasync
+#define redis_fsync fdatasync
#else
-#define aof_fsync fsync
+#define redis_fsync fsync
#endif
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
* the plain fsync() call. */
#ifdef __linux__
-#include <linux/version.h>
-#include <features.h>
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if (LINUX_VERSION_CODE >= 0x020611 && __GLIBC_PREREQ(2, 6))
#define HAVE_SYNC_FILE_RANGE 1
@@ -118,7 +121,7 @@
#define USE_SETPROCTITLE
#endif
-#if (defined __linux || defined __APPLE__)
+#if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__)
#define USE_SETPROCTITLE
#define INIT_SETPROCTITLE_REPLACEMENT
void spt_init(int argc, char *argv[]);
@@ -203,4 +206,22 @@ void setproctitle(const char *fmt, ...);
#endif
#endif
+/* Make sure we can test for ARM by just checking for __arm__, since sometimes
+ * __arm is defined but __arm__ is not. */
+#if defined(__arm) && !defined(__arm__)
+#define __arm__
+#endif
+#if defined (__aarch64__) && !defined(__arm64__)
+#define __arm64__
+#endif
+
+/* Make sure we can test for SPARC by just checking for __sparc__. */
+#if defined(__sparc) && !defined(__sparc__)
+#define __sparc__
+#endif
+
+#if defined(__sparc__) || defined(__arm__)
+#define USE_ALIGNED_ACCESS
+#endif
+
#endif
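Editor's note: the config.h changes above normalize the architecture macros (so __arm__ and __sparc__ are reliable to test) and define USE_ALIGNED_ACCESS on platforms where unaligned loads can fault or trap. Code elsewhere in the tree can then branch on that macro and go through memcpy(), which compilers lower to the best safe instruction sequence for the target. A generic sketch of the pattern, not a copy of Redis code:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t load_u64(const void *p) {
#ifdef USE_ALIGNED_ACCESS
    uint64_t v;
    memcpy(&v, p, sizeof(v));  /* safe for any alignment; becomes a plain
                                  load where the CPU permits it */
    return v;
#else
    return *(const uint64_t *)p;  /* direct load on x86-like targets */
#endif
}

int main(void) {
    unsigned char buf[16] = {0};
    buf[3] = 0x2a;
    /* Deliberately misaligned read starting at offset 3. */
    printf("%llu\n", (unsigned long long)load_u64(buf + 3));
    return 0;
}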
diff --git a/src/crc16.c b/src/crc16.c
index 1ec9161c9..7b8c1dad0 100644
--- a/src/crc16.c
+++ b/src/crc16.c
@@ -1,4 +1,4 @@
-#include "redis.h"
+#include "server.h"
/*
* Copyright 2001-2010 Georges Menie (www.menie.org)
diff --git a/src/db.c b/src/db.c
index 69d1a7768..ec92a2b4e 100644
--- a/src/db.c
+++ b/src/db.c
@@ -27,21 +27,30 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
+#include "atomicvar.h"
#include <signal.h>
#include <ctype.h>
-void slotToKeyAdd(robj *key);
-void slotToKeyDel(robj *key);
-void slotToKeyFlush(void);
-
/*-----------------------------------------------------------------------------
* C-level DB API
*----------------------------------------------------------------------------*/
-robj *lookupKey(redisDb *db, robj *key) {
+/* Update LFU when an object is accessed.
+ * Firstly, decrement the counter if the decrement time is reached.
+ * Then logarithmically increment the counter, and update the access time. */
+void updateLFU(robj *val) {
+ unsigned long counter = LFUDecrAndReturn(val);
+ counter = LFULogIncr(counter);
+ val->lru = (LFUGetTimeInMinutes()<<8) | counter;
+}
+
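Editor's note: updateLFU() above shows how the single 24-bit robj->lru field serves both eviction policies. Under LFU the high 16 bits hold a minutes-resolution decay clock and the low 8 bits a logarithmic access counter. The decay and increment helpers (LFUDecrAndReturn(), LFULogIncr()) live in evict.c and are not part of this diff, but the packing itself can be sketched standalone:

#include <stdio.h>
#include <time.h>

#define LFU_CLOCK_MASK 0xffff   /* 16 bits of minutes */

static unsigned long lfu_minutes(void) {
    return (unsigned long)(time(NULL) / 60) & LFU_CLOCK_MASK;
}

int main(void) {
    unsigned long counter = 5;  /* 8-bit logarithmic access counter */
    /* Pack time and counter exactly as updateLFU() does. */
    unsigned long lru = (lfu_minutes() << 8) | counter;

    /* Unpack, as the eviction code does when deciding what to evict. */
    printf("counter=%lu minutes=%lu\n", lru & 0xff, lru >> 8);
    return 0;
}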
+/* Low level key lookup API, not actually called directly from command
+ * implementations that should instead rely on lookupKeyRead(),
+ * lookupKeyWrite() and lookupKeyReadWithFlags(). */
+robj *lookupKey(redisDb *db, robj *key, int flags) {
dictEntry *de = dictFind(db->dict,key->ptr);
if (de) {
robj *val = dictGetVal(de);
@@ -49,20 +58,49 @@ robj *lookupKey(redisDb *db, robj *key) {
/* Update the access time for the ageing algorithm.
* Don't do it if we have a saving child, as this will trigger
* a copy on write madness. */
- if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
- val->lru = LRU_CLOCK();
+ if (server.rdb_child_pid == -1 &&
+ server.aof_child_pid == -1 &&
+ !(flags & LOOKUP_NOTOUCH))
+ {
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ updateLFU(val);
+ } else {
+ val->lru = LRU_CLOCK();
+ }
+ }
return val;
} else {
return NULL;
}
}
-robj *lookupKeyRead(redisDb *db, robj *key) {
+/* Lookup a key for read operations, or return NULL if the key is not found
+ * in the specified DB.
+ *
+ * As a side effect of calling this function:
+ * 1. A key gets expired if it reached its TTL.
+ * 2. The key last access time is updated.
+ * 3. The global keys hits/misses stats are updated (reported in INFO).
+ *
+ * This API should not be used when we write to the key after obtaining
+ * the object linked to the key, but only for read-only operations.
+ *
+ * Flags change the behavior of this command:
+ *
+ * LOOKUP_NONE (or zero): no special flags are passed.
+ * LOOKUP_NOTOUCH: don't alter the last access time of the key.
+ *
+ * Note: this function also returns NULL if the key is logically expired
+ * but still exists, in case this instance is a slave, since this API is
+ * called only for read operations. Even though key expiry is master-driven,
+ * we can correctly report a key as expired on slaves even if the master
+ * is lagging in expiring our key via DELs in the replication link. */
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
robj *val;
if (expireIfNeeded(db,key) == 1) {
/* Key expired. If we are in the context of a master, expireIfNeeded()
- * returns 0 only when the key does not exist at all, so it's save
+ * returns 0 only when the key does not exist at all, so it's safe
* to return NULL ASAP. */
if (server.masterhost == NULL) return NULL;
@@ -75,18 +113,18 @@ robj *lookupKeyRead(redisDb *db, robj *key) {
* safety measure, the command invoked is a read-only command, we can
* safely return NULL here, and provide a more consistent behavior
* to clients accessing expired values in a read-only fashion, that
- * will say the key as non exisitng.
+ * will report the key as non existing.
*
* Notably this covers GETs when slaves are used to scale reads. */
if (server.current_client &&
server.current_client != server.master &&
server.current_client->cmd &&
- server.current_client->cmd->flags & REDIS_CMD_READONLY)
+ server.current_client->cmd->flags & CMD_READONLY)
{
return NULL;
}
}
- val = lookupKey(db,key);
+ val = lookupKey(db,key,flags);
if (val == NULL)
server.stat_keyspace_misses++;
else
@@ -94,18 +132,29 @@ robj *lookupKeyRead(redisDb *db, robj *key) {
return val;
}
+/* Like lookupKeyReadWithFlags(), but does not use any flag, which is the
+ * common case. */
+robj *lookupKeyRead(redisDb *db, robj *key) {
+ return lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
+}
+
+/* Lookup a key for write operations, and as a side effect, if needed, expires
+ * the key if its TTL is reached.
+ *
+ * Returns the linked value object if the key exists or NULL if the key
+ * does not exist in the specified DB. */
robj *lookupKeyWrite(redisDb *db, robj *key) {
expireIfNeeded(db,key);
- return lookupKey(db,key);
+ return lookupKey(db,key,LOOKUP_NONE);
}
-robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
robj *o = lookupKeyRead(c->db, key);
if (!o) addReply(c,reply);
return o;
}
-robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
robj *o = lookupKeyWrite(c->db, key);
if (!o) addReply(c,reply);
return o;
@@ -119,10 +168,12 @@ void dbAdd(redisDb *db, robj *key, robj *val) {
sds copy = sdsdup(key->ptr);
int retval = dictAdd(db->dict, copy, val);
- redisAssertWithInfo(NULL,key,retval == REDIS_OK);
- if (val->type == REDIS_LIST) signalListAsReady(db, key);
+ serverAssertWithInfo(NULL,key,retval == DICT_OK);
+ if (val->type == OBJ_LIST ||
+ val->type == OBJ_ZSET)
+ signalKeyAsReady(db, key);
if (server.cluster_enabled) slotToKeyAdd(key);
- }
+}
/* Overwrite an existing key with a new value. Incrementing the reference
* count of the new value is up to the caller.
@@ -132,8 +183,20 @@ void dbAdd(redisDb *db, robj *key, robj *val) {
void dbOverwrite(redisDb *db, robj *key, robj *val) {
dictEntry *de = dictFind(db->dict,key->ptr);
- redisAssertWithInfo(NULL,key,de != NULL);
- dictReplace(db->dict, key->ptr, val);
+ serverAssertWithInfo(NULL,key,de != NULL);
+ dictEntry auxentry = *de;
+ robj *old = dictGetVal(de);
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ val->lru = old->lru;
+ }
+ dictSetVal(db->dict, de, val);
+
+ if (server.lazyfree_lazy_server_del) {
+ freeObjAsync(old);
+ dictSetVal(db->dict, &auxentry, NULL);
+ }
+
+ dictFreeVal(db->dict, &auxentry);
}
/* High level Set operation. This function can be used in order to set
@@ -141,7 +204,9 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) {
*
* 1) The ref count of the value object is incremented.
* 2) clients WATCHing for the destination key notified.
- * 3) The expire time of the key is reset (the key is made persistent). */
+ * 3) The expire time of the key is reset (the key is made persistent).
+ *
+ * All the new keys in the database should be created via this interface. */
void setKey(redisDb *db, robj *key, robj *val) {
if (lookupKeyWrite(db,key) == NULL) {
dbAdd(db,key,val);
@@ -163,6 +228,8 @@ int dbExists(redisDb *db, robj *key) {
* The function makes sure to return keys not already expired. */
robj *dbRandomKey(redisDb *db) {
dictEntry *de;
+ int maxtries = 100;
+ int allvolatile = dictSize(db->dict) == dictSize(db->expires);
while(1) {
sds key;
@@ -174,6 +241,17 @@ robj *dbRandomKey(redisDb *db) {
key = dictGetKey(de);
keyobj = createStringObject(key,sdslen(key));
if (dictFind(db->expires,key)) {
+ if (allvolatile && server.masterhost && --maxtries == 0) {
+ /* If the DB is composed only of keys with an expire set,
+ * it could happen that all the keys are already logically
+ * expired in the slave, so the function cannot stop because
+                 * expireIfNeeded() is false, nor can it stop because
+                 * dictGetRandomKey() returns NULL (there are keys to return).
+                 * To prevent an infinite loop we allow a limited number of
+                 * tries, but if the conditions for an infinite loop are met,
+                 * we eventually return a key name that may already be expired. */
+ return keyobj;
+ }
if (expireIfNeeded(db,keyobj)) {
decrRefCount(keyobj);
continue; /* search for another key. This expired. */
@@ -184,7 +262,7 @@ robj *dbRandomKey(redisDb *db) {
}
/* Delete a key, value, and associated expiration entry if any, from the DB */
-int dbDelete(redisDb *db, robj *key) {
+int dbSyncDelete(redisDb *db, robj *key) {
/* Deleting an entry from the expires dict will not free the sds of
* the key, because it is shared with the main dictionary. */
if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
@@ -196,6 +274,13 @@ int dbDelete(redisDb *db, robj *key) {
}
}
+/* This is a wrapper whose behavior depends on the Redis lazy free
+ * configuration. Deletes the key synchronously or asynchronously. */
+int dbDelete(redisDb *db, robj *key) {
+ return server.lazyfree_lazy_server_del ? dbAsyncDelete(db,key) :
+ dbSyncDelete(db,key);
+}
+
/* Prepare the string object stored at 'key' to be modified destructively
* to implement commands like SETBIT or APPEND.
*
@@ -217,15 +302,15 @@ int dbDelete(redisDb *db, robj *key) {
* in 'db', the usage pattern looks like this:
*
* o = lookupKeyWrite(db,key);
- * if (checkType(c,o,REDIS_STRING)) return;
+ * if (checkType(c,o,OBJ_STRING)) return;
* o = dbUnshareStringValue(db,key,o);
*
* At this point the caller is ready to modify the object, for example
* using an sdscat() call to append some data, or anything else.
*/
robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
- redisAssert(o->type == REDIS_STRING);
- if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
+ serverAssert(o->type == OBJ_STRING);
+ if (o->refcount != 1 || o->encoding != OBJ_ENCODING_RAW) {
robj *decoded = getDecodedObject(o);
o = createRawStringObject(decoded->ptr, sdslen(decoded->ptr));
decrRefCount(decoded);
@@ -234,24 +319,62 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) {
return o;
}
-long long emptyDb(void(callback)(void*)) {
- int j;
+/* Remove all keys from all the databases in a Redis server.
+ * If a callback is given, it is called from time to time to signal
+ * that work is in progress.
+ *
+ * The dbnum can be -1 if all the DBs should be flushed, or the specified
+ * DB number if we want to flush only a single Redis database number.
+ *
+ * Flags can be EMPTYDB_NO_FLAGS if no special flags are specified or
+ * EMPTYDB_ASYNC if we want the memory to be freed in a different thread
+ * and the function to return ASAP.
+ *
+ * On success the function returns the number of keys removed from the
+ * database(s). Otherwise -1 is returned in the specific case where the
+ * DB number is out of range, and errno is set to EINVAL. */
+long long emptyDb(int dbnum, int flags, void(callback)(void*)) {
+ int async = (flags & EMPTYDB_ASYNC);
long long removed = 0;
- for (j = 0; j < server.dbnum; j++) {
+ if (dbnum < -1 || dbnum >= server.dbnum) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ int startdb, enddb;
+ if (dbnum == -1) {
+ startdb = 0;
+ enddb = server.dbnum-1;
+ } else {
+ startdb = enddb = dbnum;
+ }
+
+ for (int j = startdb; j <= enddb; j++) {
removed += dictSize(server.db[j].dict);
- dictEmpty(server.db[j].dict,callback);
- dictEmpty(server.db[j].expires,callback);
+ if (async) {
+ emptyDbAsync(&server.db[j]);
+ } else {
+ dictEmpty(server.db[j].dict,callback);
+ dictEmpty(server.db[j].expires,callback);
+ }
}
- if (server.cluster_enabled) slotToKeyFlush();
+ if (server.cluster_enabled) {
+ if (async) {
+ slotToKeyFlushAsync();
+ } else {
+ slotToKeyFlush();
+ }
+ }
+ if (dbnum == -1) flushSlaveKeysWithExpireList();
return removed;
}
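A short usage sketch of the new emptyDb() signature, assuming it is called from server code: flushing a single DB asynchronously and checking the out-of-range error path described in the comment above.

    long long removed = emptyDb(5, EMPTYDB_ASYNC, NULL);
    if (removed == -1 && errno == EINVAL) {
        /* DB number 5 is out of range for this server's configured dbnum. */
    }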
-int selectDb(redisClient *c, int id) {
+int selectDb(client *c, int id) {
if (id < 0 || id >= server.dbnum)
- return REDIS_ERR;
+ return C_ERR;
c->db = &server.db[id];
- return REDIS_OK;
+ return C_OK;
}
/*-----------------------------------------------------------------------------
@@ -275,18 +398,49 @@ void signalFlushedDb(int dbid) {
* Type agnostic commands operating on the key space
*----------------------------------------------------------------------------*/
-void flushdbCommand(redisClient *c) {
- server.dirty += dictSize(c->db->dict);
+/* Return the set of flags to use for the emptyDb() call for FLUSHALL
+ * and FLUSHDB commands.
+ *
+ * Currently the command just attempts to parse the "ASYNC" option. It
+ * also checks that the command arity is correct.
+ *
+ * On success C_OK is returned and the flags are stored in *flags, otherwise
+ * C_ERR is returned and the function sends an error to the client. */
+int getFlushCommandFlags(client *c, int *flags) {
+ /* Parse the optional ASYNC option. */
+ if (c->argc > 1) {
+ if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
+ addReply(c,shared.syntaxerr);
+ return C_ERR;
+ }
+ *flags = EMPTYDB_ASYNC;
+ } else {
+ *flags = EMPTYDB_NO_FLAGS;
+ }
+ return C_OK;
+}
+
+/* FLUSHDB [ASYNC]
+ *
+ * Flushes the currently SELECTed Redis DB. */
+void flushdbCommand(client *c) {
+ int flags;
+
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
signalFlushedDb(c->db->id);
- dictEmpty(c->db->dict,NULL);
- dictEmpty(c->db->expires,NULL);
- if (server.cluster_enabled) slotToKeyFlush();
+ server.dirty += emptyDb(c->db->id,flags,NULL);
addReply(c,shared.ok);
}
-void flushallCommand(redisClient *c) {
+/* FLUSHALL [ASYNC]
+ *
+ * Flushes the whole server data set. */
+void flushallCommand(client *c) {
+ int flags;
+
+ if (getFlushCommandFlags(c,&flags) == C_ERR) return;
signalFlushedDb(-1);
- server.dirty += emptyDb(NULL);
+ server.dirty += emptyDb(-1,flags,NULL);
addReply(c,shared.ok);
if (server.rdb_child_pid != -1) {
kill(server.rdb_child_pid,SIGUSR1);
@@ -296,56 +450,72 @@ void flushallCommand(redisClient *c) {
/* Normally rdbSave() will reset dirty, but we don't want this here
* as otherwise FLUSHALL will not be replicated nor put into the AOF. */
int saved_dirty = server.dirty;
- rdbSave(server.rdb_filename);
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ rdbSave(server.rdb_filename,rsiptr);
server.dirty = saved_dirty;
}
server.dirty++;
}
-void delCommand(redisClient *c) {
- int deleted = 0, j;
+/* This command implements DEL and LAZYDEL. */
+void delGenericCommand(client *c, int lazy) {
+ int numdel = 0, j;
for (j = 1; j < c->argc; j++) {
expireIfNeeded(c->db,c->argv[j]);
- if (dbDelete(c->db,c->argv[j])) {
+ int deleted = lazy ? dbAsyncDelete(c->db,c->argv[j]) :
+ dbSyncDelete(c->db,c->argv[j]);
+ if (deleted) {
signalModifiedKey(c->db,c->argv[j]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,
+ notifyKeyspaceEvent(NOTIFY_GENERIC,
"del",c->argv[j],c->db->id);
server.dirty++;
- deleted++;
+ numdel++;
}
}
- addReplyLongLong(c,deleted);
+ addReplyLongLong(c,numdel);
}
-void existsCommand(redisClient *c) {
- expireIfNeeded(c->db,c->argv[1]);
- if (dbExists(c->db,c->argv[1])) {
- addReply(c, shared.cone);
- } else {
- addReply(c, shared.czero);
+void delCommand(client *c) {
+ delGenericCommand(c,0);
+}
+
+void unlinkCommand(client *c) {
+ delGenericCommand(c,1);
+}
+
+/* EXISTS key1 key2 ... key_N.
+ * Return value is the number of keys existing. */
+void existsCommand(client *c) {
+ long long count = 0;
+ int j;
+
+ for (j = 1; j < c->argc; j++) {
+ if (lookupKeyRead(c->db,c->argv[j])) count++;
}
+ addReplyLongLong(c,count);
}
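Since the new EXISTS simply counts a hit per argument, repeating the same key increments the count each time. For instance, assuming somekey exists and missing does not:

    redis> EXISTS somekey missing somekey
    (integer) 2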
-void selectCommand(redisClient *c) {
+void selectCommand(client *c) {
long id;
if (getLongFromObjectOrReply(c, c->argv[1], &id,
- "invalid DB index") != REDIS_OK)
+ "invalid DB index") != C_OK)
return;
if (server.cluster_enabled && id != 0) {
addReplyError(c,"SELECT is not allowed in cluster mode");
return;
}
- if (selectDb(c,id) == REDIS_ERR) {
- addReplyError(c,"invalid DB index");
+ if (selectDb(c,id) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
} else {
addReply(c,shared.ok);
}
}
-void randomkeyCommand(redisClient *c) {
+void randomkeyCommand(client *c) {
robj *key;
if ((key = dbRandomKey(c->db)) == NULL) {
@@ -357,7 +527,7 @@ void randomkeyCommand(redisClient *c) {
decrRefCount(key);
}
-void keysCommand(redisClient *c) {
+void keysCommand(client *c) {
dictIterator *di;
dictEntry *de;
sds pattern = c->argv[1]->ptr;
@@ -395,20 +565,20 @@ void scanCallback(void *privdata, const dictEntry *de) {
if (o == NULL) {
sds sdskey = dictGetKey(de);
key = createStringObject(sdskey, sdslen(sdskey));
- } else if (o->type == REDIS_SET) {
- key = dictGetKey(de);
- incrRefCount(key);
- } else if (o->type == REDIS_HASH) {
- key = dictGetKey(de);
- incrRefCount(key);
- val = dictGetVal(de);
- incrRefCount(val);
- } else if (o->type == REDIS_ZSET) {
- key = dictGetKey(de);
- incrRefCount(key);
+ } else if (o->type == OBJ_SET) {
+ sds keysds = dictGetKey(de);
+ key = createStringObject(keysds,sdslen(keysds));
+ } else if (o->type == OBJ_HASH) {
+ sds sdskey = dictGetKey(de);
+ sds sdsval = dictGetVal(de);
+ key = createStringObject(sdskey,sdslen(sdskey));
+ val = createStringObject(sdsval,sdslen(sdsval));
+ } else if (o->type == OBJ_ZSET) {
+ sds sdskey = dictGetKey(de);
+ key = createStringObject(sdskey,sdslen(sdskey));
val = createStringObjectFromLongDouble(*(double*)dictGetVal(de),0);
} else {
- redisPanic("Type not handled in SCAN callback.");
+ serverPanic("Type not handled in SCAN callback.");
}
listAddNodeTail(keys, key);
@@ -417,9 +587,9 @@ void scanCallback(void *privdata, const dictEntry *de) {
/* Try to parse a SCAN cursor stored at object 'o':
* if the cursor is valid, store it as unsigned integer into *cursor and
- * returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the
+ * returns C_OK. Otherwise return C_ERR and send an error to the
* client. */
-int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) {
char *eptr;
/* Use strtoul() because we need an *unsigned* long, so
@@ -429,9 +599,9 @@ int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' || errno == ERANGE)
{
addReplyError(c, "invalid cursor");
- return REDIS_ERR;
+ return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* This command implements SCAN, HSCAN and SSCAN commands.
@@ -445,7 +615,7 @@ int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor) {
*
* In the case of a Hash object the function returns both the field and value
* of every element on the Hash. */
-void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
+void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
int i, j;
list *keys = listCreate();
listNode *node, *nextnode;
@@ -456,8 +626,8 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
/* Object must be NULL (to iterate keys names), or the type of the object
* must be Set, Sorted Set, or Hash. */
- redisAssert(o == NULL || o->type == REDIS_SET || o->type == REDIS_HASH ||
- o->type == REDIS_ZSET);
+ serverAssert(o == NULL || o->type == OBJ_SET || o->type == OBJ_HASH ||
+ o->type == OBJ_ZSET);
/* Set i to the first option argument. The previous one is the cursor. */
i = (o == NULL) ? 2 : 3; /* Skip the key argument if needed. */
@@ -467,7 +637,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
j = c->argc - i;
if (!strcasecmp(c->argv[i]->ptr, "count") && j >= 2) {
if (getLongFromObjectOrReply(c, c->argv[i+1], &count, NULL)
- != REDIS_OK)
+ != C_OK)
{
goto cleanup;
}
@@ -505,12 +675,12 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
ht = NULL;
if (o == NULL) {
ht = c->db->dict;
- } else if (o->type == REDIS_SET && o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
- } else if (o->type == REDIS_HASH && o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
ht = o->ptr;
count *= 2; /* We return key / value for this type. */
- } else if (o->type == REDIS_ZSET && o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
ht = zs->dict;
count *= 2; /* We return key / value for this type. */
@@ -530,18 +700,18 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
privdata[0] = keys;
privdata[1] = o;
do {
- cursor = dictScan(ht, cursor, scanCallback, privdata);
+ cursor = dictScan(ht, cursor, scanCallback, NULL, privdata);
} while (cursor &&
maxiterations-- &&
listLength(keys) < (unsigned long)count);
- } else if (o->type == REDIS_SET) {
+ } else if (o->type == OBJ_SET) {
int pos = 0;
int64_t ll;
while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
- } else if (o->type == REDIS_HASH || o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
unsigned char *p = ziplistIndex(o->ptr,0);
unsigned char *vstr;
unsigned int vlen;
@@ -556,7 +726,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
}
cursor = 0;
} else {
- redisPanic("Not handled encoding in SCAN.");
+ serverPanic("Not handled encoding in SCAN.");
}
/* Step 3: Filter elements. */
@@ -572,10 +742,10 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
filter = 1;
} else {
- char buf[REDIS_LONGSTR_SIZE];
+ char buf[LONG_STR_SIZE];
int len;
- redisAssert(kobj->encoding == REDIS_ENCODING_INT);
+ serverAssert(kobj->encoding == OBJ_ENCODING_INT);
len = ll2string(buf,sizeof(buf),(long)kobj->ptr);
if (!stringmatchlen(pat, patlen, buf, len, 0)) filter = 1;
}
@@ -593,7 +763,7 @@ void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor) {
/* If this is a hash or a sorted set, we have a flat list of
* key-value elements, so if this element was filtered, remove the
* value, or skip it if it was not filtered: we only match keys. */
- if (o && (o->type == REDIS_ZSET || o->type == REDIS_HASH)) {
+ if (o && (o->type == OBJ_ZSET || o->type == OBJ_HASH)) {
node = nextnode;
nextnode = listNextNode(node);
if (filter) {
@@ -623,41 +793,46 @@ cleanup:
}
/* The SCAN command completely relies on scanGenericCommand. */
-void scanCommand(redisClient *c) {
+void scanCommand(client *c) {
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[1],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[1],&cursor) == C_ERR) return;
scanGenericCommand(c,NULL,cursor);
}
-void dbsizeCommand(redisClient *c) {
+void dbsizeCommand(client *c) {
addReplyLongLong(c,dictSize(c->db->dict));
}
-void lastsaveCommand(redisClient *c) {
+void lastsaveCommand(client *c) {
addReplyLongLong(c,server.lastsave);
}
-void typeCommand(redisClient *c) {
+void typeCommand(client *c) {
robj *o;
char *type;
- o = lookupKeyRead(c->db,c->argv[1]);
+ o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);
if (o == NULL) {
type = "none";
} else {
switch(o->type) {
- case REDIS_STRING: type = "string"; break;
- case REDIS_LIST: type = "list"; break;
- case REDIS_SET: type = "set"; break;
- case REDIS_ZSET: type = "zset"; break;
- case REDIS_HASH: type = "hash"; break;
+ case OBJ_STRING: type = "string"; break;
+ case OBJ_LIST: type = "list"; break;
+ case OBJ_SET: type = "set"; break;
+ case OBJ_ZSET: type = "zset"; break;
+ case OBJ_HASH: type = "hash"; break;
+ case OBJ_STREAM: type = "stream"; break;
+ case OBJ_MODULE: {
+ moduleValue *mv = o->ptr;
+ type = mv->type->name;
+ }; break;
default: type = "unknown"; break;
}
}
addReplyStatus(c,type);
}
-void shutdownCommand(redisClient *c) {
+void shutdownCommand(client *c) {
int flags = 0;
if (c->argc > 2) {
@@ -665,9 +840,9 @@ void shutdownCommand(redisClient *c) {
return;
} else if (c->argc == 2) {
if (!strcasecmp(c->argv[1]->ptr,"nosave")) {
- flags |= REDIS_SHUTDOWN_NOSAVE;
+ flags |= SHUTDOWN_NOSAVE;
} else if (!strcasecmp(c->argv[1]->ptr,"save")) {
- flags |= REDIS_SHUTDOWN_SAVE;
+ flags |= SHUTDOWN_SAVE;
} else {
addReply(c,shared.syntaxerr);
return;
@@ -680,24 +855,28 @@ void shutdownCommand(redisClient *c) {
*
* Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
if (server.loading || server.sentinel_mode)
- flags = (flags & ~REDIS_SHUTDOWN_SAVE) | REDIS_SHUTDOWN_NOSAVE;
- if (prepareForShutdown(flags) == REDIS_OK) exit(0);
+ flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
+ if (prepareForShutdown(flags) == C_OK) exit(0);
addReplyError(c,"Errors trying to SHUTDOWN. Check logs.");
}
-void renameGenericCommand(redisClient *c, int nx) {
+void renameGenericCommand(client *c, int nx) {
robj *o;
long long expire;
+ int samekey = 0;
- /* To use the same key as src and dst is probably an error */
- if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
- addReply(c,shared.sameobjecterr);
- return;
- }
+    /* When the source and destination keys are the same, no operation is
+     * performed if the key exists. However we still return an error for a
+     * non-existing key. */
+ if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) samekey = 1;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
return;
+ if (samekey) {
+ addReply(c,nx ? shared.czero : shared.ok);
+ return;
+ }
+
incrRefCount(o);
expire = getExpire(c->db,c->argv[1]);
if (lookupKeyWrite(c->db,c->argv[2]) != NULL) {
@@ -711,31 +890,31 @@ void renameGenericCommand(redisClient *c, int nx) {
dbDelete(c->db,c->argv[2]);
}
dbAdd(c->db,c->argv[2],o);
- if (expire != -1) setExpire(c->db,c->argv[2],expire);
+ if (expire != -1) setExpire(c,c->db,c->argv[2],expire);
dbDelete(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[1]);
signalModifiedKey(c->db,c->argv[2]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"rename_from",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from",
c->argv[1],c->db->id);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"rename_to",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to",
c->argv[2],c->db->id);
server.dirty++;
addReply(c,nx ? shared.cone : shared.ok);
}
-void renameCommand(redisClient *c) {
+void renameCommand(client *c) {
renameGenericCommand(c,0);
}
-void renamenxCommand(redisClient *c) {
+void renamenxCommand(client *c) {
renameGenericCommand(c,1);
}
-void moveCommand(redisClient *c) {
+void moveCommand(client *c) {
robj *o;
redisDb *src, *dst;
int srcid;
- long long dbid;
+ long long dbid, expire;
if (server.cluster_enabled) {
addReplyError(c,"MOVE is not allowed in cluster mode");
@@ -746,9 +925,9 @@ void moveCommand(redisClient *c) {
src = c->db;
srcid = c->db->id;
- if (getLongLongFromObject(c->argv[2],&dbid) == REDIS_ERR ||
+ if (getLongLongFromObject(c->argv[2],&dbid) == C_ERR ||
dbid < INT_MIN || dbid > INT_MAX ||
- selectDb(c,dbid) == REDIS_ERR)
+ selectDb(c,dbid) == C_ERR)
{
addReply(c,shared.outofrangeerr);
return;
@@ -769,6 +948,7 @@ void moveCommand(redisClient *c) {
addReply(c,shared.czero);
return;
}
+ expire = getExpire(c->db,c->argv[1]);
/* Return zero if the key already exists in the target DB */
if (lookupKeyWrite(dst,c->argv[1]) != NULL) {
@@ -776,6 +956,7 @@ void moveCommand(redisClient *c) {
return;
}
dbAdd(dst,c->argv[1],o);
+ if (expire != -1) setExpire(c,dst,c->argv[1],expire);
incrRefCount(o);
/* OK! key moved, free the entry in the source DB */
@@ -784,6 +965,93 @@ void moveCommand(redisClient *c) {
addReply(c,shared.cone);
}
+/* Helper function for dbSwapDatabases(): scans the list of keys that have
+ * one or more blocked clients for B[LR]POP or other blocking commands
+ * and signals the keys as ready if they are of the right type. See the comment
+ * where the function is used for more info. */
+void scanDatabaseForReadyLists(redisDb *db) {
+ dictEntry *de;
+ dictIterator *di = dictGetSafeIterator(db->blocking_keys);
+ while((de = dictNext(di)) != NULL) {
+ robj *key = dictGetKey(de);
+ robj *value = lookupKey(db,key,LOOKUP_NOTOUCH);
+ if (value && (value->type == OBJ_LIST ||
+ value->type == OBJ_STREAM ||
+ value->type == OBJ_ZSET))
+ signalKeyAsReady(db, key);
+ }
+ dictReleaseIterator(di);
+}
+
+/* Swap two databases at runtime so that all clients will magically see
+ * the new database even if already connected. Note that the client
+ * structure c->db points to a given DB, so we need to be smarter and
+ * swap the underlying referenced structures, otherwise we would need
+ * to fix all the references to the Redis DB structure.
+ *
+ * Returns C_ERR if at least one of the DB ids is out of range, otherwise
+ * C_OK is returned. */
+int dbSwapDatabases(int id1, int id2) {
+ if (id1 < 0 || id1 >= server.dbnum ||
+ id2 < 0 || id2 >= server.dbnum) return C_ERR;
+ if (id1 == id2) return C_OK;
+ redisDb aux = server.db[id1];
+ redisDb *db1 = &server.db[id1], *db2 = &server.db[id2];
+
+ /* Swap hash tables. Note that we don't swap blocking_keys,
+ * ready_keys and watched_keys, since we want clients to
+ * remain in the same DB they were. */
+ db1->dict = db2->dict;
+ db1->expires = db2->expires;
+ db1->avg_ttl = db2->avg_ttl;
+
+ db2->dict = aux.dict;
+ db2->expires = aux.expires;
+ db2->avg_ttl = aux.avg_ttl;
+
+ /* Now we need to handle clients blocked on lists: as an effect
+ * of swapping the two DBs, a client that was waiting for list
+ * X in a given DB, may now actually be unblocked if X happens
+ * to exist in the new version of the DB, after the swap.
+ *
+ * However normally we only do this check for efficiency reasons
+ * in dbAdd() when a list is created. So here we need to rescan
+ * the list of clients blocked on lists and signal lists as ready
+ * if needed. */
+ scanDatabaseForReadyLists(db1);
+ scanDatabaseForReadyLists(db2);
+ return C_OK;
+}
+
+/* SWAPDB db1 db2 */
+void swapdbCommand(client *c) {
+ long id1, id2;
+
+ /* Not allowed in cluster mode: we have just DB 0 there. */
+ if (server.cluster_enabled) {
+ addReplyError(c,"SWAPDB is not allowed in cluster mode");
+ return;
+ }
+
+ /* Get the two DBs indexes. */
+ if (getLongFromObjectOrReply(c, c->argv[1], &id1,
+ "invalid first DB index") != C_OK)
+ return;
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &id2,
+ "invalid second DB index") != C_OK)
+ return;
+
+ /* Swap... */
+ if (dbSwapDatabases(id1,id2) == C_ERR) {
+ addReplyError(c,"DB index is out of range");
+ return;
+ } else {
+ server.dirty++;
+ addReply(c,shared.ok);
+ }
+}
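A usage sketch of the new command: after the swap, a client that stays SELECTed on DB 0 transparently sees the former contents of DB 1, and vice versa. Assuming DB 1 started out empty:

    redis> SET key "in-db-0"
    OK
    redis> SWAPDB 0 1
    OK
    redis> GET key
    (nil)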
+
/*-----------------------------------------------------------------------------
* Expires API
*----------------------------------------------------------------------------*/
@@ -791,18 +1059,26 @@ void moveCommand(redisClient *c) {
int removeExpire(redisDb *db, robj *key) {
/* An expire may only be removed if there is a corresponding entry in the
* main dict. Otherwise, the key will never be freed. */
- redisAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
+ serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
return dictDelete(db->expires,key->ptr) == DICT_OK;
}
-void setExpire(redisDb *db, robj *key, long long when) {
+/* Set an expire on the specified key. If the expire is set in the context
+ * of a user calling a command, 'c' is the client, otherwise 'c' is set
+ * to NULL. The 'when' parameter is the absolute unix time in milliseconds
+ * after which the key will no longer be considered valid. */
+void setExpire(client *c, redisDb *db, robj *key, long long when) {
dictEntry *kde, *de;
/* Reuse the sds from the main dict in the expire dict */
kde = dictFind(db->dict,key->ptr);
- redisAssertWithInfo(NULL,key,kde != NULL);
- de = dictReplaceRaw(db->expires,dictGetKey(kde));
+ serverAssertWithInfo(NULL,key,kde != NULL);
+ de = dictAddOrFind(db->expires,dictGetKey(kde));
dictSetSignedIntegerVal(de,when);
+
+ int writable_slave = server.masterhost && server.repl_slave_ro == 0;
+ if (c && writable_slave && !(c->flags & CLIENT_MASTER))
+ rememberSlaveKeyWithExpire(db,key);
}
/* Return the expire time of the specified key, or -1 if no expire
@@ -816,7 +1092,7 @@ long long getExpire(redisDb *db, robj *key) {
/* The entry was found in the expire dict, this means it should also
* be present in the main dict (safety check). */
- redisAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
+ serverAssertWithInfo(NULL,key,dictFind(db->dict,key->ptr) != NULL);
return dictGetSignedIntegerVal(de);
}
@@ -828,15 +1104,15 @@ long long getExpire(redisDb *db, robj *key) {
* AOF and the master->slave link guarantee operation ordering, everything
* will be consistent even if we allow write operations against expiring
* keys. */
-void propagateExpire(redisDb *db, robj *key) {
+void propagateExpire(redisDb *db, robj *key, int lazy) {
robj *argv[2];
- argv[0] = shared.del;
+ argv[0] = lazy ? shared.unlink : shared.del;
argv[1] = key;
incrRefCount(argv[0]);
incrRefCount(argv[1]);
- if (server.aof_state != REDIS_AOF_OFF)
+ if (server.aof_state != AOF_OFF)
feedAppendOnlyFile(server.delCommand,db->id,argv,2);
replicationFeedSlaves(server.slaves,db->id,argv,2);
@@ -844,6 +1120,25 @@ void propagateExpire(redisDb *db, robj *key) {
decrRefCount(argv[1]);
}
+/* This function is called when we are going to perform some operation
+ * in a given key, but such key may be already logically expired even if
+ * it still exists in the database. The main way this function is called
+ * is via lookupKey*() family of functions.
+ *
+ * The behavior of the function depends on the replication role of the
+ * instance, because slave instances do not expire keys, they wait
+ * for DELs from the master for consistency matters. However even
+ * slaves will try to have a coherent return value for the function,
+ * so that read commands executed in the slave side will be able to
+ * behave like if the key is expired even if still present (because the
+ * master has yet to propagate the DEL).
+ *
+ * In masters, as a side effect of finding a key which is expired, such
+ * key will be evicted from the database. This may also trigger the
+ * propagation of a DEL/UNLINK command in the AOF / replication stream.
+ *
+ * The return value of the function is 0 if the key is still valid,
+ * otherwise the function returns 1 if the key is expired. */
int expireIfNeeded(redisDb *db, robj *key) {
mstime_t when = getExpire(db,key);
mstime_t now;
@@ -853,7 +1148,7 @@ int expireIfNeeded(redisDb *db, robj *key) {
/* Don't expire anything while loading. It will be done later. */
if (server.loading) return 0;
- /* If we are in the context of a Lua script, we claim that time is
+ /* If we are in the context of a Lua script, we pretend that time is
* blocked to when the Lua script started. This way a key can expire
* only the first time it is accessed and not in the middle of the
* script execution, making propagation to slaves / AOF consistent.
@@ -874,129 +1169,11 @@ int expireIfNeeded(redisDb *db, robj *key) {
/* Delete the key */
server.stat_expiredkeys++;
- propagateExpire(db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_EXPIRED,
+ propagateExpire(db,key,server.lazyfree_lazy_expire);
+ notifyKeyspaceEvent(NOTIFY_EXPIRED,
"expired",key,db->id);
- return dbDelete(db,key);
-}
-
-/*-----------------------------------------------------------------------------
- * Expires Commands
- *----------------------------------------------------------------------------*/
-
-/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
- * and PEXPIREAT. Because the commad second argument may be relative or absolute
- * the "basetime" argument is used to signal what the base time is (either 0
- * for *AT variants of the command, or the current time for relative expires).
- *
- * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
- * the argv[2] parameter. The basetime is always specified in milliseconds. */
-void expireGenericCommand(redisClient *c, long long basetime, int unit) {
- robj *key = c->argv[1], *param = c->argv[2];
- long long when; /* unix time in milliseconds when the key will expire. */
-
- if (getLongLongFromObjectOrReply(c, param, &when, NULL) != REDIS_OK)
- return;
-
- if (unit == UNIT_SECONDS) when *= 1000;
- when += basetime;
-
- /* No key, return zero. */
- if (lookupKeyWrite(c->db,key) == NULL) {
- addReply(c,shared.czero);
- return;
- }
-
- /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
- * should never be executed as a DEL when load the AOF or in the context
- * of a slave instance.
- *
- * Instead we take the other branch of the IF statement setting an expire
- * (possibly in the past) and wait for an explicit DEL from the master. */
- if (when <= mstime() && !server.loading && !server.masterhost) {
- robj *aux;
-
- redisAssertWithInfo(c,key,dbDelete(c->db,key));
- server.dirty++;
-
- /* Replicate/AOF this as an explicit DEL. */
- aux = createStringObject("DEL",3);
- rewriteClientCommandVector(c,2,aux,key);
- decrRefCount(aux);
- signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
- addReply(c, shared.cone);
- return;
- } else {
- setExpire(c->db,key,when);
- addReply(c,shared.cone);
- signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"expire",key,c->db->id);
- server.dirty++;
- return;
- }
-}
-
-void expireCommand(redisClient *c) {
- expireGenericCommand(c,mstime(),UNIT_SECONDS);
-}
-
-void expireatCommand(redisClient *c) {
- expireGenericCommand(c,0,UNIT_SECONDS);
-}
-
-void pexpireCommand(redisClient *c) {
- expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
-}
-
-void pexpireatCommand(redisClient *c) {
- expireGenericCommand(c,0,UNIT_MILLISECONDS);
-}
-
-void ttlGenericCommand(redisClient *c, int output_ms) {
- long long expire, ttl = -1;
-
- /* If the key does not exist at all, return -2 */
- if (lookupKeyRead(c->db,c->argv[1]) == NULL) {
- addReplyLongLong(c,-2);
- return;
- }
- /* The key exists. Return -1 if it has no expire, or the actual
- * TTL value otherwise. */
- expire = getExpire(c->db,c->argv[1]);
- if (expire != -1) {
- ttl = expire-mstime();
- if (ttl < 0) ttl = 0;
- }
- if (ttl == -1) {
- addReplyLongLong(c,-1);
- } else {
- addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
- }
-}
-
-void ttlCommand(redisClient *c) {
- ttlGenericCommand(c, 0);
-}
-
-void pttlCommand(redisClient *c) {
- ttlGenericCommand(c, 1);
-}
-
-void persistCommand(redisClient *c) {
- dictEntry *de;
-
- de = dictFind(c->db->dict,c->argv[1]->ptr);
- if (de == NULL) {
- addReply(c,shared.czero);
- } else {
- if (removeExpire(c->db,c->argv[1])) {
- addReply(c,shared.cone);
- server.dirty++;
- } else {
- addReply(c,shared.czero);
- }
- }
+ return server.lazyfree_lazy_expire ? dbAsyncDelete(db,key) :
+ dbSyncDelete(db,key);
}
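The lazy branch above is driven by configuration. With the following (illustrative) redis.conf setting, an expired key is reclaimed asynchronously and the expiry is propagated to slaves and the AOF as UNLINK rather than DEL:

    lazyfree-lazy-expire yes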
/* -----------------------------------------------------------------------------
@@ -1007,17 +1184,32 @@ void persistCommand(redisClient *c) {
* (firstkey, lastkey, step). */
int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) {
int j, i = 0, last, *keys;
- REDIS_NOTUSED(argv);
+ UNUSED(argv);
if (cmd->firstkey == 0) {
*numkeys = 0;
return NULL;
}
+
last = cmd->lastkey;
if (last < 0) last = argc+last;
keys = zmalloc(sizeof(int)*((last - cmd->firstkey)+1));
for (j = cmd->firstkey; j <= last; j += cmd->keystep) {
- redisAssert(j < argc);
+ if (j >= argc) {
+        /* Module commands, and standard commands with a non-fixed number
+         * of arguments (negative arity parameter), do not have dispatch
+         * time arity checks, so we need to handle the case where the user
+ * passed an invalid number of arguments here. In this case we
+ * return no keys and expect the command implementation to report
+ * an arity or syntax error. */
+ if (cmd->flags & CMD_MODULE || cmd->arity < 0) {
+ zfree(keys);
+ *numkeys = 0;
+ return NULL;
+ } else {
+ serverPanic("Redis built-in command declared keys positions not matching the arity requirements.");
+ }
+ }
keys[i++] = j;
}
*numkeys = i;
@@ -1036,7 +1228,9 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in
* This function uses the command table if a command-specific helper function
* is not required, otherwise it calls the command-specific function. */
int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
- if (cmd->getkeys_proc) {
+ if (cmd->flags & CMD_MODULE_GETKEYS) {
+ return moduleGetCommandKeysViaAPI(cmd,argv,argc,numkeys);
+ } else if (!(cmd->flags & CMD_MODULE) && cmd->getkeys_proc) {
return cmd->getkeys_proc(cmd,argv,argc,numkeys);
} else {
return getKeysUsingCommandTable(cmd,argv,argc,numkeys);
@@ -1053,12 +1247,12 @@ void getKeysFreeResult(int *result) {
* ZINTERSTORE <destkey> <num-keys> <key> <key> ... <key> <options> */
int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
- REDIS_NOTUSED(cmd);
+ UNUSED(cmd);
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
- if (num > (argc-3)) {
+ if (num < 1 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@@ -1082,12 +1276,12 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu
* EVALSHA <script> <num-keys> <key> <key> ... <key> [more stuff] */
int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, num, *keys;
- REDIS_NOTUSED(cmd);
+ UNUSED(cmd);
num = atoi(argv[2]->ptr);
/* Sanity check. Don't return any key if the command is going to
* reply with syntax error. */
- if (num > (argc-3)) {
+ if (num <= 0 || num > (argc-3)) {
*numkeys = 0;
return NULL;
}
@@ -1110,7 +1304,7 @@ int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
* correctly identify keys in the "STORE" option. */
int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
int i, j, num, *keys, found_store = 0;
- REDIS_NOTUSED(cmd);
+ UNUSED(cmd);
num = 0;
keys = zmalloc(sizeof(int)*2); /* Alloc 2 places for the worst case. */
@@ -1150,90 +1344,196 @@ int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
return keys;
}
+int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ int i, num, first, *keys;
+ UNUSED(cmd);
+
+ /* Assume the obvious form. */
+ first = 3;
+ num = 1;
+
+ /* But check for the extended one with the KEYS option. */
+ if (argc > 6) {
+ for (i = 6; i < argc; i++) {
+ if (!strcasecmp(argv[i]->ptr,"keys") &&
+ sdslen(argv[3]->ptr) == 0)
+ {
+ first = i+1;
+ num = argc-first;
+ break;
+ }
+ }
+ }
+
+ keys = zmalloc(sizeof(int)*num);
+ for (i = 0; i < num; i++) keys[i] = first+i;
+ *numkeys = num;
+ return keys;
+}
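Two worked examples of the index math above, using hypothetical arguments (argv[0] is MIGRATE, so positions are 0-based):

    MIGRATE host 6379 key 0 1000              -> keys[] = {3}
    MIGRATE host 6379 "" 0 1000 KEYS a b c    -> keys[] = {7, 8, 9}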
+
+/* Helper function to extract keys from following commands:
+ * GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count] [STORE key] [STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ... */
+int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ int i, num, *keys;
+ UNUSED(cmd);
+
+ /* Check for the presence of the stored key in the command */
+ int stored_key = -1;
+ for (i = 5; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+        /* If the user specifies both the "store" and "storedist" options,
+         * the second key specified overrides the first one. This behavior
+         * is kept the same as in the georadiusCommand() implementation.
+         */
+ if ((!strcasecmp(arg, "store") || !strcasecmp(arg, "storedist")) && ((i+1) < argc)) {
+ stored_key = i+1;
+ i++;
+ }
+ }
+ num = 1 + (stored_key == -1 ? 0 : 1);
+
+ /* Keys in the command come from two places:
+ * argv[1] = key,
+ * argv[5...n] = stored key if present
+ */
+ keys = zmalloc(sizeof(int) * num);
+
+ /* Add all key positions to keys[] */
+ keys[0] = 1;
+ if(num > 1) {
+ keys[1] = stored_key;
+ }
+ *numkeys = num;
+ return keys;
+}
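Worked examples for the extractor above, with hypothetical arguments; the searched key is always at position 1, and a STORE or STOREDIST target adds a second position:

    GEORADIUS src 15 37 200 km                -> keys[] = {1}
    GEORADIUS src 15 37 200 km STORE dst      -> keys[] = {1, 7}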
+
+/* XREAD [BLOCK <milliseconds>] [COUNT <count>] [GROUP <groupname> <ttl>]
+ * STREAMS key_1 key_2 ... key_N ID_1 ID_2 ... ID_N */
+int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ int i, num = 0, *keys;
+ UNUSED(cmd);
+
+ /* We need to parse the options of the command in order to seek the first
+ * "STREAMS" string which is actually the option. This is needed because
+ * "STREAMS" could also be the name of the consumer group and even the
+ * name of the stream key. */
+ int streams_pos = -1;
+ for (i = 1; i < argc; i++) {
+ char *arg = argv[i]->ptr;
+ if (!strcasecmp(arg, "block")) {
+ i++; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "count")) {
+ i++; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "group")) {
+ i += 2; /* Skip option argument. */
+ } else if (!strcasecmp(arg, "noack")) {
+ /* Nothing to do. */
+ } else if (!strcasecmp(arg, "streams")) {
+ streams_pos = i;
+ break;
+ } else {
+ break; /* Syntax error. */
+ }
+ }
+ if (streams_pos != -1) num = argc - streams_pos - 1;
+
+ /* Syntax error. */
+ if (streams_pos == -1 || num == 0 || num % 2 != 0) {
+ *numkeys = 0;
+ return NULL;
+ }
+    num /= 2; /* We have half as many keys as there are arguments, because
+                 there are also the IDs, one per key. */
+
+ keys = zmalloc(sizeof(int) * num);
+ for (i = streams_pos+1; i < argc-num; i++) keys[i-streams_pos-1] = i;
+ *numkeys = num;
+ return keys;
+}
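A worked example of the arithmetic above, with hypothetical stream names: the arguments after STREAMS are half keys and half IDs, so only the first half are reported as key positions.

    XREAD COUNT 5 STREAMS s1 s2 0-0 0-0    -> keys[] = {4, 5}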
+
/* Slot to Key API. This is used by Redis Cluster in order to obtain in
* a fast way a key that belongs to a specified hash slot. This is useful
- * while rehashing the cluster. */
-void slotToKeyAdd(robj *key) {
+ * while rehashing the cluster and in other conditions when we need to
+ * know whether we have keys for a given hash slot. */
+void slotToKeyUpdateKey(robj *key, int add) {
unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr));
+ unsigned char buf[64];
+ unsigned char *indexed = buf;
+ size_t keylen = sdslen(key->ptr);
+
+ server.cluster->slots_keys_count[hashslot] += add ? 1 : -1;
+ if (keylen+2 > 64) indexed = zmalloc(keylen+2);
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ memcpy(indexed+2,key->ptr,keylen);
+ if (add) {
+ raxInsert(server.cluster->slots_to_keys,indexed,keylen+2,NULL,NULL);
+ } else {
+ raxRemove(server.cluster->slots_to_keys,indexed,keylen+2,NULL);
+ }
+ if (indexed != buf) zfree(indexed);
+}
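To illustrate the indexed layout built above: each entry in the radix tree is the key name prefixed by its hash slot as two big-endian bytes. A hypothetical key "foo" belonging to slot 0x31c3 would be stored as the byte sequence below, so seeking ">=" on the two prefix bytes enumerates exactly that slot's keys:

    { 0x31, 0xc3, 'f', 'o', 'o' }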
- zslInsert(server.cluster->slots_to_keys,hashslot,key);
- incrRefCount(key);
+void slotToKeyAdd(robj *key) {
+ slotToKeyUpdateKey(key,1);
}
void slotToKeyDel(robj *key) {
- unsigned int hashslot = keyHashSlot(key->ptr,sdslen(key->ptr));
-
- zslDelete(server.cluster->slots_to_keys,hashslot,key);
+ slotToKeyUpdateKey(key,0);
}
void slotToKeyFlush(void) {
- zslFree(server.cluster->slots_to_keys);
- server.cluster->slots_to_keys = zslCreate();
+ raxFree(server.cluster->slots_to_keys);
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
}
+/* Populate the specified array of objects with keys in the specified slot.
+ * New objects are returned to represent keys; it's up to the caller to
+ * decrement the reference count to release the key names. */
unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count) {
- zskiplistNode *n;
- zrangespec range;
+ raxIterator iter;
int j = 0;
-
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
-
- n = zslFirstInRange(server.cluster->slots_to_keys, &range);
- while(n && n->score == hashslot && count--) {
- keys[j++] = n->obj;
- n = n->level[0].forward;
+ unsigned char indexed[2];
+
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_keys);
+ raxSeek(&iter,">=",indexed,2);
+ while(count-- && raxNext(&iter)) {
+ if (iter.key[0] != indexed[0] || iter.key[1] != indexed[1]) break;
+ keys[j++] = createStringObject((char*)iter.key+2,iter.key_len-2);
}
+ raxStop(&iter);
return j;
}
/* Remove all the keys in the specified hash slot.
* The number of removed items is returned. */
unsigned int delKeysInSlot(unsigned int hashslot) {
- zskiplistNode *n;
- zrangespec range;
+ raxIterator iter;
int j = 0;
+ unsigned char indexed[2];
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
+ indexed[0] = (hashslot >> 8) & 0xff;
+ indexed[1] = hashslot & 0xff;
+ raxStart(&iter,server.cluster->slots_to_keys);
+ while(server.cluster->slots_keys_count[hashslot]) {
+ raxSeek(&iter,">=",indexed,2);
+ raxNext(&iter);
- n = zslFirstInRange(server.cluster->slots_to_keys, &range);
- while(n && n->score == hashslot) {
- robj *key = n->obj;
- n = n->level[0].forward; /* Go to the next item before freeing it. */
- incrRefCount(key); /* Protect the object while freeing it. */
+ robj *key = createStringObject((char*)iter.key+2,iter.key_len-2);
dbDelete(&server.db[0],key);
decrRefCount(key);
j++;
}
+ raxStop(&iter);
return j;
}
unsigned int countKeysInSlot(unsigned int hashslot) {
- zskiplist *zsl = server.cluster->slots_to_keys;
- zskiplistNode *zn;
- zrangespec range;
- int rank, count = 0;
-
- range.min = range.max = hashslot;
- range.minex = range.maxex = 0;
-
- /* Find first element in range */
- zn = zslFirstInRange(zsl, &range);
-
- /* Use rank of first element, if any, to determine preliminary count */
- if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
- count = (zsl->length - (rank - 1));
-
- /* Find last element in range */
- zn = zslLastInRange(zsl, &range);
-
- /* Use rank of last element, if any, to determine the actual count */
- if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
- count -= (zsl->length - rank);
- }
- }
- return count;
+ return server.cluster->slots_keys_count[hashslot];
}
diff --git a/src/debug.c b/src/debug.c
index 7783196a0..a66390dbb 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -27,18 +27,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
#include "crc64.h"
#include <arpa/inet.h>
#include <signal.h>
+#include <dlfcn.h>
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#include <ucontext.h>
#include <fcntl.h>
#include "bio.h"
+#include <unistd.h>
#endif /* HAVE_BACKTRACE */
#ifdef __CYGWIN__
@@ -124,7 +126,7 @@ void computeDatasetDigest(unsigned char *final) {
redisDb *db = server.db+j;
if (dictSize(db->dict) == 0) continue;
- di = dictGetIterator(db->dict);
+ di = dictGetSafeIterator(db->dict);
/* hash the DB id, so the same dataset moved in a different
* DB will lead to a different digest */
@@ -150,10 +152,10 @@ void computeDatasetDigest(unsigned char *final) {
expiretime = getExpire(db,keyobj);
/* Save the key and associated value */
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
mixObjectDigest(digest,o);
- } else if (o->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(o,0,REDIS_TAIL);
+ } else if (o->type == OBJ_LIST) {
+ listTypeIterator *li = listTypeInitIterator(o,0,LIST_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
robj *eleobj = listTypeGet(&entry);
@@ -161,18 +163,18 @@ void computeDatasetDigest(unsigned char *final) {
decrRefCount(eleobj);
}
listTypeReleaseIterator(li);
- } else if (o->type == REDIS_SET) {
+ } else if (o->type == OBJ_SET) {
setTypeIterator *si = setTypeInitIterator(o);
- robj *ele;
- while((ele = setTypeNextObject(si)) != NULL) {
- xorObjectDigest(digest,ele);
- decrRefCount(ele);
+ sds sdsele;
+ while((sdsele = setTypeNextObject(si)) != NULL) {
+ xorDigest(digest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
}
setTypeReleaseIterator(si);
- } else if (o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_ZSET) {
unsigned char eledigest[20];
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = o->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -181,12 +183,12 @@ void computeDatasetDigest(unsigned char *final) {
double score;
eptr = ziplistIndex(zl,0);
- redisAssert(eptr != NULL);
+ serverAssert(eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
while (eptr != NULL) {
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vll));
score = zzlGetScore(sptr);
memset(eledigest,0,20);
@@ -202,45 +204,73 @@ void computeDatasetDigest(unsigned char *final) {
xorDigest(digest,eledigest,20);
zzlNext(zl,&eptr,&sptr);
}
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
dictIterator *di = dictGetIterator(zs->dict);
dictEntry *de;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
+ sds sdsele = dictGetKey(de);
double *score = dictGetVal(de);
snprintf(buf,sizeof(buf),"%.17g",*score);
memset(eledigest,0,20);
- mixObjectDigest(eledigest,eleobj);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
mixDigest(eledigest,buf,strlen(buf));
xorDigest(digest,eledigest,20);
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
- } else if (o->type == REDIS_HASH) {
- hashTypeIterator *hi;
- robj *obj;
-
- hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
+ } else if (o->type == OBJ_HASH) {
+ hashTypeIterator *hi = hashTypeInitIterator(o);
+ while (hashTypeNext(hi) != C_ERR) {
unsigned char eledigest[20];
+ sds sdsele;
memset(eledigest,0,20);
- obj = hashTypeCurrentObject(hi,REDIS_HASH_KEY);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
- obj = hashTypeCurrentObject(hi,REDIS_HASH_VALUE);
- mixObjectDigest(eledigest,obj);
- decrRefCount(obj);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
+ sdsele = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ mixDigest(eledigest,sdsele,sdslen(sdsele));
+ sdsfree(sdsele);
xorDigest(digest,eledigest,20);
}
hashTypeReleaseIterator(hi);
+ } else if (o->type == OBJ_STREAM) {
+ streamIterator si;
+ streamIteratorStart(&si,o->ptr,NULL,NULL,0);
+ streamID id;
+ int64_t numfields;
+
+ while(streamIteratorGetID(&si,&id,&numfields)) {
+ sds itemid = sdscatfmt(sdsempty(),"%U.%U",id.ms,id.seq);
+ mixDigest(digest,itemid,sdslen(itemid));
+ sdsfree(itemid);
+
+ while(numfields--) {
+ unsigned char *field, *value;
+ int64_t field_len, value_len;
+ streamIteratorGetField(&si,&field,&value,
+ &field_len,&value_len);
+ mixDigest(digest,field,field_len);
+ mixDigest(digest,value,value_len);
+ }
+ }
+ streamIteratorStop(&si);
+ } else if (o->type == OBJ_MODULE) {
+ RedisModuleDigest md;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitDigestContext(md);
+ if (mt->digest) {
+ mt->digest(&md,mv->value);
+ xorDigest(digest,md.x,sizeof(md.x));
+ }
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
/* If the key has an expire, add it to the mix */
if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
@@ -252,42 +282,84 @@ void computeDatasetDigest(unsigned char *final) {
}
}
-void inputCatSds(void *result, const char *str) {
- /* result is actually a (sds *), so re-cast it here */
- sds *info = (sds *)result;
- *info = sdscat(*info, str);
-}
-
-void debugCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
+void debugCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"ASSERT -- Crash by assertion failed.",
+"CHANGE-REPL-ID -- Change the replication IDs of the instance. Dangerous, should be used only for testing the replication subsystem.",
+"CRASH-AND-RECOVER <milliseconds> -- Hard crash and restart after <milliseconds> delay.",
+"DIGEST -- Output a hex signature representing the current DB content.",
+"ERROR <string> -- Return a Redis protocol error with <string> as message. Useful for clients unit tests to simulate Redis errors.",
+"LOG <message> -- write message to the server log.",
+"HTSTATS <dbid> -- Return hash table statistics of the specified Redis database.",
+"HTSTATS-KEY <key> -- Like htstats but for the hash table stored as key's value.",
+"LOADAOF -- Flush the AOF buffers on disk and reload the AOF in memory.",
+"LUA-ALWAYS-REPLICATE-COMMANDS <0|1> -- Setting it to 1 makes Lua replication defaulting to replicating single commands, without the script having to enable effects replication.",
+"OBJECT <key> -- Show low level info about key and associated value.",
+"PANIC -- Crash the server simulating a panic.",
+"POPULATE <count> [prefix] [size] -- Create <count> string keys named key:<num>. If a prefix is specified is used instead of the 'key' prefix.",
+"RELOAD -- Save the RDB on disk and reload it back in memory.",
+"RESTART -- Graceful restart: save config, db, restart.",
+"SDSLEN <key> -- Show low level SDS string info representing key and value.",
+"SEGFAULT -- Crash the server with sigsegv.",
+"SET-ACTIVE-EXPIRE <0|1> -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
+"SLEEP <seconds> -- Stop the server for <seconds>. Decimals allowed.",
+"STRUCTSIZE -- Return the size of different Redis core C structures.",
+"ZIPLIST <key> -- Show low level info about the ziplist encoding.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
*((char*)-1) = 'x';
+ } else if (!strcasecmp(c->argv[1]->ptr,"panic")) {
+ serverPanic("DEBUG PANIC called at Unix time %ld", time(NULL));
+ } else if (!strcasecmp(c->argv[1]->ptr,"restart") ||
+ !strcasecmp(c->argv[1]->ptr,"crash-and-recover"))
+ {
+ long long delay = 0;
+ if (c->argc >= 3) {
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &delay, NULL)
+ != C_OK) return;
+ if (delay < 0) delay = 0;
+ }
+ int flags = !strcasecmp(c->argv[1]->ptr,"restart") ?
+ (RESTART_SERVER_GRACEFULLY|RESTART_SERVER_CONFIG_REWRITE) :
+ RESTART_SERVER_NONE;
+ restartServer(flags,delay);
+ addReplyError(c,"failed to restart the server. Check server logs.");
} else if (!strcasecmp(c->argv[1]->ptr,"oom")) {
void *ptr = zmalloc(ULONG_MAX); /* Should trigger an out of memory. */
zfree(ptr);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"assert")) {
if (c->argc >= 3) c->argv[2] = tryObjectEncoding(c->argv[2]);
- redisAssertWithInfo(c,c->argv[0],1 == 2);
+ serverAssertWithInfo(c,c->argv[0],1 == 2);
+ } else if (!strcasecmp(c->argv[1]->ptr,"log") && c->argc == 3) {
+ serverLog(LL_WARNING, "DEBUG LOG: %s", (char*)c->argv[2]->ptr);
+ addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
- if (rdbSave(server.rdb_filename) != REDIS_OK) {
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(server.rdb_filename,rsiptr) != C_OK) {
addReply(c,shared.err);
return;
}
- emptyDb(NULL);
- if (rdbLoad(server.rdb_filename) != REDIS_OK) {
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
+ if (rdbLoad(server.rdb_filename,NULL) != C_OK) {
addReplyError(c,"Error trying to load the RDB dump");
return;
}
- redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
+ serverLog(LL_WARNING,"DB reloaded by DEBUG RELOAD");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
- emptyDb(NULL);
- if (loadAppendOnlyFile(server.aof_filename) != REDIS_OK) {
+ if (server.aof_state != AOF_OFF) flushAppendOnlyFile(1);
+ emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);
+ if (loadAppendOnlyFile(server.aof_filename) != C_OK) {
addReply(c,shared.err);
return;
}
server.dirty = 0; /* Prevent AOF / replication */
- redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
+ serverLog(LL_WARNING,"Append Only File loaded by DEBUG LOADAOF");
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
dictEntry *de;
@@ -301,13 +373,13 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
strenc = strEncoding(val->encoding);
- char extra[128] = {0};
- if (val->encoding == REDIS_ENCODING_QUICKLIST) {
+ char extra[138] = {0};
+ if (val->encoding == OBJ_ENCODING_QUICKLIST) {
char *nextra = extra;
int remaining = sizeof(extra);
quicklist *ql = val->ptr;
/* Add number of quicklist nodes */
- int used = snprintf(nextra, remaining, " ql_nodes:%u", ql->len);
+ int used = snprintf(nextra, remaining, " ql_nodes:%lu", ql->len);
nextra += used;
remaining -= used;
/* Add average quicklist fill factor */
@@ -336,10 +408,10 @@ void debugCommand(redisClient *c) {
addReplyStatusFormat(c,
"Value at:%p refcount:%d "
- "encoding:%s serializedlength:%lld "
+ "encoding:%s serializedlength:%zu "
"lru:%d lru_seconds_idle:%llu%s",
(void*)val, val->refcount,
- strenc, (long long) rdbSavedObjectLen(val),
+ strenc, rdbSavedObjectLen(val),
val->lru, estimateObjectIdleTime(val)/1000, extra);
} else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
dictEntry *de;
@@ -353,37 +425,62 @@ void debugCommand(redisClient *c) {
val = dictGetVal(de);
key = dictGetKey(de);
- if (val->type != REDIS_STRING || !sdsEncodedObject(val)) {
+ if (val->type != OBJ_STRING || !sdsEncodedObject(val)) {
addReplyError(c,"Not an sds encoded string.");
} else {
addReplyStatusFormat(c,
- "key_sds_len:%lld, key_sds_avail:%lld, "
- "val_sds_len:%lld, val_sds_avail:%lld",
+ "key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, "
+ "val_sds_len:%lld, val_sds_avail:%lld, val_zmalloc: %lld",
(long long) sdslen(key),
(long long) sdsavail(key),
+ (long long) sdsZmallocSize(key),
(long long) sdslen(val->ptr),
- (long long) sdsavail(val->ptr));
+ (long long) sdsavail(val->ptr),
+ (long long) getStringObjectSdsUsedMemory(val));
+ }
+ } else if (!strcasecmp(c->argv[1]->ptr,"ziplist") && c->argc == 3) {
+ robj *o;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ if (o->encoding != OBJ_ENCODING_ZIPLIST) {
+ addReplyError(c,"Not an sds encoded string.");
+ } else {
+ ziplistRepr(o->ptr);
+ addReplyStatus(c,"Ziplist structure printed on stdout");
}
} else if (!strcasecmp(c->argv[1]->ptr,"populate") &&
- (c->argc == 3 || c->argc == 4)) {
+ c->argc >= 3 && c->argc <= 5) {
long keys, j;
robj *key, *val;
char buf[128];
- if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != C_OK)
return;
dictExpand(c->db->dict,keys);
for (j = 0; j < keys; j++) {
+ long valsize = 0;
snprintf(buf,sizeof(buf),"%s:%lu",
(c->argc == 3) ? "key" : (char*)c->argv[3]->ptr, j);
key = createStringObject(buf,strlen(buf));
+ if (c->argc == 5)
+ if (getLongFromObjectOrReply(c, c->argv[4], &valsize, NULL) != C_OK)
+ return;
if (lookupKeyWrite(c->db,key) != NULL) {
decrRefCount(key);
continue;
}
snprintf(buf,sizeof(buf),"value:%lu",j);
- val = createStringObject(buf,strlen(buf));
+ if (valsize==0)
+ val = createStringObject(buf,strlen(buf));
+ else {
+ int buflen = strlen(buf);
+ val = createStringObject(NULL,valsize);
+ memcpy(val->ptr, buf, valsize<=buflen? valsize: buflen);
+ }
dbAdd(c->db,key,val);
+ signalModifiedKey(c->db,key);
decrRefCount(key);
}
addReply(c,shared.ok);
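+ /* Illustrative example (not part of the patch): "DEBUG POPULATE 1000
+ * foo 100" creates keys foo:0 ... foo:999, each with a 100 byte string
+ * value that begins with "value:<num>". */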
@@ -411,6 +508,11 @@ void debugCommand(redisClient *c) {
{
server.active_expire_enabled = atoi(c->argv[2]->ptr);
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"lua-always-replicate-commands") &&
+ c->argc == 3)
+ {
+ server.lua_always_replicate_commands = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"error") && c->argc == 3) {
sds errstr = sdsnewlen("-",1);
@@ -418,115 +520,174 @@ void debugCommand(redisClient *c) {
errstr = sdsmapchars(errstr,"\n\r"," ",2); /* no newlines in errors. */
errstr = sdscatlen(errstr,"\r\n",2);
addReplySds(c,errstr);
- } else if (!strcasecmp(c->argv[1]->ptr,"jemalloc") && c->argc == 3) {
-#if defined(USE_JEMALLOC)
- if (!strcasecmp(c->argv[2]->ptr, "info")) {
- sds info = sdsempty();
- je_malloc_stats_print(inputCatSds, &info, NULL);
- addReplyBulkSds(c, info);
+ } else if (!strcasecmp(c->argv[1]->ptr,"structsize") && c->argc == 2) {
+ sds sizes = sdsempty();
+ sizes = sdscatprintf(sizes,"bits:%d ",(sizeof(void*) == 8)?64:32);
+ sizes = sdscatprintf(sizes,"robj:%d ",(int)sizeof(robj));
+ sizes = sdscatprintf(sizes,"dictentry:%d ",(int)sizeof(dictEntry));
+ sizes = sdscatprintf(sizes,"sdshdr5:%d ",(int)sizeof(struct sdshdr5));
+ sizes = sdscatprintf(sizes,"sdshdr8:%d ",(int)sizeof(struct sdshdr8));
+ sizes = sdscatprintf(sizes,"sdshdr16:%d ",(int)sizeof(struct sdshdr16));
+ sizes = sdscatprintf(sizes,"sdshdr32:%d ",(int)sizeof(struct sdshdr32));
+ sizes = sdscatprintf(sizes,"sdshdr64:%d ",(int)sizeof(struct sdshdr64));
+ addReplyBulkSds(c,sizes);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats") && c->argc == 3) {
+ long dbid;
+ sds stats = sdsempty();
+ char buf[4096];
+
+ if (getLongFromObjectOrReply(c, c->argv[2], &dbid, NULL) != C_OK)
+ return;
+ if (dbid < 0 || dbid >= server.dbnum) {
+ addReplyError(c,"Out of range database");
+ return;
+ }
+
+ stats = sdscatprintf(stats,"[Dictionary HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].dict);
+ stats = sdscat(stats,buf);
+
+ stats = sdscatprintf(stats,"[Expires HT]\n");
+ dictGetStats(buf,sizeof(buf),server.db[dbid].expires);
+ stats = sdscat(stats,buf);
+
+ addReplyBulkSds(c,stats);
+ } else if (!strcasecmp(c->argv[1]->ptr,"htstats-key") && c->argc == 3) {
+ robj *o;
+ dict *ht = NULL;
+
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
+ == NULL) return;
+
+ /* Get the hash table reference from the object, if possible. */
+ switch (o->encoding) {
+ case OBJ_ENCODING_SKIPLIST:
+ {
+ zset *zs = o->ptr;
+ ht = zs->dict;
+ }
+ break;
+ case OBJ_ENCODING_HT:
+ ht = o->ptr;
+ break;
+ }
+
+ if (ht == NULL) {
+ addReplyError(c,"The value stored at the specified key is not "
+ "represented using an hash table");
} else {
- addReplyErrorFormat(c, "Valid jemalloc debug fields: info");
+ char buf[4096];
+ dictGetStats(buf,sizeof(buf),ht);
+ addReplyBulkCString(c,buf);
}
-#else
- addReplyErrorFormat(c, "jemalloc support not available");
-#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"change-repl-id") && c->argc == 2) {
+ serverLog(LL_WARNING,"Changing replication IDs after receiving DEBUG change-repl-id");
+ changeReplicationId();
+ clearReplicationId2();
+ addReply(c,shared.ok);
} else {
- addReplyErrorFormat(c, "Unknown DEBUG subcommand or wrong number of arguments for '%s'",
- (char*)c->argv[1]->ptr);
+ addReplySubcommandSyntaxError(c);
+ return;
}
}
/* =========================== Crash handling ============================== */
-void _redisAssert(char *estr, char *file, int line) {
+void _serverAssert(const char *estr, const char *file, int line) {
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
- redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED ===");
+ serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
#ifdef HAVE_BACKTRACE
server.assert_failed = estr;
server.assert_file = file;
server.assert_line = line;
- redisLog(REDIS_WARNING,"(forcing SIGSEGV to print the bug report.)");
+ serverLog(LL_WARNING,"(forcing SIGSEGV to print the bug report.)");
#endif
*((char*)-1) = 'x';
}
-void _redisAssertPrintClientInfo(redisClient *c) {
+void _serverAssertPrintClientInfo(const client *c) {
int j;
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
- redisLog(REDIS_WARNING,"client->flags = %d", c->flags);
- redisLog(REDIS_WARNING,"client->fd = %d", c->fd);
- redisLog(REDIS_WARNING,"client->argc = %d", c->argc);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
+ serverLog(LL_WARNING,"client->flags = %d", c->flags);
+ serverLog(LL_WARNING,"client->fd = %d", c->fd);
+ serverLog(LL_WARNING,"client->argc = %d", c->argc);
for (j=0; j < c->argc; j++) {
char buf[128];
char *arg;
- if (c->argv[j]->type == REDIS_STRING && sdsEncodedObject(c->argv[j])) {
+ if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) {
arg = (char*) c->argv[j]->ptr;
} else {
- snprintf(buf,sizeof(buf),"Object type: %d, encoding: %d",
+ snprintf(buf,sizeof(buf),"Object type: %u, encoding: %u",
c->argv[j]->type, c->argv[j]->encoding);
arg = buf;
}
- redisLog(REDIS_WARNING,"client->argv[%d] = \"%s\" (refcount: %d)",
+ serverLog(LL_WARNING,"client->argv[%d] = \"%s\" (refcount: %d)",
j, arg, c->argv[j]->refcount);
}
}
-void redisLogObjectDebugInfo(robj *o) {
- redisLog(REDIS_WARNING,"Object type: %d", o->type);
- redisLog(REDIS_WARNING,"Object encoding: %d", o->encoding);
- redisLog(REDIS_WARNING,"Object refcount: %d", o->refcount);
- if (o->type == REDIS_STRING && sdsEncodedObject(o)) {
- redisLog(REDIS_WARNING,"Object raw string len: %zu", sdslen(o->ptr));
+void serverLogObjectDebugInfo(const robj *o) {
+ serverLog(LL_WARNING,"Object type: %d", o->type);
+ serverLog(LL_WARNING,"Object encoding: %d", o->encoding);
+ serverLog(LL_WARNING,"Object refcount: %d", o->refcount);
+ if (o->type == OBJ_STRING && sdsEncodedObject(o)) {
+ serverLog(LL_WARNING,"Object raw string len: %zu", sdslen(o->ptr));
if (sdslen(o->ptr) < 4096) {
sds repr = sdscatrepr(sdsempty(),o->ptr,sdslen(o->ptr));
- redisLog(REDIS_WARNING,"Object raw string content: %s", repr);
+ serverLog(LL_WARNING,"Object raw string content: %s", repr);
sdsfree(repr);
}
- } else if (o->type == REDIS_LIST) {
- redisLog(REDIS_WARNING,"List length: %d", (int) listTypeLength(o));
- } else if (o->type == REDIS_SET) {
- redisLog(REDIS_WARNING,"Set size: %d", (int) setTypeSize(o));
- } else if (o->type == REDIS_HASH) {
- redisLog(REDIS_WARNING,"Hash size: %d", (int) hashTypeLength(o));
- } else if (o->type == REDIS_ZSET) {
- redisLog(REDIS_WARNING,"Sorted set size: %d", (int) zsetLength(o));
- if (o->encoding == REDIS_ENCODING_SKIPLIST)
- redisLog(REDIS_WARNING,"Skiplist level: %d", (int) ((zset*)o->ptr)->zsl->level);
+ } else if (o->type == OBJ_LIST) {
+ serverLog(LL_WARNING,"List length: %d", (int) listTypeLength(o));
+ } else if (o->type == OBJ_SET) {
+ serverLog(LL_WARNING,"Set size: %d", (int) setTypeSize(o));
+ } else if (o->type == OBJ_HASH) {
+ serverLog(LL_WARNING,"Hash size: %d", (int) hashTypeLength(o));
+ } else if (o->type == OBJ_ZSET) {
+ serverLog(LL_WARNING,"Sorted set size: %d", (int) zsetLength(o));
+ if (o->encoding == OBJ_ENCODING_SKIPLIST)
+ serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level);
}
}
-void _redisAssertPrintObject(robj *o) {
+void _serverAssertPrintObject(const robj *o) {
bugReportStart();
- redisLog(REDIS_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ===");
- redisLogObjectDebugInfo(o);
+ serverLog(LL_WARNING,"=== ASSERTION FAILED OBJECT CONTEXT ===");
+ serverLogObjectDebugInfo(o);
}
-void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line) {
- if (c) _redisAssertPrintClientInfo(c);
- if (o) _redisAssertPrintObject(o);
- _redisAssert(estr,file,line);
+void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line) {
+ if (c) _serverAssertPrintClientInfo(c);
+ if (o) _serverAssertPrintObject(o);
+ _serverAssert(estr,file,line);
}
-void _redisPanic(char *msg, char *file, int line) {
+void _serverPanic(const char *file, int line, const char *msg, ...) {
+ va_list ap;
+ va_start(ap,msg);
+ char fmtmsg[256];
+ vsnprintf(fmtmsg,sizeof(fmtmsg),msg,ap);
+ va_end(ap);
+
bugReportStart();
- redisLog(REDIS_WARNING,"------------------------------------------------");
- redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
- redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
+ serverLog(LL_WARNING,"------------------------------------------------");
+ serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue");
+ serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line);
#ifdef HAVE_BACKTRACE
- redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
+ serverLog(LL_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
#endif
- redisLog(REDIS_WARNING,"------------------------------------------------");
+ serverLog(LL_WARNING,"------------------------------------------------");
*((char*)-1) = 'x';
}
void bugReportStart(void) {
if (server.bug_report_start == 0) {
- redisLog(REDIS_WARNING,
- "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n");
server.bug_report_start = 1;
}
}
@@ -557,6 +718,10 @@ static void *getMcontextEip(ucontext_t *uc) {
return (void*) uc->uc_mcontext.gregs[16]; /* Linux 64 */
#elif defined(__ia64__) /* Linux IA64 */
return (void*) uc->uc_mcontext.sc_ip;
+ #elif defined(__arm__) /* Linux ARM */
+ return (void*) uc->uc_mcontext.arm_pc;
+ #elif defined(__aarch64__) /* Linux AArch64 */
+ return (void*) uc->uc_mcontext.pc;
#endif
#else
return NULL;
@@ -570,20 +735,20 @@ void logStackContent(void **sp) {
unsigned long val = (unsigned long) sp[i];
if (sizeof(long) == 4)
- redisLog(REDIS_WARNING, "(%08lx) -> %08lx", addr, val);
+ serverLog(LL_WARNING, "(%08lx) -> %08lx", addr, val);
else
- redisLog(REDIS_WARNING, "(%016lx) -> %016lx", addr, val);
+ serverLog(LL_WARNING, "(%016lx) -> %016lx", addr, val);
}
}
void logRegisters(ucontext_t *uc) {
- redisLog(REDIS_WARNING, "--- REGISTERS");
+ serverLog(LL_WARNING|LL_RAW, "\n------ REGISTERS ------\n");
/* OSX */
#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
/* OSX AMD64 */
#if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
"RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
@@ -615,7 +780,7 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext->__ss.__rsp);
#else
/* OSX x86 */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
"EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
@@ -644,7 +809,7 @@ void logRegisters(ucontext_t *uc) {
#elif defined(__linux__)
/* Linux x86 */
#if defined(__i386__)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"
"EDI:%08lx ESI:%08lx EBP:%08lx ESP:%08lx\n"
@@ -670,7 +835,7 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext.gregs[7]);
#elif defined(__X86_64__) || defined(__x86_64__)
/* Linux AMD64 */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"\n"
"RAX:%016lx RBX:%016lx\nRCX:%016lx RDX:%016lx\n"
"RDI:%016lx RSI:%016lx\nRBP:%016lx RSP:%016lx\n"
@@ -700,36 +865,56 @@ void logRegisters(ucontext_t *uc) {
logStackContent((void**)uc->uc_mcontext.gregs[15]);
#endif
#else
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
" Dumping of registers not supported for this OS/arch");
#endif
}
+/* Return a file descriptor to write directly to the Redis log with the
+ * write(2) syscall, that can be used in critical sections of the code
+ * where the rest of Redis can't be trusted (for example during the memory
+ * test) or when an API call requires a raw fd.
+ *
+ * Close it with closeDirectLogFiledes(). */
+int openDirectLogFiledes(void) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ int fd = log_to_stdout ?
+ STDOUT_FILENO :
+ open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
+ return fd;
+}
+
+/* Used to close what openDirectLogFiledes() returns. */
+void closeDirectLogFiledes(int fd) {
+ int log_to_stdout = server.logfile[0] == '\0';
+ if (!log_to_stdout) close(fd);
+}
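+
+/* Illustrative usage of the pair above (not part of the patch):
+ *
+ *   int fd = openDirectLogFiledes();
+ *   if (fd != -1) {
+ *       if (write(fd,"some state\n",11) == -1) { }
+ *       closeDirectLogFiledes(fd);
+ *   }
+ */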
+
/* Logs the stack trace using the backtrace() call. This function is designed
* to be called from signal handlers safely. */
void logStackTrace(ucontext_t *uc) {
- void *trace[100];
- int trace_size = 0, fd;
- int log_to_stdout = server.logfile[0] == '\0';
+ void *trace[101];
+ int trace_size = 0, fd = openDirectLogFiledes();
- /* Open the log file in append mode. */
- fd = log_to_stdout ?
- STDOUT_FILENO :
- open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
- if (fd == -1) return;
+ if (fd == -1) return; /* If we can't log there is anything to do. */
/* Generate the stack trace */
- trace_size = backtrace(trace, 100);
-
- /* overwrite sigaction with caller's address */
- if (getMcontextEip(uc) != NULL)
- trace[1] = getMcontextEip(uc);
+ trace_size = backtrace(trace+1, 100);
+
+ if (getMcontextEip(uc) != NULL) {
+ char *msg1 = "EIP:\n";
+ char *msg2 = "\nBacktrace:\n";
+ if (write(fd,msg1,strlen(msg1)) == -1) {/* Avoid warning. */};
+ trace[0] = getMcontextEip(uc);
+ backtrace_symbols_fd(trace, 1, fd);
+ if (write(fd,msg2,strlen(msg2)) == -1) {/* Avoid warning. */};
+ }
/* Write symbols to log file */
- backtrace_symbols_fd(trace, trace_size, fd);
+ backtrace_symbols_fd(trace+1, trace_size, fd);
/* Cleanup */
- if (!log_to_stdout) close(fd);
+ closeDirectLogFiledes(fd);
}
/* Log information about the "current" client, that is, the client that is
@@ -738,19 +923,20 @@ void logStackTrace(ucontext_t *uc) {
void logCurrentClient(void) {
if (server.current_client == NULL) return;
- redisClient *cc = server.current_client;
+ client *cc = server.current_client;
sds client;
int j;
- redisLog(REDIS_WARNING, "--- CURRENT CLIENT INFO");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CURRENT CLIENT INFO ------\n");
client = catClientInfoString(sdsempty(),cc);
- redisLog(REDIS_WARNING,"client: %s", client);
+ serverLog(LL_WARNING|LL_RAW,"%s\n", client);
sdsfree(client);
for (j = 0; j < cc->argc; j++) {
robj *decoded;
decoded = getDecodedObject(cc->argv[j]);
- redisLog(REDIS_WARNING,"argv[%d]: '%s'", j, (char*)decoded->ptr);
+ serverLog(LL_WARNING|LL_RAW,"argv[%d]: '%s'\n", j,
+ (char*)decoded->ptr);
decrRefCount(decoded);
}
/* Check if the first argument, usually a key, is found inside the
@@ -763,27 +949,32 @@ void logCurrentClient(void) {
de = dictFind(cc->db->dict, key->ptr);
if (de) {
val = dictGetVal(de);
- redisLog(REDIS_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
- redisLogObjectDebugInfo(val);
+ serverLog(LL_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
+ serverLogObjectDebugInfo(val);
}
decrRefCount(key);
}
}
#if defined(HAVE_PROC_MAPS)
-void memtest_non_destructive_invert(void *addr, size_t size);
-void memtest_non_destructive_swap(void *addr, size_t size);
+
#define MEMTEST_MAX_REGIONS 128
+/* A non-destructive memory test executed during segfaults. */
int memtest_test_linux_anonymous_maps(void) {
- FILE *fp = fopen("/proc/self/maps","r");
+ FILE *fp;
char line[1024];
+ char logbuf[1024];
size_t start_addr, end_addr, size;
size_t start_vect[MEMTEST_MAX_REGIONS];
size_t size_vect[MEMTEST_MAX_REGIONS];
int regions = 0, j;
- uint64_t crc1 = 0, crc2 = 0, crc3 = 0;
+ int fd = openDirectLogFiledes();
+ if (fd == -1) return 0;
+
+ fp = fopen("/proc/self/maps","r");
+ if (!fp) {
+ closeDirectLogFiledes(fd);
+ return 0;
+ }
while(fgets(line,sizeof(line),fp) != NULL) {
char *start, *end, *p = line;
@@ -807,78 +998,90 @@ int memtest_test_linux_anonymous_maps(void) {
start_vect[regions] = start_addr;
size_vect[regions] = size;
- printf("Testing %lx %lu\n", (unsigned long) start_vect[regions],
- (unsigned long) size_vect[regions]);
+ snprintf(logbuf,sizeof(logbuf),
+ "*** Preparing to test memory region %lx (%lu bytes)\n",
+ (unsigned long) start_vect[regions],
+ (unsigned long) size_vect[regions]);
+ if (write(fd,logbuf,strlen(logbuf)) == -1) { /* Nothing to do. */ }
regions++;
}
- /* Test all the regions as an unique sequential region.
- * 1) Take the CRC64 of the memory region. */
+ int errors = 0;
for (j = 0; j < regions; j++) {
- crc1 = crc64(crc1,(void*)start_vect[j],size_vect[j]);
+ if (write(fd,".",1) == -1) { /* Nothing to do. */ }
+ errors += memtest_preserving_test((void*)start_vect[j],size_vect[j],1);
+ if (write(fd, errors ? "E" : "O",1) == -1) { /* Nothing to do. */ }
}
-
- /* 2) Invert bits, swap adjacent words, swap again, invert bits.
- * This is the error amplification step. */
- for (j = 0; j < regions; j++)
- memtest_non_destructive_invert((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_invert((void*)start_vect[j],size_vect[j]);
-
- /* 3) Take the CRC64 sum again. */
- for (j = 0; j < regions; j++)
- crc2 = crc64(crc2,(void*)start_vect[j],size_vect[j]);
-
- /* 4) Swap + Swap again */
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
- for (j = 0; j < regions; j++)
- memtest_non_destructive_swap((void*)start_vect[j],size_vect[j]);
-
- /* 5) Take the CRC64 sum again. */
- for (j = 0; j < regions; j++)
- crc3 = crc64(crc3,(void*)start_vect[j],size_vect[j]);
+ if (write(fd,"\n",1) == -1) { /* Nothing to do. */ }
/* NOTE: It is very important to close the file descriptor only now
 * because closing it before may result in unmapping of some memory
* region that we are testing. */
fclose(fp);
-
- /* If the two CRC are not the same, we trapped a memory error. */
- return crc1 != crc2 || crc2 != crc3;
+ closeDirectLogFiledes(fd);
+ return errors;
}
#endif
+/* Scans the (assumed) x86 code starting at addr, for a max of `len`
+ * bytes, searching for E8 (callq) opcodes, and dumping the symbols
+ * and the call offset if they appear to be valid. */
+void dumpX86Calls(void *addr, size_t len) {
+ size_t j;
+ unsigned char *p = addr;
+ Dl_info info;
+ /* Hash table to best-effort avoid printing the same symbol
+ * multiple times. */
+ unsigned long ht[256] = {0};
+
+ if (len < 5) return;
+ for (j = 0; j < len-4; j++) {
+ if (p[j] != 0xE8) continue; /* Not an E8 CALL opcode. */
+ unsigned long target = (unsigned long)addr+j+5;
+ target += *((int32_t*)(p+j+1));
+ if (dladdr((void*)target, &info) != 0 && info.dli_sname != NULL) {
+ if (ht[target&0xff] != target) {
+ printf("Function at 0x%lx is %s\n",target,info.dli_sname);
+ ht[target&0xff] = target;
+ }
+ j += 4; /* Skip the 32 bit immediate. */
+ }
+ }
+}
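+
+/* Worked example (illustrative): the bytes E8 FB FE FF FF found at address
+ * 0x1000 encode a call with a signed 32 bit displacement of -0x105, so the
+ * target is 0x1000 + 5 - 0x105 = 0xF00, which dladdr() then resolves to a
+ * symbol name. */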
+
void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
ucontext_t *uc = (ucontext_t*) secret;
+ void *eip = getMcontextEip(uc);
sds infostring, clients;
struct sigaction act;
- REDIS_NOTUSED(info);
+ UNUSED(info);
bugReportStart();
- redisLog(REDIS_WARNING,
- " Redis %s crashed by signal: %d", REDIS_VERSION, sig);
- redisLog(REDIS_WARNING,
- " Failed assertion: %s (%s:%d)", server.assert_failed,
+ serverLog(LL_WARNING,
+ "Redis %s crashed by signal: %d", REDIS_VERSION, sig);
+ if (eip != NULL) {
+ serverLog(LL_WARNING,
+ "Crashed running the instruction at: %p", eip);
+ }
+ if (sig == SIGSEGV || sig == SIGBUS) {
+ serverLog(LL_WARNING,
+ "Accessing address: %p", (void*)info->si_addr);
+ }
+ serverLog(LL_WARNING,
+ "Failed assertion: %s (%s:%d)", server.assert_failed,
server.assert_file, server.assert_line);
/* Log the stack trace */
- redisLog(REDIS_WARNING, "--- STACK TRACE");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ STACK TRACE ------\n");
logStackTrace(uc);
/* Log INFO and CLIENT LIST */
- redisLog(REDIS_WARNING, "--- INFO OUTPUT");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n");
infostring = genRedisInfoString("all");
- infostring = sdscatprintf(infostring, "hash_init_value: %u\n",
- dictGetHashFunctionSeed());
- redisLogRaw(REDIS_WARNING, infostring);
- redisLog(REDIS_WARNING, "--- CLIENT LIST OUTPUT");
- clients = getAllClientsInfoString();
- redisLogRaw(REDIS_WARNING, clients);
+ serverLogRaw(LL_WARNING|LL_RAW, infostring);
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
+ clients = getAllClientsInfoString(-1);
+ serverLogRaw(LL_WARNING|LL_RAW, clients);
sdsfree(infostring);
sdsfree(clients);
@@ -890,23 +1093,53 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
#if defined(HAVE_PROC_MAPS)
/* Test memory */
- redisLog(REDIS_WARNING, "--- FAST MEMORY TEST");
+ serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n");
bioKillThreads();
if (memtest_test_linux_anonymous_maps()) {
- redisLog(REDIS_WARNING,
- "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n");
} else {
- redisLog(REDIS_WARNING,
- "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.");
+ serverLogRaw(LL_WARNING|LL_RAW,
+ "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n");
}
#endif
- redisLog(REDIS_WARNING,
+ if (eip != NULL) {
+ Dl_info info;
+ if (dladdr(eip, &info) != 0) {
+ serverLog(LL_WARNING|LL_RAW,
+ "\n------ DUMPING CODE AROUND EIP ------\n"
+ "Symbol: %s (base: %p)\n"
+ "Module: %s (base %p)\n"
+ "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n"
+ "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n"
+ "------\n",
+ info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase,
+ info.dli_saddr);
+ size_t len = (long)eip - (long)info.dli_saddr;
+ unsigned long sz = sysconf(_SC_PAGESIZE);
+ if (len < 1<<13) { /* we don't have functions over 8k (verified) */
+ /* Find the address of the next page, which is our "safety"
+ * limit when dumping. Then try to dump just 128 bytes more
+ * than EIP if there is room, or stop sooner. */
+ unsigned long next = ((unsigned long)eip + sz) & ~(sz-1);
+ unsigned long end = (unsigned long)eip + 128;
+ if (end > next) end = next;
+ len = end - (unsigned long)info.dli_saddr;
+ serverLogHexDump(LL_WARNING, "dump of function",
+ info.dli_saddr ,len);
+ dumpX86Calls(info.dli_saddr,len);
+ }
+ }
+ }
+
+ serverLogRaw(LL_WARNING|LL_RAW,
"\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n"
" Please report the crash by opening an issue on github:\n\n"
" http://github.com/antirez/redis/issues\n\n"
" Suspect RAM error? Use redis-server --test-memory to verify it.\n\n"
);
+
/* free(messages); Don't call free() with possibly corrupted memory. */
if (server.daemonize && server.supervised == 0) unlink(server.pidfile);
@@ -922,12 +1155,12 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
/* ==================== Logging functions for debugging ===================== */
-void redisLogHexDump(int level, char *descr, void *value, size_t len) {
+void serverLogHexDump(int level, char *descr, void *value, size_t len) {
char buf[65], *b;
unsigned char *v = value;
char charset[] = "0123456789abcdef";
- redisLog(level,"%s (hexdump):", descr);
+ serverLog(level,"%s (hexdump of %zu bytes):", descr, len);
b = buf;
while(len) {
b[0] = charset[(*v)>>4];
@@ -937,11 +1170,11 @@ void redisLogHexDump(int level, char *descr, void *value, size_t len) {
len--;
v++;
if (b-buf == 64 || len == 0) {
- redisLogRaw(level|REDIS_LOG_RAW,buf);
+ serverLogRaw(level|LL_RAW,buf);
b = buf;
}
}
- redisLogRaw(level|REDIS_LOG_RAW,"\n");
+ serverLogRaw(level|LL_RAW,"\n");
}
/* =========================== Software Watchdog ============================ */
@@ -951,16 +1184,16 @@ void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) {
#ifdef HAVE_BACKTRACE
ucontext_t *uc = (ucontext_t*) secret;
#endif
- REDIS_NOTUSED(info);
- REDIS_NOTUSED(sig);
+ UNUSED(info);
+ UNUSED(sig);
- redisLogFromHandler(REDIS_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---");
+ serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---");
#ifdef HAVE_BACKTRACE
logStackTrace(uc);
#else
- redisLogFromHandler(REDIS_WARNING,"Sorry: no support for backtrace().");
+ serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace().");
#endif
- redisLogFromHandler(REDIS_WARNING,"--------\n");
+ serverLogFromHandler(LL_WARNING,"--------\n");
}
/* Schedule a SIGALRM delivery after the specified period in milliseconds.
diff --git a/src/debugmacro.h b/src/debugmacro.h
new file mode 100644
index 000000000..ded2d2667
--- /dev/null
+++ b/src/debugmacro.h
@@ -0,0 +1,41 @@
+/* This file contains debugging macros to be used when investigating issues.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#define D(...) \
+ do { \
+ FILE *fp = fopen("/tmp/log.txt","a"); \
+ fprintf(fp,"%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
+ fprintf(fp,__VA_ARGS__); \
+ fprintf(fp,"\n"); \
+ fclose(fp); \
+ } while (0);
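+
+/* Example usage (illustrative; the call site and message are hypothetical):
+ *
+ *   D("loaded %d keys", count);
+ *
+ * appends a line like "db.c:loadData:42: loaded 100 keys" to /tmp/log.txt. */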
diff --git a/src/defrag.c b/src/defrag.c
new file mode 100644
index 000000000..d67b6e253
--- /dev/null
+++ b/src/defrag.c
@@ -0,0 +1,1140 @@
+/*
+ * Active memory defragmentation
+ * Try to find key / value allocations that need to be re-allocated in order
+ * to reduce external fragmentation.
+ * We do that by scanning the keyspace and, for each pointer we have, asking
+ * the allocator whether moving it to a new address will help reduce fragmentation.
+ *
+ * Copyright (c) 2017, Oran Agra
+ * Copyright (c) 2017, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include <time.h>
+#include <assert.h>
+#include <stddef.h>
+
+#ifdef HAVE_DEFRAG
+
+/* This method was added to jemalloc in order to help us understand which
+ * pointers are worth moving and which aren't. */
+int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util);
+
+/* Forward declarations. */
+void defragDictBucketCallback(void *privdata, dictEntry **bucketref);
+dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged);
+
+/* Defrag helper for generic allocations.
+ *
+ * Returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+void* activeDefragAlloc(void *ptr) {
+ int bin_util, run_util;
+ size_t size;
+ void *newptr;
+ if(!je_get_defrag_hint(ptr, &bin_util, &run_util)) {
+ server.stat_active_defrag_misses++;
+ return NULL;
+ }
+ /* if this run is more utilized than the average utilization in this bin
+ * (or it is full), skip it. This will eventually move all the allocations
+ * from relatively empty runs into relatively full runs. */
+ if (run_util > bin_util || run_util == 1<<16) {
+ server.stat_active_defrag_misses++;
+ return NULL;
+ }
+ /* Move this allocation to a new allocation. Make sure not to use the
+ * thread cache, so that we don't get back the same pointers we are
+ * trying to free. */
+ size = zmalloc_size(ptr);
+ newptr = zmalloc_no_tcache(size);
+ memcpy(newptr, ptr, size);
+ zfree_no_tcache(ptr);
+ return newptr;
+}
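+/* Illustrative call pattern (not part of the patch): callers keep their
+ * old pointer unless a new one is returned, since NULL means "not moved":
+ *
+ *   void *newptr = activeDefragAlloc(ptr);
+ *   if (newptr) ptr = newptr; // the old pointer was already freed
+ */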
+
+/* Defrag helper for sds strings.
+ *
+ * Returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+sds activeDefragSds(sds sdsptr) {
+ void* ptr = sdsAllocPtr(sdsptr);
+ void* newptr = activeDefragAlloc(ptr);
+ if (newptr) {
+ size_t offset = sdsptr - (char*)ptr;
+ sdsptr = (char*)newptr + offset;
+ return sdsptr;
+ }
+ return NULL;
+}
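+
+/* Note (illustrative): an sds pointer points at the string body, a few bytes
+ * past the start of its allocation (3 bytes for an sdshdr8 header), so the
+ * same offset is re-applied inside the new allocation. */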
+
+/* Defrag helper for robj and/or string objects
+ *
+ * Returns NULL in case the allocation wasn't moved.
+ * when it returns a non-null value, the old pointer was already released
+ * and should NOT be accessed. */
+robj *activeDefragStringOb(robj* ob, long *defragged) {
+ robj *ret = NULL;
+ if (ob->refcount!=1)
+ return NULL;
+
+ /* Try to defrag the robj (only if not an EMBSTR type, which is handled below). */
+ if (ob->type!=OBJ_STRING || ob->encoding!=OBJ_ENCODING_EMBSTR) {
+ if ((ret = activeDefragAlloc(ob))) {
+ ob = ret;
+ (*defragged)++;
+ }
+ }
+
+ /* try to defrag string object */
+ if (ob->type == OBJ_STRING) {
+ if(ob->encoding==OBJ_ENCODING_RAW) {
+ sds newsds = activeDefragSds((sds)ob->ptr);
+ if (newsds) {
+ ob->ptr = newsds;
+ (*defragged)++;
+ }
+ } else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
+ /* The sds is embedded in the object allocation, calculate the
+ * offset and update the pointer in the new allocation. */
+ long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
+ if ((ret = activeDefragAlloc(ob))) {
+ ret->ptr = (void*)((intptr_t)ret + ofs);
+ (*defragged)++;
+ }
+ } else if (ob->encoding!=OBJ_ENCODING_INT) {
+ serverPanic("Unknown string encoding");
+ }
+ }
+ return ret;
+}
+
+/* Defrag helper for dictEntries to be used during dict iteration (called on
+ * each step). Returns a stat of how many pointers were moved. */
+long dictIterDefragEntry(dictIterator *iter) {
+ /* This function is a little bit dirty since it messes with the internals
+ * of the dict and its iterator, but the benefit is that it is very easy
+ * to use, and requires no other changes in the dict. */
+ long defragged = 0;
+ dictht *ht;
+ /* Handle the next entry (if there is one), and update the pointer in the
+ * current entry. */
+ if (iter->nextEntry) {
+ dictEntry *newde = activeDefragAlloc(iter->nextEntry);
+ if (newde) {
+ defragged++;
+ iter->nextEntry = newde;
+ iter->entry->next = newde;
+ }
+ }
+ /* handle the case of the first entry in the hash bucket. */
+ ht = &iter->d->ht[iter->table];
+ if (ht->table[iter->index] == iter->entry) {
+ dictEntry *newde = activeDefragAlloc(iter->entry);
+ if (newde) {
+ iter->entry = newde;
+ ht->table[iter->index] = newde;
+ defragged++;
+ }
+ }
+ return defragged;
+}
+
+/* Defrag helper for dict main allocations (dict struct, and hash tables).
+ * receives a pointer to the dict* and implicitly updates it when the dict
+ * struct itself was moved. Returns a stat of how many pointers were moved. */
+long dictDefragTables(dict* d) {
+ dictEntry **newtable;
+ long defragged = 0;
+ /* handle the first hash table */
+ newtable = activeDefragAlloc(d->ht[0].table);
+ if (newtable)
+ defragged++, d->ht[0].table = newtable;
+ /* handle the second hash table */
+ if (d->ht[1].table) {
+ newtable = activeDefragAlloc(d->ht[1].table);
+ if (newtable)
+ defragged++, d->ht[1].table = newtable;
+ }
+ return defragged;
+}
+
+/* Internal function used by zslDefrag */
+void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnode, zskiplistNode **update) {
+ int i;
+ for (i = 0; i < zsl->level; i++) {
+ if (update[i]->level[i].forward == oldnode)
+ update[i]->level[i].forward = newnode;
+ }
+ serverAssert(zsl->header!=oldnode);
+ if (newnode->level[0].forward) {
+ serverAssert(newnode->level[0].forward->backward==oldnode);
+ newnode->level[0].forward->backward = newnode;
+ } else {
+ serverAssert(zsl->tail==oldnode);
+ zsl->tail = newnode;
+ }
+}
+
+/* Defrag helper for sorted set.
+ * Update the robj pointer, defrag the skiplist struct and return the new score
+ * reference. We may not access the oldele pointer (not even the pointer stored in
+ * the skiplist), as it was already freed. Newele may be null, in which case we
+ * only need to defrag the skiplist, but not update the obj pointer.
+ * When return value is non-NULL, it is the score reference that must be updated
+ * in the dict record. */
+double *zslDefrag(zskiplist *zsl, double score, sds oldele, sds newele) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x, *newx;
+ int i;
+ sds ele = newele? newele: oldele;
+
+ /* find the skiplist node referring to the object that was moved,
+ * and all pointers that need to be updated if we'll end up moving the skiplist node. */
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->level[i].forward &&
+ x->level[i].forward->ele != oldele && /* make sure not to access the
+ ->obj pointer if it matches
+ oldele */
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ x = x->level[i].forward;
+ update[i] = x;
+ }
+
+ /* update the robj pointer inside the skip list record. */
+ x = x->level[0].forward;
+ serverAssert(x && score == x->score && x->ele==oldele);
+ if (newele)
+ x->ele = newele;
+
+ /* try to defrag the skiplist record itself */
+ newx = activeDefragAlloc(x);
+ if (newx) {
+ zslUpdateNode(zsl, x, newx, update);
+ return &newx->score;
+ }
+ return NULL;
+}
+
+/* Defrag helper for sorted set.
+ * Defrag a single dict entry key name, and corresponding skiplist struct */
+long activeDefragZsetEntry(zset *zs, dictEntry *de) {
+ sds newsds;
+ double* newscore;
+ long defragged = 0;
+ sds sdsele = dictGetKey(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ defragged++, de->key = newsds;
+ newscore = zslDefrag(zs->zsl, *(double*)dictGetVal(de), sdsele, newsds);
+ if (newscore) {
+ dictSetVal(zs->dict, de, newscore);
+ defragged++;
+ }
+ return defragged;
+}
+
+#define DEFRAG_SDS_DICT_NO_VAL 0
+#define DEFRAG_SDS_DICT_VAL_IS_SDS 1
+#define DEFRAG_SDS_DICT_VAL_IS_STROB 2
+#define DEFRAG_SDS_DICT_VAL_VOID_PTR 3
+
+/* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */
+long activeDefragSdsDict(dict* d, int val_type) {
+ dictIterator *di;
+ dictEntry *de;
+ long defragged = 0;
+ di = dictGetIterator(d);
+ while((de = dictNext(di)) != NULL) {
+ sds sdsele = dictGetKey(de), newsds;
+ if ((newsds = activeDefragSds(sdsele)))
+ de->key = newsds, defragged++;
+ /* defrag the value */
+ if (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) {
+ sdsele = dictGetVal(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ de->v.val = newsds, defragged++;
+ } else if (val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) {
+ robj *newele, *ele = dictGetVal(de);
+ if ((newele = activeDefragStringOb(ele, &defragged)))
+ de->v.val = newele;
+ } else if (val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
+ void *newptr, *ptr = dictGetVal(de);
+ if ((newptr = activeDefragAlloc(ptr)))
+ de->v.val = newptr, defragged++;
+ }
+ defragged += dictIterDefragEntry(di);
+ }
+ dictReleaseIterator(di);
+ return defragged;
+}
+
+/* Defrag a list of ptr, sds or robj string values */
+long activeDefragList(list *l, int val_type) {
+ long defragged = 0;
+ listNode *ln, *newln;
+ for (ln = l->head; ln; ln = ln->next) {
+ if ((newln = activeDefragAlloc(ln))) {
+ if (newln->prev)
+ newln->prev->next = newln;
+ else
+ l->head = newln;
+ if (newln->next)
+ newln->next->prev = newln;
+ else
+ l->tail = newln;
+ ln = newln;
+ defragged++;
+ }
+ if (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) {
+ sds newsds, sdsele = ln->value;
+ if ((newsds = activeDefragSds(sdsele)))
+ ln->value = newsds, defragged++;
+ } else if (val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) {
+ robj *newele, *ele = ln->value;
+ if ((newele = activeDefragStringOb(ele, &defragged)))
+ ln->value = newele;
+ } else if (val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
+ void *newptr, *ptr = ln->value;
+ if ((newptr = activeDefragAlloc(ptr)))
+ ln->value = newptr, defragged++;
+ }
+ }
+ return defragged;
+}
+
+/* Defrag a list of sds values and a dict with the same sds keys */
+long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) {
+ long defragged = 0;
+ sds newsds, sdsele;
+ listNode *ln, *newln;
+ dictIterator *di;
+ dictEntry *de;
+ /* Defrag the list and its sds values */
+ for (ln = l->head; ln; ln = ln->next) {
+ if ((newln = activeDefragAlloc(ln))) {
+ if (newln->prev)
+ newln->prev->next = newln;
+ else
+ l->head = newln;
+ if (newln->next)
+ newln->next->prev = newln;
+ else
+ l->tail = newln;
+ ln = newln;
+ defragged++;
+ }
+ sdsele = ln->value;
+ if ((newsds = activeDefragSds(sdsele))) {
+ /* When defragging an sds value, we need to update the dict key */
+ unsigned int hash = dictGetHash(d, sdsele);
+ replaceSateliteDictKeyPtrAndOrDefragDictEntry(d, sdsele, newsds, hash, &defragged);
+ ln->value = newsds;
+ defragged++;
+ }
+ }
+
+ /* Defrag the dict values (keys were already handled) */
+ di = dictGetIterator(d);
+ while((de = dictNext(di)) != NULL) {
+ if (dict_val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) {
+ sds newsds, sdsele = dictGetVal(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ de->v.val = newsds, defragged++;
+ } else if (dict_val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) {
+ robj *newele, *ele = dictGetVal(de);
+ if ((newele = activeDefragStringOb(ele, &defragged)))
+ de->v.val = newele, defragged++;
+ } else if (dict_val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
+ void *newptr, *ptr = dictGetVal(de);
+ if ((newptr = activeDefragAlloc(ptr)))
+ de->v.val = newptr, defragged++;
+ }
+ defragged += dictIterDefragEntry(di);
+ }
+ dictReleaseIterator(di);
+
+ return defragged;
+}
+
+/* Utility function that replaces an old key pointer in the dictionary with a
+ * new pointer. Additionally, we try to defrag the dictEntry in that dict.
+ * Oldkey may be a dead pointer and should not be accessed (we get a
+ * pre-calculated hash value). Newkey may be null if the key pointer wasn't
+ * moved. Return value is the dictEntry if found, or NULL if not found.
+ * NOTE: this is very ugly code, but it lets us avoid the complication of
+ * doing a scan on another dict. */
+dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged) {
+ dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash);
+ if (deref) {
+ dictEntry *de = *deref;
+ dictEntry *newde = activeDefragAlloc(de);
+ if (newde) {
+ de = *deref = newde;
+ (*defragged)++;
+ }
+ if (newkey)
+ de->key = newkey;
+ return de;
+ }
+ return NULL;
+}
+
+long activeDefragQuickListNodes(quicklist *ql) {
+ quicklistNode *node = ql->head, *newnode;
+ long defragged = 0;
+ unsigned char *newzl;
+ while (node) {
+ if ((newnode = activeDefragAlloc(node))) {
+ if (newnode->prev)
+ newnode->prev->next = newnode;
+ else
+ ql->head = newnode;
+ if (newnode->next)
+ newnode->next->prev = newnode;
+ else
+ ql->tail = newnode;
+ node = newnode;
+ defragged++;
+ }
+ if ((newzl = activeDefragAlloc(node->zl)))
+ defragged++, node->zl = newzl;
+ node = node->next;
+ }
+ return defragged;
+}
+
+/* When the value has lots of elements, we want to handle it later and not as
+ * part of the main dictionary scan. This is needed in order to prevent latency
+ * spikes when handling large items. */
+void defragLater(redisDb *db, dictEntry *kde) {
+ sds key = sdsdup(dictGetKey(kde));
+ listAddNodeTail(db->defrag_later, key);
+}
+
+long scanLaterList(robj *ob) {
+ quicklist *ql = ob->ptr;
+ if (ob->type != OBJ_LIST || ob->encoding != OBJ_ENCODING_QUICKLIST)
+ return 0;
+ server.stat_active_defrag_scanned+=ql->len;
+ return activeDefragQuickListNodes(ql);
+}
+
+typedef struct {
+ zset *zs;
+ long defragged;
+} scanLaterZsetData;
+
+void scanLaterZsetCallback(void *privdata, const dictEntry *_de) {
+ dictEntry *de = (dictEntry*)_de;
+ scanLaterZsetData *data = privdata;
+ data->defragged += activeDefragZsetEntry(data->zs, de);
+ server.stat_active_defrag_scanned++;
+}
+
+long scanLaterZset(robj *ob, unsigned long *cursor) {
+ if (ob->type != OBJ_ZSET || ob->encoding != OBJ_ENCODING_SKIPLIST)
+ return 0;
+ zset *zs = (zset*)ob->ptr;
+ dict *d = zs->dict;
+ scanLaterZsetData data = {zs, 0};
+ *cursor = dictScan(d, *cursor, scanLaterZsetCallback, defragDictBucketCallback, &data);
+ return data.defragged;
+}
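+
+/* Illustrative resumable driver for the scanLater* helpers (the real caller
+ * lives elsewhere in the defrag cycle; the surrounding code here is assumed):
+ *
+ *   unsigned long cursor = 0;
+ *   do {
+ *       server.stat_active_defrag_hits += scanLaterZset(ob, &cursor);
+ *   } while (cursor != 0);
+ */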
+
+void scanLaterSetCallback(void *privdata, const dictEntry *_de) {
+ dictEntry *de = (dictEntry*)_de;
+ long *defragged = privdata;
+ sds sdsele = dictGetKey(de), newsds;
+ if ((newsds = activeDefragSds(sdsele)))
+ (*defragged)++, de->key = newsds;
+ server.stat_active_defrag_scanned++;
+}
+
+long scanLaterSet(robj *ob, unsigned long *cursor) {
+ long defragged = 0;
+ if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT)
+ return 0;
+ dict *d = ob->ptr;
+ *cursor = dictScan(d, *cursor, scanLaterSetCallback, defragDictBucketCallback, &defragged);
+ return defragged;
+}
+
+void scanLaterHashCallback(void *privdata, const dictEntry *_de) {
+ dictEntry *de = (dictEntry*)_de;
+ long *defragged = privdata;
+ sds sdsele = dictGetKey(de), newsds;
+ if ((newsds = activeDefragSds(sdsele)))
+ (*defragged)++, de->key = newsds;
+ sdsele = dictGetVal(de);
+ if ((newsds = activeDefragSds(sdsele)))
+ (*defragged)++, de->v.val = newsds;
+ server.stat_active_defrag_scanned++;
+}
+
+long scanLaterHash(robj *ob, unsigned long *cursor) {
+ long defragged = 0;
+ if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT)
+ return 0;
+ dict *d = ob->ptr;
+ *cursor = dictScan(d, *cursor, scanLaterHashCallback, defragDictBucketCallback, &defragged);
+ return defragged;
+}
+
+long defragQuicklist(redisDb *db, dictEntry *kde) {
+ robj *ob = dictGetVal(kde);
+ long defragged = 0;
+ quicklist *ql = ob->ptr, *newql;
+ serverAssert(ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST);
+ if ((newql = activeDefragAlloc(ql)))
+ defragged++, ob->ptr = ql = newql;
+ if (ql->len > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ defragged += activeDefragQuickListNodes(ql);
+ return defragged;
+}
+
+long defragZsetSkiplist(redisDb *db, dictEntry *kde) {
+ robj *ob = dictGetVal(kde);
+ long defragged = 0;
+ zset *zs = (zset*)ob->ptr;
+ zset *newzs;
+ zskiplist *newzsl;
+ dict *newdict;
+ dictEntry *de;
+ struct zskiplistNode *newheader;
+ serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST);
+ if ((newzs = activeDefragAlloc(zs)))
+ defragged++, ob->ptr = zs = newzs;
+ if ((newzsl = activeDefragAlloc(zs->zsl)))
+ defragged++, zs->zsl = newzsl;
+ if ((newheader = activeDefragAlloc(zs->zsl->header)))
+ defragged++, zs->zsl->header = newheader;
+ if (dictSize(zs->dict) > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else {
+ dictIterator *di = dictGetIterator(zs->dict);
+ while((de = dictNext(di)) != NULL) {
+ defragged += activeDefragZsetEntry(zs, de);
+ }
+ dictReleaseIterator(di);
+ }
+ /* handle the dict struct */
+ if ((newdict = activeDefragAlloc(zs->dict)))
+ defragged++, zs->dict = newdict;
+ /* defrag the dict tables */
+ defragged += dictDefragTables(zs->dict);
+ return defragged;
+}
+
+long defragHash(redisDb *db, dictEntry *kde) {
+ long defragged = 0;
+ robj *ob = dictGetVal(kde);
+ dict *d, *newd;
+ serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT);
+ d = ob->ptr;
+ if (dictSize(d) > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ defragged += activeDefragSdsDict(d, DEFRAG_SDS_DICT_VAL_IS_SDS);
+ /* handle the dict struct */
+ if ((newd = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newd;
+ /* defrag the dict tables */
+ defragged += dictDefragTables(ob->ptr);
+ return defragged;
+}
+
+long defragSet(redisDb *db, dictEntry *kde) {
+ long defragged = 0;
+ robj *ob = dictGetVal(kde);
+ dict *d, *newd;
+ serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT);
+ d = ob->ptr;
+ if (dictSize(d) > server.active_defrag_max_scan_fields)
+ defragLater(db, kde);
+ else
+ defragged += activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL);
+ /* handle the dict struct */
+ if ((newd = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newd;
+ /* defrag the dict tables */
+ defragged += dictDefragTables(ob->ptr);
+ return defragged;
+}
+
+/* Defrag callback for radix tree iterator, called for each node,
+ * used in order to defrag the nodes allocations. */
+int defragRaxNode(raxNode **noderef) {
+ raxNode *newnode = activeDefragAlloc(*noderef);
+ if (newnode) {
+ *noderef = newnode;
+ return 1;
+ }
+ return 0;
+}
+
+/* Returns 0 if no more work needs to be done, and 1 if time is up and more work is needed. */
+int scanLaterStraemListpacks(robj *ob, unsigned long *cursor, long long endtime, long long *defragged) {
+ static unsigned char last[sizeof(streamID)];
+ raxIterator ri;
+ long iterations = 0;
+ if (ob->type != OBJ_STREAM || ob->encoding != OBJ_ENCODING_STREAM) {
+ *cursor = 0;
+ return 0;
+ }
+
+ stream *s = ob->ptr;
+ raxStart(&ri,s->rax);
+ if (*cursor == 0) {
+ /* if cursor is 0, we start a new iteration */
+ defragRaxNode(&s->rax->head);
+ /* assign the iterator node callback before the seek, so that the
+ * initial nodes processed up to the first item are covered */
+ ri.node_cb = defragRaxNode;
+ raxSeek(&ri,"^",NULL,0);
+ } else {
+ /* if cursor is non-zero, we seek to the static 'last' */
+ if (!raxSeek(&ri,">", last, sizeof(last))) {
+ *cursor = 0;
+ return 0;
+ }
+ /* assign the iterator node callback after the seek, so that the
+ * nodes processed by the seek itself aren't covered */
+ ri.node_cb = defragRaxNode;
+ }
+
+ (*cursor)++;
+ while (raxNext(&ri)) {
+ void *newdata = activeDefragAlloc(ri.data);
+ if (newdata)
+ raxSetData(ri.node, ri.data=newdata), (*defragged)++;
+ if (++iterations > 16) {
+ if (ustime() > endtime) {
+ serverAssert(ri.key_len==sizeof(last));
+ memcpy(last,ri.key,ri.key_len);
+ raxStop(&ri);
+ return 1;
+ }
+ iterations = 0;
+ }
+ }
+ raxStop(&ri);
+ *cursor = 0;
+ return 0;
+}
+
+/* Optional callback used to defrag each rax element (not including the element pointer itself). */
+typedef void *(raxDefragFunction)(raxIterator *ri, void *privdata, long *defragged);
+
+/* defrag radix tree including:
+ * 1) rax struct
+ * 2) rax nodes
+ * 3) rax entry data (only if defrag_data is specified)
+ * 4) call a callback per element, and allow the callback to return a new pointer for the element */
+long defragRadixTree(rax **raxref, int defrag_data, raxDefragFunction *element_cb, void *element_cb_data) {
+ long defragged = 0;
+ raxIterator ri;
+ rax* rax;
+ if ((rax = activeDefragAlloc(*raxref)))
+ defragged++, *raxref = rax;
+ rax = *raxref;
+ raxStart(&ri,rax);
+ ri.node_cb = defragRaxNode;
+ defragRaxNode(&rax->head);
+ raxSeek(&ri,"^",NULL,0);
+ while (raxNext(&ri)) {
+ void *newdata = NULL;
+ if (element_cb)
+ newdata = element_cb(&ri, element_cb_data, &defragged);
+ if (defrag_data && !newdata)
+ newdata = activeDefragAlloc(ri.data);
+ if (newdata)
+ raxSetData(ri.node, ri.data=newdata), defragged++;
+ }
+ raxStop(&ri);
+ return defragged;
+}
+
+typedef struct {
+ streamCG *cg;
+ streamConsumer *c;
+} PendingEntryContext;
+
+void* defragStreamConsumerPendingEntry(raxIterator *ri, void *privdata, long *defragged) {
+ UNUSED(defragged);
+ PendingEntryContext *ctx = privdata;
+ streamNACK *nack = ri->data, *newnack;
+ nack->consumer = ctx->c; /* update nack pointer to consumer */
+ newnack = activeDefragAlloc(nack);
+ if (newnack) {
+ /* update consumer group pointer to the nack */
+ void *prev;
+ raxInsert(ctx->cg->pel, ri->key, ri->key_len, newnack, &prev);
+ serverAssert(prev==nack);
+ /* note: we don't increment 'defragged'; that's done by the caller */
+ }
+ return newnack;
+}
+
+void* defragStreamConsumer(raxIterator *ri, void *privdata, long *defragged) {
+ streamConsumer *c = ri->data;
+ streamCG *cg = privdata;
+ void *newc = activeDefragAlloc(c);
+ if (newc) {
+ /* note: we don't increment 'defragged'; that's done by the caller */
+ c = newc;
+ }
+ sds newsds = activeDefragSds(c->name);
+ if (newsds)
+ (*defragged)++, c->name = newsds;
+ if (c->pel) {
+ PendingEntryContext pel_ctx = {cg, c};
+ *defragged += defragRadixTree(&c->pel, 0, defragStreamConsumerPendingEntry, &pel_ctx);
+ }
+ return newc; /* returns NULL if c was not defragged */
+}
+
+void* defragStreamConsumerGroup(raxIterator *ri, void *privdata, long *defragged) {
+ streamCG *cg = ri->data;
+ UNUSED(privdata);
+ if (cg->consumers)
+ *defragged += defragRadixTree(&cg->consumers, 0, defragStreamConsumer, cg);
+ if (cg->pel)
+ *defragged += defragRadixTree(&cg->pel, 0, NULL, NULL);
+ return NULL;
+}
+
+long defragStream(redisDb *db, dictEntry *kde) {
+ long defragged = 0;
+ robj *ob = dictGetVal(kde);
+ serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
+ stream *s = ob->ptr, *news;
+
+ /* handle the main struct */
+ if ((news = activeDefragAlloc(s)))
+ defragged++, ob->ptr = s = news;
+
+ if (raxSize(s->rax) > server.active_defrag_max_scan_fields) {
+ rax *newrax = activeDefragAlloc(s->rax);
+ if (newrax)
+ defragged++, s->rax = newrax;
+ defragLater(db, kde);
+ } else
+ defragged += defragRadixTree(&s->rax, 1, NULL, NULL);
+
+ if (s->cgroups)
+ defragged += defragRadixTree(&s->cgroups, 1, defragStreamConsumerGroup, NULL);
+ return defragged;
+}
+
+/* for each key we scan in the main dict, this function will attempt to defrag
+ * all the various pointers it has. Returns a stat of how many pointers were
+ * moved. */
+long defragKey(redisDb *db, dictEntry *de) {
+ sds keysds = dictGetKey(de);
+ robj *newob, *ob;
+ unsigned char *newzl;
+ long defragged = 0;
+ sds newsds;
+
+ /* Try to defrag the key name. */
+ newsds = activeDefragSds(keysds);
+ if (newsds)
+ defragged++, de->key = newsds;
+ if (dictSize(db->expires)) {
+ /* Dirty code:
+ * I can't search in db->expires for that key after I've already released
+ * the pointer it holds, since it won't be able to do the string compare. */
+ uint64_t hash = dictGetHash(db->dict, de->key);
+ replaceSateliteDictKeyPtrAndOrDefragDictEntry(db->expires, keysds, newsds, hash, &defragged);
+ }
+
+ /* Try to defrag robj and / or string value. */
+ ob = dictGetVal(de);
+ if ((newob = activeDefragStringOb(ob, &defragged))) {
+ de->v.val = newob;
+ ob = newob;
+ }
+
+ if (ob->type == OBJ_STRING) {
+ /* Already handled in activeDefragStringOb. */
+ } else if (ob->type == OBJ_LIST) {
+ if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
+ defragged += defragQuicklist(db, de);
+ } else if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (ob->type == OBJ_SET) {
+ if (ob->encoding == OBJ_ENCODING_HT) {
+ defragged += defragSet(db, de);
+ } else if (ob->encoding == OBJ_ENCODING_INTSET) {
+ intset *newis, *is = ob->ptr;
+ if ((newis = activeDefragAlloc(is)))
+ defragged++, ob->ptr = newis;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (ob->type == OBJ_ZSET) {
+ if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_SKIPLIST) {
+ defragged += defragZsetSkiplist(db, de);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (ob->type == OBJ_HASH) {
+ if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
+ if ((newzl = activeDefragAlloc(ob->ptr)))
+ defragged++, ob->ptr = newzl;
+ } else if (ob->encoding == OBJ_ENCODING_HT) {
+ defragged += defragHash(db, de);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (ob->type == OBJ_STREAM) {
+ defragged += defragStream(db, de);
+ } else if (ob->type == OBJ_MODULE) {
+ /* Currently defragmenting a module's private data types
+ * is not supported. */
+ } else {
+ serverPanic("Unknown object type");
+ }
+ return defragged;
+}
+
+/* Defrag scan callback for the main db dictionary. */
+void defragScanCallback(void *privdata, const dictEntry *de) {
+ long defragged = defragKey((redisDb*)privdata, (dictEntry*)de);
+ server.stat_active_defrag_hits += defragged;
+ if(defragged)
+ server.stat_active_defrag_key_hits++;
+ else
+ server.stat_active_defrag_key_misses++;
+ server.stat_active_defrag_scanned++;
+}
+
+/* Defrag scan callback for each hash table bucket,
+ * used in order to defrag the dictEntry allocations. */
+void defragDictBucketCallback(void *privdata, dictEntry **bucketref) {
+ UNUSED(privdata); /* NOTE: this function is used by both activeDefragCycle and scanLaterHash, etc., so don't use privdata. */
+ while(*bucketref) {
+ dictEntry *de = *bucketref, *newde;
+ if ((newde = activeDefragAlloc(de))) {
+ *bucketref = newde;
+ }
+ bucketref = &(*bucketref)->next;
+ }
+}
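+/* A note on the callback above (illustrative sketch, not upstream code):
+ * receiving 'dictEntry **' rather than 'dictEntry *' is what allows the
+ * entry to be relinked in place when the allocator moves it:
+ *
+ * bucket -> de -> next (before)
+ * bucket -> newde -> next (after activeDefragAlloc() moved 'de')
+ *
+ * With only the entry pointer we could copy the entry, but we could not
+ * fix up whatever was pointing at it. */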
+
+/* Utility function to get the fragmentation ratio from jemalloc.
+ * It is critical to do that by comparing only heap maps that belong to
+ * jemalloc, and to skip the ones jemalloc keeps as spare. Since we use this
+ * fragmentation ratio in order to decide if a defrag action should be taken
+ * or not, a false detection can cause the defragmenter to waste a lot of CPU
+ * without the possibility of getting any results. */
+float getAllocatorFragmentation(size_t *out_frag_bytes) {
+ size_t resident, active, allocated;
+ zmalloc_get_allocator_info(&allocated, &active, &resident);
+ float frag_pct = ((float)active / allocated)*100 - 100;
+ size_t frag_bytes = active - allocated;
+ float rss_pct = ((float)resident / allocated)*100 - 100;
+ size_t rss_bytes = resident - allocated;
+ if(out_frag_bytes)
+ *out_frag_bytes = frag_bytes;
+ serverLog(LL_DEBUG,
+ "allocated=%zu, active=%zu, resident=%zu, frag=%.0f%% (%.0f%% rss), frag_bytes=%zu (%zu rss)",
+ allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes);
+ return frag_pct;
+}
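+/* Worked example of the math above (illustrative numbers): with
+ * allocated=100MB, active=120MB and resident=130MB the function returns
+ * frag_pct = (120/100)*100-100 = 20, sets *out_frag_bytes to 20MB, and
+ * the rss figures come out as 30% / 30MB. */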
+
+/* We may need to defrag other globals: one small allocation can hold a full
+ * allocator run, so although they are small, it is still important to defrag them. */
+long defragOtherGlobals() {
+ long defragged = 0;
+
+ /* there are many more pointers to defrag (e.g. client argv, output / aof buffers, etc.),
+ * but we assume most of these are short lived; we only need to defrag allocations
+ * that remain static for a long time. */
+ defragged += activeDefragSdsDict(server.lua_scripts, DEFRAG_SDS_DICT_VAL_IS_STROB);
+ defragged += activeDefragSdsListAndDict(server.repl_scriptcache_fifo, server.repl_scriptcache_dict, DEFRAG_SDS_DICT_NO_VAL);
+ return defragged;
+}
+
+/* Returns 0 when done with this item (more work may or may not remain; see
+ * the non-zero cursor), and 1 if time is up and more work is needed. */
+int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime) {
+ if (de) {
+ robj *ob = dictGetVal(de);
+ if (ob->type == OBJ_LIST) {
+ server.stat_active_defrag_hits += scanLaterList(ob);
+ *cursor = 0; /* list has no scan, we must finish it in one go */
+ } else if (ob->type == OBJ_SET) {
+ server.stat_active_defrag_hits += scanLaterSet(ob, cursor);
+ } else if (ob->type == OBJ_ZSET) {
+ server.stat_active_defrag_hits += scanLaterZset(ob, cursor);
+ } else if (ob->type == OBJ_HASH) {
+ server.stat_active_defrag_hits += scanLaterHash(ob, cursor);
+ } else if (ob->type == OBJ_STREAM) {
+ return scanLaterStraemListpacks(ob, cursor, endtime, &server.stat_active_defrag_hits);
+ } else {
+ *cursor = 0; /* object type may have changed since we scheduled it for later */
+ }
+ } else {
+ *cursor = 0; /* object may have been deleted already */
+ }
+ return 0;
+}
+
+/* Returns 0 if no more work needs to be done, and 1 if time is up and more work is needed. */
+int defragLaterStep(redisDb *db, long long endtime) {
+ static sds current_key = NULL;
+ static unsigned long cursor = 0;
+ unsigned int iterations = 0;
+ unsigned long long prev_defragged = server.stat_active_defrag_hits;
+ unsigned long long prev_scanned = server.stat_active_defrag_scanned;
+ long long key_defragged;
+
+ do {
+ /* if we're not continuing a scan from the last call or loop, start a new one */
+ if (!cursor) {
+ listNode *head = listFirst(db->defrag_later);
+
+ /* Move on to next key */
+ if (current_key) {
+ serverAssert(current_key == head->value);
+ sdsfree(head->value);
+ listDelNode(db->defrag_later, head);
+ cursor = 0;
+ current_key = NULL;
+ }
+
+ /* stop if we reached the last one. */
+ head = listFirst(db->defrag_later);
+ if (!head)
+ return 0;
+
+ /* start a new key */
+ current_key = head->value;
+ cursor = 0;
+ }
+
+ /* each time we enter this function we need to fetch the key from the dict again (if it still exists) */
+ dictEntry *de = dictFind(db->dict, current_key);
+ key_defragged = server.stat_active_defrag_hits;
+ do {
+ int quit = 0;
+ if (defragLaterItem(de, &cursor, endtime))
+ quit = 1; /* time is up, we didn't finish all the work */
+
+ /* Don't start a new BIG key in this loop, because the
+ * next key can be a list, and scanLaterList must be done in one cycle. */
+ if (!cursor)
+ quit = 1;
+
+ /* Once in 16 scan iterations, 512 pointer reallocations, or 64 fields
+ * (if we have a lot of pointers in one hash bucket, or rehashing),
+ * check if we reached the time limit. */
+ if (quit || (++iterations > 16 ||
+ server.stat_active_defrag_hits - prev_defragged > 512 ||
+ server.stat_active_defrag_scanned - prev_scanned > 64)) {
+ if (quit || ustime() > endtime) {
+ if(key_defragged != server.stat_active_defrag_hits)
+ server.stat_active_defrag_key_hits++;
+ else
+ server.stat_active_defrag_key_misses++;
+ return 1;
+ }
+ iterations = 0;
+ prev_defragged = server.stat_active_defrag_hits;
+ prev_scanned = server.stat_active_defrag_scanned;
+ }
+ } while(cursor);
+ if(key_defragged != server.stat_active_defrag_hits)
+ server.stat_active_defrag_key_hits++;
+ else
+ server.stat_active_defrag_key_misses++;
+ } while(1);
+}
+
+#define INTERPOLATE(x, x1, x2, y1, y2) ( (y1) + ((x)-(x1)) * ((y2)-(y1)) / ((x2)-(x1)) )
+#define LIMIT(y, min, max) ((y)<(min)? min: ((y)>(max)? max: (y)))
+
+/* Decide if defrag is needed, and how much CPU effort to invest in it. */
+void computeDefragCycles() {
+ size_t frag_bytes;
+ float frag_pct = getAllocatorFragmentation(&frag_bytes);
+ /* If we're not already running, and below the threshold, exit. */
+ if (!server.active_defrag_running) {
+ if(frag_pct < server.active_defrag_threshold_lower || frag_bytes < server.active_defrag_ignore_bytes)
+ return;
+ }
+
+ /* Calculate the adaptive aggressiveness of the defrag */
+ int cpu_pct = INTERPOLATE(frag_pct,
+ server.active_defrag_threshold_lower,
+ server.active_defrag_threshold_upper,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ cpu_pct = LIMIT(cpu_pct,
+ server.active_defrag_cycle_min,
+ server.active_defrag_cycle_max);
+ /* We allow increasing the aggressiveness during a scan, but don't
+ * reduce it. */
+ if (!server.active_defrag_running ||
+ cpu_pct > server.active_defrag_running)
+ {
+ server.active_defrag_running = cpu_pct;
+ serverLog(LL_VERBOSE,
+ "Starting active defrag, frag=%.0f%%, frag_bytes=%zu, cpu=%d%%",
+ frag_pct, frag_bytes, cpu_pct);
+ }
+}
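+/* Worked example of the interpolation above, assuming the default
+ * configuration (thresholds 10/100, cycle min/max 5/75; illustrative,
+ * the actual values come from the config): with frag_pct=30,
+ * cpu_pct = 5 + (30-10)*(75-5)/(100-10) = 5 + 15 = 20
+ * so the defragger may consume roughly 20% of the CPU until the next
+ * recomputation. */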
+
+/* Perform incremental defragmentation work from the serverCron.
+ * This works in a similar way to activeExpireCycle, in the sense that
+ * we do incremental work across calls. */
+void activeDefragCycle(void) {
+ static int current_db = -1;
+ static unsigned long cursor = 0;
+ static redisDb *db = NULL;
+ static long long start_scan, start_stat;
+ unsigned int iterations = 0;
+ unsigned long long prev_defragged = server.stat_active_defrag_hits;
+ unsigned long long prev_scanned = server.stat_active_defrag_scanned;
+ long long start, timelimit, endtime;
+ mstime_t latency;
+ int quit = 0;
+
+ if (server.aof_child_pid!=-1 || server.rdb_child_pid!=-1)
+ return; /* Defragging memory while there's a fork will just do damage. */
+
+ /* Once a second, check if the fragmentation justifies starting a scan
+ * or making it more aggressive. */
+ run_with_period(1000) {
+ computeDefragCycles();
+ }
+ if (!server.active_defrag_running)
+ return;
+
+ /* See activeExpireCycle for how timelimit is handled. */
+ start = ustime();
+ timelimit = 1000000*server.active_defrag_running/server.hz/100;
+ if (timelimit <= 0) timelimit = 1;
+ endtime = start + timelimit;
+ latencyStartMonitor(latency);
+
+ do {
+ /* if we're not continuing a scan from the last call or loop, start a new one */
+ if (!cursor) {
+ /* finish any leftovers from previous db before moving to the next one */
+ if (db && defragLaterStep(db, endtime)) {
+ quit = 1; /* time is up, we didn't finish all the work */
+ break; /* this will exit the function and we'll continue on the next cycle */
+ }
+
+ /* Move on to next database, and stop if we reached the last one. */
+ if (++current_db >= server.dbnum) {
+ /* defrag other items not part of the db / keys */
+ defragOtherGlobals();
+
+ long long now = ustime();
+ size_t frag_bytes;
+ float frag_pct = getAllocatorFragmentation(&frag_bytes);
+ serverLog(LL_VERBOSE,
+ "Active defrag done in %dms, reallocated=%d, frag=%.0f%%, frag_bytes=%zu",
+ (int)((now - start_scan)/1000), (int)(server.stat_active_defrag_hits - start_stat), frag_pct, frag_bytes);
+
+ start_scan = now;
+ current_db = -1;
+ cursor = 0;
+ db = NULL;
+ server.active_defrag_running = 0;
+
+ computeDefragCycles(); /* if another scan is needed, start it right away */
+ if (server.active_defrag_running != 0 && ustime() < endtime)
+ continue;
+ break;
+ }
+ else if (current_db==0) {
+ /* Start a scan from the first database. */
+ start_scan = ustime();
+ start_stat = server.stat_active_defrag_hits;
+ }
+
+ db = &server.db[current_db];
+ cursor = 0;
+ }
+
+ do {
+ /* before scanning the next bucket, see if we have big keys left from the previous bucket to scan */
+ if (defragLaterStep(db, endtime)) {
+ quit = 1; /* time is up, we didn't finish all the work */
+ break; /* this will exit the function and we'll continue on the next cycle */
+ }
+
+ cursor = dictScan(db->dict, cursor, defragScanCallback, defragDictBucketCallback, db);
+
+ /* Once in 16 scan iterations, 512 pointer reallocations, or 64 keys
+ * (if we have a lot of pointers in one hash bucket or are rehashing),
+ * check if we reached the time limit.
+ * But regardless, don't start a new db in this loop, because after
+ * the last db we call defragOtherGlobals, which must be done in one cycle. */
+ if (!cursor || (++iterations > 16 ||
+ server.stat_active_defrag_hits - prev_defragged > 512 ||
+ server.stat_active_defrag_scanned - prev_scanned > 64)) {
+ if (!cursor || ustime() > endtime) {
+ quit = 1;
+ break;
+ }
+ iterations = 0;
+ prev_defragged = server.stat_active_defrag_hits;
+ prev_scanned = server.stat_active_defrag_scanned;
+ }
+ } while(cursor && !quit);
+ } while(!quit);
+
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("active-defrag-cycle",latency);
+}
+
+#else /* HAVE_DEFRAG */
+
+void activeDefragCycle(void) {
+ /* Not implemented yet. */
+}
+
+#endif
diff --git a/src/dict.c b/src/dict.c
index 29d400099..2cf9d4839 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -37,15 +37,19 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
-#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
+#ifndef DICT_BENCHMARK_MAIN
#include "redisassert.h"
+#else
+#include <assert.h>
+#endif
/* Using dictEnableResize() / dictDisableResize() we make possible to
* enable/disable resizing of the hash table as needed. This is very important
@@ -62,94 +66,33 @@ static unsigned int dict_force_resize_ratio = 5;
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
-static int _dictKeyIndex(dict *ht, const void *key);
+static long _dictKeyIndex(dict *ht, const void *key, uint64_t hash, dictEntry **existing);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
/* -------------------------- hash functions -------------------------------- */
-/* Thomas Wang's 32 bit Mix Function */
-unsigned int dictIntHashFunction(unsigned int key)
-{
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
-
-static uint32_t dict_hash_function_seed = 5381;
+static uint8_t dict_hash_function_seed[16];
-void dictSetHashFunctionSeed(uint32_t seed) {
- dict_hash_function_seed = seed;
+void dictSetHashFunctionSeed(uint8_t *seed) {
+ memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
-uint32_t dictGetHashFunctionSeed(void) {
+uint8_t *dictGetHashFunctionSeed(void) {
return dict_hash_function_seed;
}
-/* MurmurHash2, by Austin Appleby
- * Note - This code makes a few assumptions about how your machine behaves -
- * 1. We can read a 4-byte value from any address without crashing
- * 2. sizeof(int) == 4
- *
- * And it has a few limitations -
- *
- * 1. It will not work incrementally.
- * 2. It will not produce the same results on little-endian and big-endian
- * machines.
- */
-unsigned int dictGenHashFunction(const void *key, int len) {
- /* 'm' and 'r' are mixing constants generated offline.
- They're not really 'magic', they just happen to work well. */
- uint32_t seed = dict_hash_function_seed;
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- /* Initialize the hash to a 'random' value */
- uint32_t h = seed ^ len;
-
- /* Mix 4 bytes at a time into the hash */
- const unsigned char *data = (const unsigned char *)key;
-
- while(len >= 4) {
- uint32_t k = *(uint32_t*)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
+/* The default hashing function uses the SipHash implementation
+ * in siphash.c. */
- data += 4;
- len -= 4;
- }
-
- /* Handle the last few bytes of the input array */
- switch(len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0]; h *= m;
- };
-
- /* Do a few final mixes of the hash to ensure the last few
- * bytes are well-incorporated. */
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
- return (unsigned int)h;
+uint64_t dictGenHashFunction(const void *key, int len) {
+ return siphash(key,len,dict_hash_function_seed);
}
-/* And a case insensitive hash function (based on djb hash) */
-unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
- unsigned int hash = (unsigned int)dict_hash_function_seed;
-
- while (len--)
- hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
- return hash;
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
+ return siphash_nocase(buf,len,dict_hash_function_seed);
}
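+/* A minimal seeding sketch (illustrative; the random source below is an
+ * assumption, any 16 unpredictable bytes will do):
+ *
+ * uint8_t seed[16];
+ * for (int i = 0; i < 16; i++) seed[i] = rand() & 0xff;
+ * dictSetHashFunctionSeed(seed);
+ *
+ * SipHash is a keyed hash, so seeding it with unpredictable bytes at
+ * startup is what protects the hash tables from collision attacks. */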
/* ----------------------------- API implementation ------------------------- */
@@ -203,14 +146,17 @@ int dictResize(dict *d)
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
- dictht n; /* the new hash table */
- unsigned long realsize = _dictNextPower(size);
-
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
+ dictht n; /* the new hash table */
+ unsigned long realsize = _dictNextPower(size);
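+ /* _dictNextPower() rounds the requested size up to the next power of
+ * two, e.g. a requested size of 5 yields a realsize of 8 (illustrative
+ * example; sizes below DICT_HT_INITIAL_SIZE round up to it). */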
+
+ /* Rehashing to the same table size is not useful. */
+ if (realsize == d->ht[0].size) return DICT_ERR;
+
/* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
@@ -232,31 +178,31 @@ int dictExpand(dict *d, unsigned long size)
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
+ *
* Note that a rehashing step consists in moving a bucket (that may have more
- * than one key as we use chaining) from the old to the new hash table. */
+ * than one key as we use chaining) from the old to the new hash table, however
+ * since part of the hash table may be composed of empty spaces, it is not
+ * guaranteed that this function will rehash even a single bucket, since it
+ * will visit at most N*10 empty buckets in total; otherwise the amount of
+ * work it does would be unbounded and the function might block for a long time. */
int dictRehash(dict *d, int n) {
+ int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
- while(n--) {
+ while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
- /* Check if we already rehashed the whole table... */
- if (d->ht[0].used == 0) {
- zfree(d->ht[0].table);
- d->ht[0] = d->ht[1];
- _dictReset(&d->ht[1]);
- d->rehashidx = -1;
- return 0;
- }
-
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
- while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
+ while(d->ht[0].table[d->rehashidx] == NULL) {
+ d->rehashidx++;
+ if (--empty_visits == 0) return 1;
+ }
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
- unsigned int h;
+ uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
@@ -270,6 +216,17 @@ int dictRehash(dict *d, int n) {
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
+
+ /* Check if we already rehashed the whole table... */
+ if (d->ht[0].used == 0) {
+ zfree(d->ht[0].table);
+ d->ht[0] = d->ht[1];
+ _dictReset(&d->ht[1]);
+ d->rehashidx = -1;
+ return 0;
+ }
+
+ /* More to rehash... */
return 1;
}
@@ -307,31 +264,34 @@ static void _dictRehashStep(dict *d) {
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
- dictEntry *entry = dictAddRaw(d,key);
+ dictEntry *entry = dictAddRaw(d,key,NULL);
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
-/* Low level add. This function adds the entry but instead of setting
- * a value returns the dictEntry structure to the user, that will make
- * sure to fill the value field as he wishes.
+/* Low level add or find:
+ * This function adds the entry but instead of setting a value returns the
+ * dictEntry structure to the user, who can then fill in the value
+ * field as desired.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
- * entry = dictAddRaw(dict,mykey);
+ * entry = dictAddRaw(dict,mykey,NULL);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
- * If key already exists NULL is returned.
+ * If key already exists NULL is returned, and "*existing" is populated
+ * with the existing entry if existing is not NULL.
+ *
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
-dictEntry *dictAddRaw(dict *d, void *key)
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
- int index;
+ long index;
dictEntry *entry;
dictht *ht;
@@ -339,10 +299,13 @@ dictEntry *dictAddRaw(dict *d, void *key)
/* Get the index of the new element, or -1 if
* the element already exists. */
- if ((index = _dictKeyIndex(d, key)) == -1)
+ if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
- /* Allocate the memory and store the new entry */
+ /* Allocate the memory and store the new entry.
+ * Insert the element in top, with the assumption that in a database
+ * system it is more likely that recently added entries are accessed
+ * more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
@@ -354,51 +317,57 @@ dictEntry *dictAddRaw(dict *d, void *key)
return entry;
}
-/* Add an element, discarding the old if the key already exists.
+/* Add or Overwrite:
+ * Add an element, discarding the old value if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
int dictReplace(dict *d, void *key, void *val)
{
- dictEntry *entry, auxentry;
+ dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
- * does not exists dictAdd will suceed. */
- if (dictAdd(d, key, val) == DICT_OK)
+ * does not exist, dictAdd will succeed. */
+ entry = dictAddRaw(d,key,&existing);
+ if (entry) {
+ dictSetVal(d, entry, val);
return 1;
- /* It already exists, get the entry */
- entry = dictFind(d, key);
+ }
+
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
- auxentry = *entry;
- dictSetVal(d, entry, val);
+ auxentry = *existing;
+ dictSetVal(d, existing, val);
dictFreeVal(d, &auxentry);
return 0;
}
-/* dictReplaceRaw() is simply a version of dictAddRaw() that always
+/* Add or Find:
+ * dictAddOrFind() is simply a version of dictAddRaw() that always
* returns the hash entry of the specified key, even if the key already
* exists and can't be added (in that case the entry of the already
* existing key is returned.)
*
* See dictAddRaw() for more information. */
-dictEntry *dictReplaceRaw(dict *d, void *key) {
- dictEntry *entry = dictFind(d,key);
-
- return entry ? entry : dictAddRaw(d,key);
+dictEntry *dictAddOrFind(dict *d, void *key) {
+ dictEntry *entry, *existing;
+ entry = dictAddRaw(d,key,&existing);
+ return entry ? entry : existing;
}
-/* Search and remove an element */
-static int dictGenericDelete(dict *d, const void *key, int nofree)
-{
- unsigned int h, idx;
+/* Search and remove an element. This is an helper function for
+ * dictDelete() and dictUnlink(), please check the top comment
+ * of those functions. */
+static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
+ uint64_t h, idx;
dictEntry *he, *prevHe;
int table;
- if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
+ if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
+
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
@@ -407,7 +376,7 @@ static int dictGenericDelete(dict *d, const void *key, int nofree)
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
- if (dictCompareKeys(d, key, he->key)) {
+ if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
@@ -416,27 +385,59 @@ static int dictGenericDelete(dict *d, const void *key, int nofree)
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
+ zfree(he);
}
- zfree(he);
d->ht[table].used--;
- return DICT_OK;
+ return he;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
- return DICT_ERR; /* not found */
+ return NULL; /* not found */
}
+/* Remove an element, returning DICT_OK on success or DICT_ERR if the
+ * element was not found. */
int dictDelete(dict *ht, const void *key) {
- return dictGenericDelete(ht,key,0);
+ return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
-int dictDeleteNoFree(dict *ht, const void *key) {
+/* Remove an element from the table, but without actually releasing
+ * the key, value and dictionary entry. The dictionary entry is returned
+ * if the element was found (and unlinked from the table), and the user
+ * should later call `dictFreeUnlinkedEntry()` with it in order to release it.
+ * Otherwise if the key is not found, NULL is returned.
+ *
+ * This function is useful when we want to remove something from the hash
+ * table but want to use its value before actually deleting the entry.
+ * Without this function the pattern would require two lookups:
+ *
+ * entry = dictFind(...);
+ * // Do something with entry
+ * dictDelete(dictionary,entry);
+ *
+ * Thanks to this function it is possible to avoid this, and use
+ * instead:
+ *
+ * entry = dictUnlink(dictionary,entry);
+ * // Do something with entry
+ * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
+ */
+dictEntry *dictUnlink(dict *ht, const void *key) {
return dictGenericDelete(ht,key,1);
}
+/* You need to call this function to really free the entry after a call
+ * to dictUnlink(). It's safe to call this function with 'he' = NULL. */
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
+ if (he == NULL) return;
+ dictFreeKey(d, he);
+ dictFreeVal(d, he);
+ zfree(he);
+}
+
/* Destroy an entire dictionary */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
unsigned long i;
@@ -475,16 +476,16 @@ void dictRelease(dict *d)
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
- unsigned int h, idx, table;
+ uint64_t h, idx, table;
- if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
+ if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareKeys(d, key, he->key))
+ if (key==he->key || dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
@@ -609,14 +610,18 @@ void dictReleaseIterator(dictIterator *iter)
dictEntry *dictGetRandomKey(dict *d)
{
dictEntry *he, *orighe;
- unsigned int h;
+ unsigned long h;
int listlen, listele;
if (dictSize(d) == 0) return NULL;
if (dictIsRehashing(d)) _dictRehashStep(d);
if (dictIsRehashing(d)) {
do {
- h = random() % (d->ht[0].size+d->ht[1].size);
+ /* We are sure there are no elements in indexes from 0
+ * to rehashidx-1 */
+ h = d->rehashidx + (random() % (d->ht[0].size +
+ d->ht[1].size -
+ d->rehashidx));
he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
d->ht[0].table[h];
} while(he == NULL);
@@ -643,9 +648,12 @@ dictEntry *dictGetRandomKey(dict *d)
return he;
}
-/* This is a version of dictGetRandomKey() that is modified in order to
- * return multiple entries by jumping at a random place of the hash table
- * and scanning linearly for entries.
+/* This function samples the dictionary to return a few keys from random
+ * locations.
+ *
+ * It does not guarantee to return all the keys specified in 'count', nor
+ * does it guarantee to return non-duplicated elements; however it will make
+ * some effort to do both things.
*
* Returned pointers to hash table entries are stored into 'des' that
* points to an array of dictEntry pointers. The array must have room for
@@ -654,28 +662,67 @@ dictEntry *dictGetRandomKey(dict *d)
*
* The function returns the number of items stored into 'des', that may
* be less than 'count' if the hash table has less than 'count' elements
- * inside.
+ * inside, or if not enough elements were found in a reasonable amount of
+ * steps.
*
* Note that this function is not suitable when you need a good distribution
* of the returned items, but only when you need to "sample" a given number
* of continuous elements to run some kind of algorithm or to produce
* statistics. However the function is much faster than dictGetRandomKey()
- * at producing N elements, and the elements are guaranteed to be non
- * repeating. */
-unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
- int j; /* internal hash table id, 0 or 1. */
- unsigned int stored = 0;
+ * at producing N elements. */
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
+ unsigned long j; /* internal hash table id, 0 or 1. */
+ unsigned long tables; /* 1 or 2 tables? */
+ unsigned long stored = 0, maxsizemask;
+ unsigned long maxsteps;
if (dictSize(d) < count) count = dictSize(d);
- while(stored < count) {
- for (j = 0; j < 2; j++) {
- /* Pick a random point inside the hash table 0 or 1. */
- unsigned int i = random() & d->ht[j].sizemask;
- int size = d->ht[j].size;
-
- /* Make sure to visit every bucket by iterating 'size' times. */
- while(size--) {
- dictEntry *he = d->ht[j].table[i];
+ maxsteps = count*10;
+
+ /* Try to do a rehashing work proportional to 'count'. */
+ for (j = 0; j < count; j++) {
+ if (dictIsRehashing(d))
+ _dictRehashStep(d);
+ else
+ break;
+ }
+
+ tables = dictIsRehashing(d) ? 2 : 1;
+ maxsizemask = d->ht[0].sizemask;
+ if (tables > 1 && maxsizemask < d->ht[1].sizemask)
+ maxsizemask = d->ht[1].sizemask;
+
+ /* Pick a random point inside the larger table. */
+ unsigned long i = random() & maxsizemask;
+ unsigned long emptylen = 0; /* Continuous empty entries so far. */
+ while(stored < count && maxsteps--) {
+ for (j = 0; j < tables; j++) {
+ /* Invariant of the dict.c rehashing: up to the indexes already
+ * visited in ht[0] during the rehashing, there are no populated
+ * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
+ if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
+ /* Moreover, if we are currently out of range in the second
+ * table, there will be no elements in either table up to
+ * the current rehashing index, so we jump if possible.
+ * (this happens when going from big to small table). */
+ if (i >= d->ht[1].size)
+ i = d->rehashidx;
+ else
+ continue;
+ }
+ if (i >= d->ht[j].size) continue; /* Out of range for this table. */
+ dictEntry *he = d->ht[j].table[i];
+
+ /* Count contiguous empty buckets, and jump to other
+ * locations if they reach 'count' (with a minimum of 5). */
+ if (he == NULL) {
+ emptylen++;
+ if (emptylen >= 5 && emptylen > count) {
+ i = random() & maxsizemask;
+ emptylen = 0;
+ }
+ } else {
+ emptylen = 0;
while (he) {
/* Collect all the elements of the buckets found non
* empty while iterating. */
@@ -685,14 +732,11 @@ unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count) {
stored++;
if (stored == count) return stored;
}
- i = (i+1) & d->ht[j].sizemask;
}
- /* If there is only one table and we iterated it all, we should
- * already have 'count' elements. Assert this condition. */
- assert(dictIsRehashing(d) != 0);
}
+ i = (i+1) & maxsizemask;
}
- return stored; /* Never reached. */
+ return stored;
}
/* Function to reverse bits. Algorithm from:
@@ -794,10 +838,11 @@ static unsigned long rev(unsigned long v) {
unsigned long dictScan(dict *d,
unsigned long v,
dictScanFunction *fn,
+ dictScanBucketFunction* bucketfn,
void *privdata)
{
dictht *t0, *t1;
- const dictEntry *de;
+ const dictEntry *de, *next;
unsigned long m0, m1;
if (dictSize(d) == 0) return 0;
@@ -807,12 +852,23 @@ unsigned long dictScan(dict *d,
m0 = t0->sizemask;
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
de = t0->table[v & m0];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
+ /* Set unmasked bits so incrementing the reversed cursor
+ * operates on the masked bits */
+ v |= ~m0;
+
+ /* Increment the reverse cursor */
+ v = rev(v);
+ v++;
+ v = rev(v);
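+ /* Illustrative walk of the reverse-binary increment above: with an
+ * 8-slot table (m0 = 0b111) the cursor visits 0,4,2,6,1,5,3,7: the
+ * high bits change first, which is what keeps the scan complete
+ * across table resizes. */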
+
} else {
t0 = &d->ht[0];
t1 = &d->ht[1];
@@ -827,38 +883,36 @@ unsigned long dictScan(dict *d,
m1 = t1->sizemask;
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t0->table[v & m0]);
de = t0->table[v & m0];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
/* Iterate over indices in larger table that are the expansion
* of the index pointed to by the cursor in the smaller table */
do {
/* Emit entries at cursor */
+ if (bucketfn) bucketfn(privdata, &t1->table[v & m1]);
de = t1->table[v & m1];
while (de) {
+ next = de->next;
fn(privdata, de);
- de = de->next;
+ de = next;
}
- /* Increment bits not covered by the smaller mask */
- v = (((v | m0) + 1) & ~m0) | (v & m0);
+ /* Increment the reverse cursor not covered by the smaller mask.*/
+ v |= ~m1;
+ v = rev(v);
+ v++;
+ v = rev(v);
/* Continue while bits covered by mask difference is non-zero */
} while (v & (m0 ^ m1));
}
- /* Set unmasked bits so incrementing the reversed cursor
- * operates on the masked bits of the smaller table */
- v |= ~m0;
-
- /* Increment the reverse cursor */
- v = rev(v);
- v++;
- v = rev(v);
-
return v;
}
@@ -891,7 +945,7 @@ static unsigned long _dictNextPower(unsigned long size)
{
unsigned long i = DICT_HT_INITIAL_SIZE;
- if (size >= LONG_MAX) return LONG_MAX;
+ if (size >= LONG_MAX) return LONG_MAX + 1LU;
while(1) {
if (i >= size)
return i;
@@ -901,27 +955,29 @@ static unsigned long _dictNextPower(unsigned long size)
/* Returns the index of a free slot that can be populated with
* a hash entry for the given 'key'.
- * If the key already exists, -1 is returned.
+ * If the key already exists, -1 is returned
+ * and the optional output parameter may be filled.
*
* Note that if we are in the process of rehashing the hash table, the
* index is always returned in the context of the second (new) hash table. */
-static int _dictKeyIndex(dict *d, const void *key)
+static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
- unsigned int h, idx, table;
+ unsigned long idx, table;
dictEntry *he;
+ if (existing) *existing = NULL;
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
- /* Compute the key hash value */
- h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
- idx = h & d->ht[table].sizemask;
+ idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
- if (dictCompareKeys(d, key, he->key))
+ if (key==he->key || dictCompareKeys(d, key, he->key)) {
+ if (existing) *existing = he;
return -1;
+ }
he = he->next;
}
if (!dictIsRehashing(d)) break;
@@ -944,24 +1000,50 @@ void dictDisableResize(void) {
dict_can_resize = 0;
}
-#if 0
+uint64_t dictGetHash(dict *d, const void *key) {
+ return dictHashKey(d, key);
+}
+
+/* Finds the dictEntry reference by using the pointer and pre-calculated hash.
+ * 'oldptr' is a dead pointer and should not be accessed.
+ * The hash value should be provided using dictGetHash().
+ * No string / key comparison is performed.
+ * The return value is a reference to the dictEntry if found, or NULL if not found. */
+dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
+ dictEntry *he, **heref;
+ unsigned long idx, table;
-/* The following is code that we don't use for Redis currently, but that is part
-of the library. */
+ if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
+ for (table = 0; table <= 1; table++) {
+ idx = hash & d->ht[table].sizemask;
+ heref = &d->ht[table].table[idx];
+ he = *heref;
+ while(he) {
+ if (oldptr==he->key)
+ return heref;
+ heref = &he->next;
+ he = *heref;
+ }
+ if (!dictIsRehashing(d)) return NULL;
+ }
+ return NULL;
+}
-/* ----------------------- Debugging ------------------------*/
+/* ------------------------------- Debugging ---------------------------------*/
#define DICT_STATS_VECTLEN 50
-static void _dictPrintStatsHt(dictht *ht) {
+size_t _dictGetStatsHt(char *buf, size_t bufsize, dictht *ht, int tableid) {
unsigned long i, slots = 0, chainlen, maxchainlen = 0;
unsigned long totchainlen = 0;
unsigned long clvector[DICT_STATS_VECTLEN];
+ size_t l = 0;
if (ht->used == 0) {
- printf("No stats available for empty dictionaries\n");
- return;
+ return snprintf(buf,bufsize,
+ "No stats available for empty dictionaries\n");
}
+ /* Compute stats. */
for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
for (i = 0; i < ht->size; i++) {
dictEntry *he;
@@ -982,89 +1064,163 @@ static void _dictPrintStatsHt(dictht *ht) {
if (chainlen > maxchainlen) maxchainlen = chainlen;
totchainlen += chainlen;
}
- printf("Hash table stats:\n");
- printf(" table size: %ld\n", ht->size);
- printf(" number of elements: %ld\n", ht->used);
- printf(" different slots: %ld\n", slots);
- printf(" max chain length: %ld\n", maxchainlen);
- printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
- printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
- printf(" Chain length distribution:\n");
+
+ /* Generate human readable stats. */
+ l += snprintf(buf+l,bufsize-l,
+ "Hash table %d stats (%s):\n"
+ " table size: %ld\n"
+ " number of elements: %ld\n"
+ " different slots: %ld\n"
+ " max chain length: %ld\n"
+ " avg chain length (counted): %.02f\n"
+ " avg chain length (computed): %.02f\n"
+ " Chain length distribution:\n",
+ tableid, (tableid == 0) ? "main hash table" : "rehashing target",
+ ht->size, ht->used, slots, maxchainlen,
+ (float)totchainlen/slots, (float)ht->used/slots);
+
for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
if (clvector[i] == 0) continue;
- printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
+ if (l >= bufsize) break;
+ l += snprintf(buf+l,bufsize-l,
+ " %s%ld: %ld (%.02f%%)\n",
+ (i == DICT_STATS_VECTLEN-1)?">= ":"",
+ i, clvector[i], ((float)clvector[i]/ht->size)*100);
}
+
+ /* Unlike snprintf(), return the number of characters actually written. */
+ if (bufsize) buf[bufsize-1] = '\0';
+ return strlen(buf);
}
-void dictPrintStats(dict *d) {
- _dictPrintStatsHt(&d->ht[0]);
- if (dictIsRehashing(d)) {
- printf("-- Rehashing into ht[1]:\n");
- _dictPrintStatsHt(&d->ht[1]);
+void dictGetStats(char *buf, size_t bufsize, dict *d) {
+ size_t l;
+ char *orig_buf = buf;
+ size_t orig_bufsize = bufsize;
+
+ l = _dictGetStatsHt(buf,bufsize,&d->ht[0],0);
+ buf += l;
+ bufsize -= l;
+ if (dictIsRehashing(d) && bufsize > 0) {
+ _dictGetStatsHt(buf,bufsize,&d->ht[1],1);
}
+ /* Make sure there is a NULL term at the end. */
+ if (orig_bufsize) orig_buf[orig_bufsize-1] = '\0';
}
-/* ----------------------- StringCopy Hash Table Type ------------------------*/
+/* ------------------------------- Benchmark ---------------------------------*/
-static unsigned int _dictStringCopyHTHashFunction(const void *key)
-{
- return dictGenHashFunction(key, strlen(key));
-}
+#ifdef DICT_BENCHMARK_MAIN
-static void *_dictStringDup(void *privdata, const void *key)
-{
- int len = strlen(key);
- char *copy = zmalloc(len+1);
- DICT_NOTUSED(privdata);
+#include "sds.h"
- memcpy(copy, key, len);
- copy[len] = '\0';
- return copy;
+uint64_t hashCallback(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
-static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
- const void *key2)
-{
+int compareCallback(void *privdata, const void *key1, const void *key2) {
+ int l1,l2;
DICT_NOTUSED(privdata);
- return strcmp(key1, key2) == 0;
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
}
-static void _dictStringDestructor(void *privdata, void *key)
-{
+void freeCallback(void *privdata, void *val) {
DICT_NOTUSED(privdata);
- zfree(key);
+ sdsfree(val);
}
-dictType dictTypeHeapStringCopyKey = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
+dictType BenchmarkDictType = {
+ hashCallback,
+ NULL,
+ NULL,
+ compareCallback,
+ freeCallback,
+ NULL
};
-/* This is like StringCopy but does not auto-duplicate the key.
- * It's used for intepreter's shared strings. */
-dictType dictTypeHeapStrings = {
- _dictStringCopyHTHashFunction, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- NULL /* val destructor */
-};
+#define start_benchmark() start = timeInMilliseconds()
+#define end_benchmark(msg) do { \
+ elapsed = timeInMilliseconds()-start; \
+ printf(msg ": %ld items in %lld ms\n", count, elapsed); \
+} while(0);
+
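+/* A plausible way to build this benchmark (illustrative command line; the
+ * exact dependency list is an assumption):
+ *
+ * cc -O2 -DDICT_BENCHMARK_MAIN dict.c zmalloc.c sds.c siphash.c \
+ * -o dict-benchmark
+ */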
+/* dict-benchmark [count] */
+int main(int argc, char **argv) {
+ long j;
+ long long start, elapsed;
+ dict *dict = dictCreate(&BenchmarkDictType,NULL);
+ long count = 0;
+
+ if (argc == 2) {
+ count = strtol(argv[1],NULL,10);
+ } else {
+ count = 5000000;
+ }
-/* This is like StringCopy but also automatically handle dynamic
- * allocated C strings as values. */
-dictType dictTypeHeapStringCopyKeyValue = {
- _dictStringCopyHTHashFunction, /* hash function */
- _dictStringDup, /* key dup */
- _dictStringDup, /* val dup */
- _dictStringCopyHTKeyCompare, /* key compare */
- _dictStringDestructor, /* key destructor */
- _dictStringDestructor, /* val destructor */
-};
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ int retval = dictAdd(dict,sdsfromlonglong(j),(void*)j);
+ assert(retval == DICT_OK);
+ }
+ end_benchmark("Inserting");
+ assert((long)dictSize(dict) == count);
+
+ /* Wait for rehashing. */
+ while (dictIsRehashing(dict)) {
+ dictRehashMilliseconds(dict,100);
+ }
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Linear access of existing elements");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Linear access of existing elements (2nd round)");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(rand() % count);
+ dictEntry *de = dictFind(dict,key);
+ assert(de != NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Random access of existing elements");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(rand() % count);
+ key[0] = 'X';
+ dictEntry *de = dictFind(dict,key);
+ assert(de == NULL);
+ sdsfree(key);
+ }
+ end_benchmark("Accessing missing");
+
+ start_benchmark();
+ for (j = 0; j < count; j++) {
+ sds key = sdsfromlonglong(j);
+ int retval = dictDelete(dict,key);
+ assert(retval == DICT_OK);
+ key[0] += 17; /* Change first number to letter. */
+ retval = dictAdd(dict,key,(void*)j);
+ assert(retval == DICT_OK);
+ }
+ end_benchmark("Removing and adding");
+}
#endif
diff --git a/src/dict.h b/src/dict.h
index 7421078f8..62018cc44 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -56,7 +56,7 @@ typedef struct dictEntry {
} dictEntry;
typedef struct dictType {
- unsigned int (*hashFunction)(const void *key);
+ uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
@@ -78,7 +78,7 @@ typedef struct dict {
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
- int iterators; /* number of iterators currently running */
+ unsigned long iterators; /* number of iterators currently running */
} dict;
/* If safe is set to 1 this is a safe iterator, that means, you can call
@@ -95,6 +95,7 @@ typedef struct dictIterator {
} dictIterator;
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
+typedef void (dictScanBucketFunction)(void *privdata, dictEntry **bucketref);
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4
@@ -106,19 +107,19 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
#define dictSetVal(d, entry, _val_) do { \
if ((d)->type->valDup) \
- entry->v.val = (d)->type->valDup((d)->privdata, _val_); \
+ (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
else \
- entry->v.val = (_val_); \
+ (entry)->v.val = (_val_); \
} while(0)
#define dictSetSignedIntegerVal(entry, _val_) \
- do { entry->v.s64 = _val_; } while(0)
+ do { (entry)->v.s64 = _val_; } while(0)
#define dictSetUnsignedIntegerVal(entry, _val_) \
- do { entry->v.u64 = _val_; } while(0)
+ do { (entry)->v.u64 = _val_; } while(0)
#define dictSetDoubleVal(entry, _val_) \
- do { entry->v.d = _val_; } while(0)
+ do { (entry)->v.d = _val_; } while(0)
#define dictFreeKey(d, entry) \
if ((d)->type->keyDestructor) \
@@ -126,9 +127,9 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
#define dictSetKey(d, entry, _key_) do { \
if ((d)->type->keyDup) \
- entry->key = (d)->type->keyDup((d)->privdata, _key_); \
+ (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \
else \
- entry->key = (_key_); \
+ (entry)->key = (_key_); \
} while(0)
#define dictCompareKeys(d, key1, key2) \
@@ -150,11 +151,12 @@ typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
-dictEntry *dictAddRaw(dict *d, void *key);
+dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
+dictEntry *dictAddOrFind(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
-dictEntry *dictReplaceRaw(dict *d, void *key);
int dictDelete(dict *d, const void *key);
-int dictDeleteNoFree(dict *d, const void *key);
+dictEntry *dictUnlink(dict *ht, const void *key);
+void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
@@ -164,18 +166,20 @@ dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
-unsigned int dictGetRandomKeys(dict *d, dictEntry **des, unsigned int count);
-void dictPrintStats(dict *d);
-unsigned int dictGenHashFunction(const void *key, int len);
-unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
+unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
+void dictGetStats(char *buf, size_t bufsize, dict *d);
+uint64_t dictGenHashFunction(const void *key, int len);
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
-void dictSetHashFunctionSeed(unsigned int initval);
-unsigned int dictGetHashFunctionSeed(void);
-unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
+void dictSetHashFunctionSeed(uint8_t *seed);
+uint8_t *dictGetHashFunctionSeed(void);
+unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
+uint64_t dictGetHash(dict *d, const void *key);
+dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
/* Hash table types */
extern dictType dictTypeHeapStringCopyKey;
diff --git a/src/endianconv.h b/src/endianconv.h
index 08f553136..475f72b08 100644
--- a/src/endianconv.h
+++ b/src/endianconv.h
@@ -43,12 +43,12 @@ uint16_t intrev16(uint16_t v);
uint32_t intrev32(uint32_t v);
uint64_t intrev64(uint64_t v);
-/* variants of the function doing the actual convertion only if the target
+/* variants of the function doing the actual conversion only if the target
* host is big endian */
#if (BYTE_ORDER == LITTLE_ENDIAN)
-#define memrev16ifbe(p)
-#define memrev32ifbe(p)
-#define memrev64ifbe(p)
+#define memrev16ifbe(p) ((void)(0))
+#define memrev32ifbe(p) ((void)(0))
+#define memrev64ifbe(p) ((void)(0))
#define intrev16ifbe(v) (v)
#define intrev32ifbe(v) (v)
#define intrev64ifbe(v) (v)
diff --git a/src/evict.c b/src/evict.c
new file mode 100644
index 000000000..ecc25dd8e
--- /dev/null
+++ b/src/evict.c
@@ -0,0 +1,620 @@
+/* Maxmemory directive handling (LRU eviction and other policies).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+
+/* ----------------------------------------------------------------------------
+ * Data structures
+ * --------------------------------------------------------------------------*/
+
+/* To improve the quality of the LRU approximation we take a set of keys
+ * that are good candidate for eviction across freeMemoryIfNeeded() calls.
+ *
+ * Entries inside the eviction pool are kept ordered by idle time, putting
+ * greater idle times to the right (ascending order).
+ *
+ * When an LFU policy is used instead, a reverse frequency indication is used
+ * instead of the idle time, so that we still evict by larger value (larger
+ * inverse frequency means to evict keys with the least frequent accesses).
+ *
+ * Empty entries have the key pointer set to NULL. */
+#define EVPOOL_SIZE 16
+#define EVPOOL_CACHED_SDS_SIZE 255
+struct evictionPoolEntry {
+ unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
+ sds key; /* Key name. */
+ sds cached; /* Cached SDS object for key name. */
+ int dbid; /* Key DB number. */
+};
+
+static struct evictionPoolEntry *EvictionPoolLRU;
+
+/* ----------------------------------------------------------------------------
+ * Implementation of eviction, aging and LRU
+ * --------------------------------------------------------------------------*/
+
+/* Return the LRU clock, based on the clock resolution. This is a time
+ * in a reduced-bits format that can be used to set and check the
+ * object->lru field of redisObject structures. */
+unsigned int getLRUClock(void) {
+ return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
+}
+
+/* This function is used to obtain the current LRU clock.
+ * If the LRU clock is refreshed at least as often as its resolution (as it
+ * should be in production servers) we return the precomputed value;
+ * otherwise we resort to a system call. */
+unsigned int LRU_CLOCK(void) {
+ unsigned int lruclock;
+ if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
+ atomicGet(server.lruclock,lruclock);
+ } else {
+ lruclock = getLRUClock();
+ }
+ return lruclock;
+}
+
+/* Given an object, returns the minimum number of milliseconds for which the
+ * object was not requested, using an approximated LRU algorithm. */
+unsigned long long estimateObjectIdleTime(robj *o) {
+ unsigned long long lruclock = LRU_CLOCK();
+ if (lruclock >= o->lru) {
+ return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
+ } else {
+ return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
+ LRU_CLOCK_RESOLUTION;
+ }
+}
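+/* Worked example (assuming the usual LRU_CLOCK_RESOLUTION of 1000 ms):
+ * if o->lru was recorded at clock tick 100 and the clock now reads 160,
+ * the estimate is (160-100)*1000 = 60000 ms. If the clock wrapped, say
+ * o->lru == LRU_CLOCK_MAX-9 with the clock at 50, the second branch
+ * yields (50+9)*1000 = 59000 ms. */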
+
+/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
+ * file to limit the max memory used by the server, before processing a
+ * command.
+ *
+ * The goal of the function is to free enough memory to keep Redis under the
+ * configured memory limit.
+ *
+ * The function starts calculating how many bytes should be freed to keep
+ * Redis under the limit, and enters a loop selecting the best keys to
+ * evict according to the configured policy.
+ *
+ * If all the bytes needed to get back under the limit were freed, the
+ * function returns C_OK, otherwise C_ERR is returned, and the caller
+ * should block the execution of commands that will result in more memory
+ * used by the server.
+ *
+ * ------------------------------------------------------------------------
+ *
+ * LRU approximation algorithm
+ *
+ * Redis uses an approximation of the LRU algorithm that runs in constant
+ * memory. Every time there is a key to expire, we sample N keys (with
+ * N very small, usually around 5) to populate a pool of the M best keys
+ * to evict (the pool size is defined by EVPOOL_SIZE).
+ *
+ * The N sampled keys are added to the pool of good keys to expire (the ones
+ * with an old access time) if they are better than one of the current keys
+ * in the pool.
+ *
+ * After the pool is populated, the best key we have in the pool is expired.
+ * However note that we don't remove keys from the pool when they are deleted
+ * so the pool may contain keys that no longer exist.
+ *
+ * When we try to evict a key and none of the entries in the pool exist
+ * anymore, we populate it again. This time we'll be sure that the pool has at least
+ * one key that can be evicted, if there is at least one key that can be
+ * evicted in the whole database. */
+
+/* Create a new eviction pool. */
+void evictionPoolAlloc(void) {
+ struct evictionPoolEntry *ep;
+ int j;
+
+ ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
+ for (j = 0; j < EVPOOL_SIZE; j++) {
+ ep[j].idle = 0;
+ ep[j].key = NULL;
+ ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
+ ep[j].dbid = 0;
+ }
+ EvictionPoolLRU = ep;
+}
+
+/* This is a helper function for freeMemoryIfNeeded(); it is used in order
+ * to populate the evictionPool with a few entries every time we want to
+ * expire a key. Keys with idle time greater than one of the current
+ * keys are added. Keys are always added if there are free entries.
+ *
+ * We insert keys in place in ascending order, so keys with the smaller
+ * idle time are on the left, and keys with the higher idle time on the
+ * right. */
+
+void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
+ int j, k, count;
+ dictEntry *samples[server.maxmemory_samples];
+
+ count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
+ for (j = 0; j < count; j++) {
+ unsigned long long idle;
+ sds key;
+ robj *o;
+ dictEntry *de;
+
+ de = samples[j];
+ key = dictGetKey(de);
+
+ /* If the dictionary we are sampling from is not the main
+ * dictionary (but the expires one) we need to lookup the key
+ * again in the key dictionary to obtain the value object. */
+ if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
+ if (sampledict != keydict) de = dictFind(keydict, key);
+ o = dictGetVal(de);
+ }
+
+ /* Calculate the idle time according to the policy. This is called
+ * idle just because the code initially handled LRU, but it is in fact
+ * just a score where a higher score means better candidate. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
+ idle = estimateObjectIdleTime(o);
+ } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ /* When we use an LRU policy, we sort the keys by idle time
+ * so that we expire keys starting from greater idle time.
+ * However when the policy is an LFU one, we have a frequency
+ * estimation, and we want to evict keys with lower frequency
+ * first. So inside the pool we put objects using the inverted
+ * frequency, subtracting the actual frequency from the maximum
+ * frequency of 255. */
+ idle = 255-LFUDecrAndReturn(o);
+ } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
+ /* In this case the sooner the expire the better. */
+ idle = ULLONG_MAX - (long)dictGetVal(de);
+ } else {
+ serverPanic("Unknown eviction policy in evictionPoolPopulate()");
+ }
+
+ /* Insert the element inside the pool.
+ * First, find the first empty bucket or the first populated
+ * bucket that has an idle time smaller than our idle time. */
+ k = 0;
+ while (k < EVPOOL_SIZE &&
+ pool[k].key &&
+ pool[k].idle < idle) k++;
+ if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
+ /* Can't insert if the element is < the worst element we have
+ * and there are no empty buckets. */
+ continue;
+ } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
+ /* Inserting into empty position. No setup needed before insert. */
+ } else {
+ /* Inserting in the middle. Now k points to the first element
+ * greater than the element to insert. */
+ if (pool[EVPOOL_SIZE-1].key == NULL) {
+ /* Free space on the right? Insert at k shifting
+ * all the elements from k to end to the right. */
+
+ /* Save SDS before overwriting. */
+ sds cached = pool[EVPOOL_SIZE-1].cached;
+ memmove(pool+k+1,pool+k,
+ sizeof(pool[0])*(EVPOOL_SIZE-k-1));
+ pool[k].cached = cached;
+ } else {
+ /* No free space on right? Insert at k-1 */
+ k--;
+ /* Shift all elements on the left of k (included) to the
+ * left, so we discard the element with smaller idle time. */
+ sds cached = pool[0].cached; /* Save SDS before overwriting. */
+ if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
+ memmove(pool,pool+1,sizeof(pool[0])*k);
+ pool[k].cached = cached;
+ }
+ }
+
+ /* Try to reuse the cached SDS string allocated in the pool entry,
+ * because allocating and deallocating this object is costly
+ * (according to the profiler, not my imagination. Remember:
+ * premature optimization bla bla bla). */
+ int klen = sdslen(key);
+ if (klen > EVPOOL_CACHED_SDS_SIZE) {
+ pool[k].key = sdsdup(key);
+ } else {
+ memcpy(pool[k].cached,key,klen+1);
+ sdssetlen(pool[k].cached,klen);
+ pool[k].key = pool[k].cached;
+ }
+ pool[k].idle = idle;
+ pool[k].dbid = dbid;
+ }
+}
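+
+/* A short walk-through of the insertion logic above, with a hypothetical
+ * pool of size 4 holding idle times [3, 7, 12, 25] (ascending, as the
+ * invariant requires): inserting idle=10 stops at k=2; since no bucket
+ * is free, k becomes 1, the leftmost entry shifts out, the worst
+ * candidate (3) is discarded and the pool becomes [7, 10, 12, 25].
+ * Inserting idle=1 instead stops at k=0 with a full pool and is simply
+ * skipped, being a worse candidate than everything already there. */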
+
+/* ----------------------------------------------------------------------------
+ * LFU (Least Frequently Used) implementation.
+ *
+ * We have 24 total bits of space in each object in order to implement
+ * an LFU (Least Frequently Used) eviction policy, since we re-use the
+ * LRU field for this purpose.
+ *
+ * We split the 24 bits into two fields:
+ *
+ * 16 bits 8 bits
+ * +----------------+--------+
+ * + Last decr time | LOG_C |
+ * +----------------+--------+
+ *
+ * LOG_C is a logarithmic counter that provides an indication of the access
+ * frequency. However this field must also be decremented otherwise what used
+ * to be a frequently accessed key in the past, will remain ranked like that
+ * forever, while we want the algorithm to adapt to access pattern changes.
+ *
+ * So the remaining 16 bits are used in order to store the "decrement time",
+ * a reduced-precision Unix time (we take 16 bits of the time converted
+ * in minutes, since we don't care about wrapping around): based on it,
+ * the LOG_C counter is decayed according to how long ago the last
+ * decrement happened.
+ *
+ * New keys don't start at zero, in order to have the ability to collect
+ * some accesses before being trashed away, so they start at LFU_INIT_VAL.
+ * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
+ * when incrementing the key, so that keys starting at LFU_INIT_VAL
+ * (or having a smaller value) have a very high chance of being incremented
+ * on access.
+ *
+ * During decrement, the logarithmic counter is reduced by one point for
+ * every server.lfu_decay_time minutes elapsed since the last decrement,
+ * saturating at zero (see LFUDecrAndReturn()).
+ * --------------------------------------------------------------------------*/
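+
+/* Concretely, the two fields are recovered from the 24 bit lru field
+ * with a shift and a mask, exactly as LFUDecrAndReturn() does below:
+ *
+ *   unsigned long ldt = o->lru >> 8;      // 16 bits of last decr time
+ *   unsigned long counter = o->lru & 255; // 8 bits of log counter
+ */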
+
+/* Return the current time in minutes, just taking the least significant
+ * 16 bits. The returned time is suitable to be stored as LDT (last decrement
+ * time) for the LFU implementation. */
+unsigned long LFUGetTimeInMinutes(void) {
+ return (server.unixtime/60) & 65535;
+}
+
+/* Given an object last access time, compute the minimum number of minutes
+ * that elapsed since the last access. Handle overflow (ldt greater than
+ * the current 16 bits minutes time) considering the time as wrapping
+ * exactly once. */
+unsigned long LFUTimeElapsed(unsigned long ldt) {
+ unsigned long now = LFUGetTimeInMinutes();
+ if (now >= ldt) return now-ldt;
+ return 65535-ldt+now;
+}
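+
+/* For example, if the last decrement happened at ldt = 65530 and the
+ * 16 bit clock has since wrapped to now = 4, the elapsed time is
+ * computed as 65535 - 65530 + 4 = 9 minutes, under the assumption that
+ * the clock wrapped exactly once. */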
+
+/* Logarithmically increment a counter. The greater the current counter
+ * value, the less likely it is that it gets really incremented. The
+ * counter saturates at 255. */
+uint8_t LFULogIncr(uint8_t counter) {
+ if (counter == 255) return 255;
+ double r = (double)rand()/RAND_MAX;
+ double baseval = counter - LFU_INIT_VAL;
+ if (baseval < 0) baseval = 0;
+ double p = 1.0/(baseval*server.lfu_log_factor+1);
+ if (r < p) counter++;
+ return counter;
+}
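+
+/* A numeric illustration of the increment probability, assuming
+ * LFU_INIT_VAL is 5 and server.lfu_log_factor has its default value of
+ * 10: a counter at 5 or below is incremented with probability 1, a
+ * counter of 15 with probability 1/((15-5)*10+1) = 1/101, and a counter
+ * of 105 with probability 1/1001, so the counter grows ever more slowly
+ * as it approaches the saturation value of 255. */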
+
+/* If the object's decrement time is reached, decrement the LFU counter
+ * but do not update the LFU fields of the object: we update the access
+ * time and counter in an explicit way when the object is really accessed.
+ * The counter is decremented by one point for every server.lfu_decay_time
+ * minutes elapsed since the last decrement.
+ * Return the object frequency counter.
+ *
+ * This function is used in order to scan the dataset for the best object
+ * to evict: as we check the candidates, we incrementally decrement the
+ * counter of the scanned objects if needed. */
+unsigned long LFUDecrAndReturn(robj *o) {
+ unsigned long ldt = o->lru >> 8;
+ unsigned long counter = o->lru & 255;
+ unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
+ if (num_periods)
+ counter = (num_periods > counter) ? 0 : counter - num_periods;
+ return counter;
+}
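+
+/* For example, with server.lfu_decay_time = 1 (decay one point per
+ * elapsed minute), an object with counter 10 last decremented 3 minutes
+ * ago is returned with counter 7; after 20 idle minutes the counter
+ * bottoms out at 0 rather than wrapping around. */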
+
+/* ----------------------------------------------------------------------------
+ * The external API for eviction: freeMemoryIfNeeded() is called by the
+ * server when there is data to add, in order to make space if needed.
+ * --------------------------------------------------------------------------*/
+
+/* We don't want to count AOF buffers and slaves output buffers as
+ * used memory: the eviction should use mostly data size. This function
+ * returns the sum of the AOF and slaves buffers. */
+size_t freeMemoryGetNotCountedMemory(void) {
+ size_t overhead = 0;
+ int slaves = listLength(server.slaves);
+
+ if (slaves) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = listNodeValue(ln);
+ overhead += getClientOutputBufferMemoryUsage(slave);
+ }
+ }
+ if (server.aof_state != AOF_OFF) {
+ overhead += sdslen(server.aof_buf)+aofRewriteBufferSize();
+ }
+ return overhead;
+}
+
+/* Get the memory status from the point of view of the maxmemory directive:
+ * if the memory used is under the maxmemory setting then C_OK is returned.
+ * Otherwise, if we are over the memory limit, the function returns
+ * C_ERR.
+ *
+ * The function may return additional info via reference, but only if the
+ * pointers to the respective arguments are not NULL. Certain fields are
+ * populated only when C_ERR is returned:
+ *
+ * 'total' total amount of bytes used.
+ * (Populated both for C_ERR and C_OK)
+ *
+ * 'logical' the amount of memory used minus the slaves/AOF buffers.
+ * (Populated when C_ERR is returned)
+ *
+ * 'tofree' the amount of memory that should be released
+ * in order to return back into the memory limits.
+ * (Populated when C_ERR is returned)
+ *
+ * 'level' the ratio between the memory currently used and the
+ * maxmemory setting. Usually ranges from 0 to 1, and may be
+ * > 1 if we are over the memory limit.
+ * (Populated both for C_ERR and C_OK)
+ */
+int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
+ size_t mem_reported, mem_used, mem_tofree;
+
+ /* Check if we are over the memory usage limit. If we are not, no need
+ * to subtract the slaves output buffers. We can just return ASAP. */
+ mem_reported = zmalloc_used_memory();
+ if (total) *total = mem_reported;
+
+ /* We may return ASAP if there is no need to compute the level. */
+ int return_ok_asap = !server.maxmemory || mem_reported <= server.maxmemory;
+ if (return_ok_asap && !level) return C_OK;
+
+ /* Remove the size of slaves output buffers and AOF buffer from the
+ * count of used memory. */
+ mem_used = mem_reported;
+ size_t overhead = freeMemoryGetNotCountedMemory();
+ mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
+
+ /* Compute the ratio of memory usage. */
+ if (level) {
+ if (!server.maxmemory) {
+ *level = 0;
+ } else {
+ *level = (float)mem_used / (float)server.maxmemory;
+ }
+ }
+
+ if (return_ok_asap) return C_OK;
+
+ /* Check if we are still over the memory limit. */
+ if (mem_used <= server.maxmemory) return C_OK;
+
+ /* Compute how much memory we need to free. */
+ mem_tofree = mem_used - server.maxmemory;
+
+ if (logical) *logical = mem_used;
+ if (tofree) *tofree = mem_tofree;
+
+ return C_ERR;
+}
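+
+/* A worked example with hypothetical numbers: with maxmemory set to
+ * 100 MB, zmalloc_used_memory() reporting 120 MB and 15 MB of slaves/AOF
+ * buffers, the function computes mem_used = 105 MB and returns C_ERR
+ * with *level = 1.05, *logical = 105 MB and *tofree = 5 MB, since we are
+ * over the limit even after discounting the buffers. */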
+
+/* This function is periodically called to see if there is memory to free
+ * according to the current "maxmemory" settings. In case we are over the
+ * memory limit, the function will try to free some memory to return back
+ * under the limit.
+ *
+ * The function returns C_OK if we are under the memory limit or if we
+ * were over the limit, but the attempt to free memory was successful.
+ * Otherwise if we are over the memory limit, but not enough memory
+ * was freed to return back under the limit, the function returns C_ERR. */
+int freeMemoryIfNeeded(void) {
+ size_t mem_reported, mem_tofree, mem_freed;
+ mstime_t latency, eviction_latency;
+ long long delta;
+ int slaves = listLength(server.slaves);
+
+ /* When clients are paused the dataset should be static not just from the
+ * POV of clients not being able to write, but also from the POV of
+ * expires and evictions of keys not being performed. */
+ if (clientsArePaused()) return C_OK;
+ if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
+ return C_OK;
+
+ mem_freed = 0;
+
+ if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
+ goto cant_free; /* We need to free memory, but policy forbids. */
+
+ latencyStartMonitor(latency);
+ while (mem_freed < mem_tofree) {
+ int j, k, i, keys_freed = 0;
+ static unsigned int next_db = 0;
+ sds bestkey = NULL;
+ int bestdbid;
+ redisDb *db;
+ dict *dict;
+ dictEntry *de;
+
+ if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
+ server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
+ {
+ struct evictionPoolEntry *pool = EvictionPoolLRU;
+
+ while(bestkey == NULL) {
+ unsigned long total_keys = 0, keys;
+
+ /* We don't want to make local-db choices when expiring keys,
+ * so to start, populate the eviction pool sampling keys from
+ * every DB. */
+ for (i = 0; i < server.dbnum; i++) {
+ db = server.db+i;
+ dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
+ db->dict : db->expires;
+ if ((keys = dictSize(dict)) != 0) {
+ evictionPoolPopulate(i, dict, db->dict, pool);
+ total_keys += keys;
+ }
+ }
+ if (!total_keys) break; /* No keys to evict. */
+
+ /* Go backward from best to worst element to evict. */
+ for (k = EVPOOL_SIZE-1; k >= 0; k--) {
+ if (pool[k].key == NULL) continue;
+ bestdbid = pool[k].dbid;
+
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
+ de = dictFind(server.db[pool[k].dbid].dict,
+ pool[k].key);
+ } else {
+ de = dictFind(server.db[pool[k].dbid].expires,
+ pool[k].key);
+ }
+
+ /* Remove the entry from the pool. */
+ if (pool[k].key != pool[k].cached)
+ sdsfree(pool[k].key);
+ pool[k].key = NULL;
+ pool[k].idle = 0;
+
+ /* If the key exists, it is our pick. Otherwise it is
+ * a ghost and we need to try the next element. */
+ if (de) {
+ bestkey = dictGetKey(de);
+ break;
+ } else {
+ /* Ghost... Iterate again. */
+ }
+ }
+ }
+ }
+
+ /* volatile-random and allkeys-random policy */
+ else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
+ server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
+ {
+ /* When evicting a random key, we try to evict a key for
+ * each DB, so we use the static 'next_db' variable to
+ * incrementally visit all DBs. */
+ for (i = 0; i < server.dbnum; i++) {
+ j = (++next_db) % server.dbnum;
+ db = server.db+j;
+ dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
+ db->dict : db->expires;
+ if (dictSize(dict) != 0) {
+ de = dictGetRandomKey(dict);
+ bestkey = dictGetKey(de);
+ bestdbid = j;
+ break;
+ }
+ }
+ }
+
+ /* Finally remove the selected key. */
+ if (bestkey) {
+ db = server.db+bestdbid;
+ robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
+ propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
+ /* We compute the amount of memory freed by db*Delete() alone.
+ * It is possible that actually the memory needed to propagate
+ * the DEL in AOF and replication link is greater than the one
+ * we are freeing removing the key, but we can't account for
+ * that otherwise we would never exit the loop.
+ *
+ * AOF and Output buffer memory will be freed eventually so
+ * we only care about memory used by the key space. */
+ delta = (long long) zmalloc_used_memory();
+ latencyStartMonitor(eviction_latency);
+ if (server.lazyfree_lazy_eviction)
+ dbAsyncDelete(db,keyobj);
+ else
+ dbSyncDelete(db,keyobj);
+ latencyEndMonitor(eviction_latency);
+ latencyAddSampleIfNeeded("eviction-del",eviction_latency);
+ latencyRemoveNestedEvent(latency,eviction_latency);
+ delta -= (long long) zmalloc_used_memory();
+ mem_freed += delta;
+ server.stat_evictedkeys++;
+ notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
+ keyobj, db->id);
+ decrRefCount(keyobj);
+ keys_freed++;
+
+ /* When the memory to free starts to be big enough, we may
+ * start spending so much time here that it is impossible to
+ * deliver data to the slaves fast enough, so we force the
+ * transmission here inside the loop. */
+ if (slaves) flushSlavesOutputBuffers();
+
+ /* Normally our stop condition is the ability to release
+ * a fixed, pre-computed amount of memory. However when we
+ * are deleting objects in another thread, it's better to
+ * check, from time to time, if we already reached our target
+ * memory, since the "mem_freed" amount is computed only
+ * across the dbAsyncDelete() call, while the thread can
+ * release the memory all the time. */
+ if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
+ if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
+ /* Let's satisfy our stop condition. */
+ mem_freed = mem_tofree;
+ }
+ }
+ }
+
+ if (!keys_freed) {
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("eviction-cycle",latency);
+ goto cant_free; /* nothing to free... */
+ }
+ }
+ latencyEndMonitor(latency);
+ latencyAddSampleIfNeeded("eviction-cycle",latency);
+ return C_OK;
+
+cant_free:
+ /* We are here if we are not able to reclaim memory. There is only one
+ * last thing we can try: check if the lazyfree thread has jobs in queue
+ * and wait... */
+ while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
+ if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
+ break;
+ usleep(1000);
+ }
+ return C_ERR;
+}
+
diff --git a/src/expire.c b/src/expire.c
new file mode 100644
index 000000000..0b92ee3fe
--- /dev/null
+++ b/src/expire.c
@@ -0,0 +1,526 @@
+/* Implementation of EXPIRE (keys with fixed time to live).
+ *
+ * ----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+
+/*-----------------------------------------------------------------------------
+ * Incremental collection of expired keys.
+ *
+ * When keys are accessed they are expired on-access. However we need a
+ * mechanism in order to ensure keys are eventually removed when expired even
+ * if no access is performed on them.
+ *----------------------------------------------------------------------------*/
+
+/* Helper function for the activeExpireCycle() function.
+ * This function will try to expire the key that is stored in the hash table
+ * entry 'de' of the 'expires' hash table of a Redis database.
+ *
+ * If the key is found to be expired, it is removed from the database and
+ * 1 is returned. Otherwise no operation is performed and 0 is returned.
+ *
+ * When a key is expired, server.stat_expiredkeys is incremented.
+ *
+ * The parameter 'now' is the current time in milliseconds, and is passed
+ * to the function to avoid too many gettimeofday() syscalls. */
+int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
+ long long t = dictGetSignedIntegerVal(de);
+ if (now > t) {
+ sds key = dictGetKey(de);
+ robj *keyobj = createStringObject(key,sdslen(key));
+
+ propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
+ if (server.lazyfree_lazy_expire)
+ dbAsyncDelete(db,keyobj);
+ else
+ dbSyncDelete(db,keyobj);
+ notifyKeyspaceEvent(NOTIFY_EXPIRED,
+ "expired",keyobj,db->id);
+ decrRefCount(keyobj);
+ server.stat_expiredkeys++;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Try to expire a few timed out keys. The algorithm used is adaptive and
+ * will use few CPU cycles if there are few expiring keys, otherwise
+ * it will get more aggressive to avoid too much memory being used by
+ * keys that can already be removed from the keyspace.
+ *
+ * No more than CRON_DBS_PER_CALL databases are tested at every
+ * iteration.
+ *
+ * This kind of call is used when Redis detects that timelimit_exit is
+ * true, so there is more work to do, and we do it more incrementally from
+ * the beforeSleep() function of the event loop.
+ *
+ * Expire cycle type:
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
+ * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
+ * microseconds, and is not repeated again before the same amount of time.
+ *
+ * If type is ACTIVE_EXPIRE_CYCLE_SLOW, the normal expire cycle is
+ * executed, where the time limit is a percentage of the REDIS_HZ period
+ * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */
+
+void activeExpireCycle(int type) {
+ /* This function has some global state in order to continue the work
+ * incrementally across calls. */
+ static unsigned int current_db = 0; /* Last DB tested. */
+ static int timelimit_exit = 0; /* Time limit hit in previous call? */
+ static long long last_fast_cycle = 0; /* When last fast cycle ran. */
+
+ int j, iteration = 0;
+ int dbs_per_call = CRON_DBS_PER_CALL;
+ long long start = ustime(), timelimit, elapsed;
+
+ /* When clients are paused the dataset should be static not just from the
+ * POV of clients not being able to write, but also from the POV of
+ * expires and evictions of keys not being performed. */
+ if (clientsArePaused()) return;
+
+ if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
+ /* Don't start a fast cycle if the previous cycle did not exit
+ * for time limit. Also don't repeat a fast cycle for the same period
+ * as the fast cycle total duration itself. */
+ if (!timelimit_exit) return;
+ if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
+ last_fast_cycle = start;
+ }
+
+ /* We usually should test CRON_DBS_PER_CALL DBs per iteration, with
+ * two exceptions:
+ *
+ * 1) Don't test more DBs than we have.
+ * 2) If last time we hit the time limit, we want to scan all DBs
+ * in this iteration, as there is work to do in some DB and we don't want
+ * expired keys to use memory for too much time. */
+ if (dbs_per_call > server.dbnum || timelimit_exit)
+ dbs_per_call = server.dbnum;
+
+ /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
+ * per iteration. Since this function gets called with a frequency of
+ * server.hz times per second, the following is the max amount of
+ * microseconds we can spend in this function. */
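+ /* For example, assuming the usual ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC
+ * of 25 and the default server.hz of 10, this works out to
+ * 1000000*25/10/100 = 25000 microseconds per call, i.e. at most 25%
+ * of the CPU across the ten calls performed each second. */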
+ timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
+ timelimit_exit = 0;
+ if (timelimit <= 0) timelimit = 1;
+
+ if (type == ACTIVE_EXPIRE_CYCLE_FAST)
+ timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
+
+ /* Accumulate some global stats as we expire keys, to have some idea
+ * about the number of keys that are already logically expired, but still
+ * existing inside the database. */
+ long total_sampled = 0;
+ long total_expired = 0;
+
+ for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
+ int expired;
+ redisDb *db = server.db+(current_db % server.dbnum);
+
+ /* Increment the DB now so we are sure if we run out of time
+ * in the current DB we'll restart from the next. This allows us
+ * to distribute the time evenly across DBs. */
+ current_db++;
+
+ /* Continue to expire if at the end of the cycle more than 25%
+ * of the keys were expired. */
+ do {
+ unsigned long num, slots;
+ long long now, ttl_sum;
+ int ttl_samples;
+ iteration++;
+
+ /* If there is nothing to expire try next DB ASAP. */
+ if ((num = dictSize(db->expires)) == 0) {
+ db->avg_ttl = 0;
+ break;
+ }
+ slots = dictSlots(db->expires);
+ now = mstime();
+
+ /* When less than 1% of the slots are filled, getting random
+ * keys is expensive, so stop here waiting for better times...
+ * The dictionary will be resized ASAP. */
+ if (num && slots > DICT_HT_INITIAL_SIZE &&
+ (num*100/slots < 1)) break;
+
+ /* The main collection cycle. Sample random keys among keys
+ * with an expire set, checking for expired ones. */
+ expired = 0;
+ ttl_sum = 0;
+ ttl_samples = 0;
+
+ if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
+ num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
+
+ while (num--) {
+ dictEntry *de;
+ long long ttl;
+
+ if ((de = dictGetRandomKey(db->expires)) == NULL) break;
+ ttl = dictGetSignedIntegerVal(de)-now;
+ if (activeExpireCycleTryExpire(db,de,now)) expired++;
+ if (ttl > 0) {
+ /* We want the average TTL of keys not yet expired. */
+ ttl_sum += ttl;
+ ttl_samples++;
+ }
+ total_sampled++;
+ }
+ total_expired += expired;
+
+ /* Update the average TTL stats for this database. */
+ if (ttl_samples) {
+ long long avg_ttl = ttl_sum/ttl_samples;
+
+ /* Do a simple running average with a few samples.
+ * We just use the current estimate with a weight of 2%
+ * and the previous estimate with a weight of 98%. */
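+ /* E.g. a previous estimate of 10000 ms combined with a new
+ * sample of 20000 ms yields (10000/50)*49 + (20000/50) =
+ * 9800 + 400 = 10200 ms. */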
+ if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
+ db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
+ }
+
+ /* We can't block forever here even if there are many keys to
+ * expire. So after a given amount of milliseconds return to the
+ * caller waiting for the other active expire cycle. */
+ if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
+ elapsed = ustime()-start;
+ if (elapsed > timelimit) {
+ timelimit_exit = 1;
+ server.stat_expired_time_cap_reached_count++;
+ break;
+ }
+ }
+ /* We don't repeat the cycle if less than 25% of the sampled
+ * keys were found expired in the current DB. */
+ } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
+ }
+
+ elapsed = ustime()-start;
+ latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
+
+ /* Update our estimate of keys existing but yet to be expired.
+ * Running average with this sample accounting for 5%. */
+ double current_perc;
+ if (total_sampled) {
+ current_perc = (double)total_expired/total_sampled;
+ } else
+ current_perc = 0;
+ server.stat_expired_stale_perc = (current_perc*0.05)+
+ (server.stat_expired_stale_perc*0.95);
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires of keys created in writable slaves
+ *
+ * Normally slaves do not process expires: they wait for the master to
+ * synthesize DEL operations in order to retain consistency. However
+ * writable slaves are an exception: if a key is created in the slave and
+ * an expire is assigned to it, we need a way to expire such a key, since
+ * the master does not know anything about it.
+ *
+ * In order to do so, we track keys created in the slave side with an expire
+ * set, and call the expireSlaveKeys() function from time to time in order to
+ * reclaim the keys if they already expired.
+ *
+ * Note that the use case we are trying to cover here is a popular one:
+ * slaves are put in writable mode in order to compute slow operations on
+ * the slave side that are mostly useful to actually read data in a more
+ * processed way. Think of set intersections in a tmp key, with an expire
+ * set so that it is also used as a cache to avoid intersecting every time.
+ *
+ * This implementation is currently not perfect but a lot better than leaking
+ * the keys as implemented in 3.2.
+ *----------------------------------------------------------------------------*/
+
+/* The dictionary where we remember key names and database ID of keys we may
+ * want to expire from the slave. Since this feature is not often used, we
+ * don't even care to initialize the dictionary at startup. We'll do it once
+ * the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
+ * is called.
+ *
+ * The dictionary has an SDS string representing the key as the hash table
+ * key, while the value is a 64 bit unsigned integer with the bits corresponding
+ * to the DB where the keys may exist set to 1. Currently the keys created
+ * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
+ * to the max 64 bit unsigned value when we know there is a key with a DB
+ * ID greater than 63, and check all the configured DBs in such a case. */
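+/* For instance, a key created with an expire in DB 0 and DB 5 of a
+ * writable slave is stored here with the value (1<<0)|(1<<5) = 33,
+ * binary 100001; expireSlaveKeys() below clears each DB bit once the
+ * key no longer has an expire there, and deletes the entry when the
+ * bitmap reaches zero. */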
+dict *slaveKeysWithExpire = NULL;
+
+/* Check the set of keys created in the writable slave with an expire set,
+ * in order to check if they should be expired. */
+void expireSlaveKeys(void) {
+ if (slaveKeysWithExpire == NULL ||
+ dictSize(slaveKeysWithExpire) == 0) return;
+
+ int cycles = 0, noexpire = 0;
+ mstime_t start = mstime();
+ while(1) {
+ dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
+ sds keyname = dictGetKey(de);
+ uint64_t dbids = dictGetUnsignedIntegerVal(de);
+ uint64_t new_dbids = 0;
+
+ /* Check the key against every database corresponding to the
+ * bits set in the value bitmap. */
+ int dbid = 0;
+ while(dbids && dbid < server.dbnum) {
+ if ((dbids & 1) != 0) {
+ redisDb *db = server.db+dbid;
+ dictEntry *expire = dictFind(db->expires,keyname);
+ int expired = 0;
+
+ if (expire &&
+ activeExpireCycleTryExpire(server.db+dbid,expire,start))
+ {
+ expired = 1;
+ }
+
+ /* If the key was not expired in this DB, we need to set the
+ * corresponding bit in the new bitmap we set as value.
+ * At the end of the loop if the bitmap is zero, it means we
+ * no longer need to keep track of this key. */
+ if (expire && !expired) {
+ noexpire++;
+ new_dbids |= (uint64_t)1 << dbid;
+ }
+ }
+ dbid++;
+ dbids >>= 1;
+ }
+
+ /* Set the new bitmap as value of the key in the dictionary
+ * of keys with an expire set directly in the writable slave; otherwise,
+ * if the bitmap is zero, we no longer need to keep track of it. */
+ if (new_dbids)
+ dictSetUnsignedIntegerVal(de,new_dbids);
+ else
+ dictDelete(slaveKeysWithExpire,keyname);
+
+ /* Stop conditions: found 3 keys we can't expire in a row, or the
+ * time limit was reached. */
+ cycles++;
+ if (noexpire > 3) break;
+ if ((cycles % 64) == 0 && mstime()-start > 1) break;
+ if (dictSize(slaveKeysWithExpire) == 0) break;
+ }
+}
+
+/* Track keys that received an EXPIRE or similar command in the context
+ * of a writable slave. */
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
+ if (slaveKeysWithExpire == NULL) {
+ static dictType dt = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
+ };
+ slaveKeysWithExpire = dictCreate(&dt,NULL);
+ }
+ if (db->id > 63) return;
+
+ dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
+ /* If the entry was just created, set it to a copy of the SDS string
+ * representing the key: we don't want to need to keep those keys
+ * in sync with the main DB. The keys will be removed by expireSlaveKeys()
+ * as it scans to find keys to remove. */
+ if (de->key == key->ptr) {
+ de->key = sdsdup(key->ptr);
+ dictSetUnsignedIntegerVal(de,0);
+ }
+
+ uint64_t dbids = dictGetUnsignedIntegerVal(de);
+ dbids |= (uint64_t)1 << db->id;
+ dictSetUnsignedIntegerVal(de,dbids);
+}
+
+/* Return the number of keys we are tracking. */
+size_t getSlaveKeyWithExpireCount(void) {
+ if (slaveKeysWithExpire == NULL) return 0;
+ return dictSize(slaveKeysWithExpire);
+}
+
+/* Remove the keys in the hash table. We need to do that when data is
+ * flushed from the server. We may receive new keys from the master with
+ * the same name/db and it is no longer a good idea to expire them.
+ *
+ * Note: technically we should handle the case of a single DB being flushed,
+ * but it is not worth it since anyway race conditions using the same set
+ * of key names in a writable slave and in its master will lead to
+ * inconsistencies. This is just a best-effort thing we do. */
+void flushSlaveKeysWithExpireList(void) {
+ if (slaveKeysWithExpire) {
+ dictRelease(slaveKeysWithExpire);
+ slaveKeysWithExpire = NULL;
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ * Expires Commands
+ *----------------------------------------------------------------------------*/
+
+/* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
+ * and PEXPIREAT. Because the command's second argument may be relative or
+ * absolute, the "basetime" argument is used to signal what the base time
+ * is (either 0 for *AT variants of the command, or the current time for
+ * relative expires).
+ *
+ * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
+ * the argv[2] parameter. The basetime is always specified in milliseconds. */
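+/* For example, "EXPIRE key 10" reaches this function with basetime set
+ * to the current mstime() and unit UNIT_SECONDS, so 'when' becomes
+ * now + 10*1000 milliseconds, while a PEXPIREAT call arrives with
+ * basetime 0 and unit UNIT_MILLISECONDS, so 'when' is the absolute
+ * timestamp provided by the user. */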
+void expireGenericCommand(client *c, long long basetime, int unit) {
+ robj *key = c->argv[1], *param = c->argv[2];
+ long long when; /* unix time in milliseconds when the key will expire. */
+
+ if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
+ return;
+
+ if (unit == UNIT_SECONDS) when *= 1000;
+ when += basetime;
+
+ /* No key, return zero. */
+ if (lookupKeyWrite(c->db,key) == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+
+ /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
+ * should never be executed as a DEL when loading the AOF or in the
+ * context of a slave instance.
+ *
+ * Instead we take the other branch of the IF statement setting an expire
+ * (possibly in the past) and wait for an explicit DEL from the master. */
+ if (when <= mstime() && !server.loading && !server.masterhost) {
+ robj *aux;
+
+ int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
+ dbSyncDelete(c->db,key);
+ serverAssertWithInfo(c,key,deleted);
+ server.dirty++;
+
+ /* Replicate/AOF this as an explicit DEL or UNLINK. */
+ aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
+ rewriteClientCommandVector(c,2,aux,key);
+ signalModifiedKey(c->db,key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+ addReply(c, shared.cone);
+ return;
+ } else {
+ setExpire(c,c->db,key,when);
+ addReply(c,shared.cone);
+ signalModifiedKey(c->db,key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
+ server.dirty++;
+ return;
+ }
+}
+
+/* EXPIRE key seconds */
+void expireCommand(client *c) {
+ expireGenericCommand(c,mstime(),UNIT_SECONDS);
+}
+
+/* EXPIREAT key time */
+void expireatCommand(client *c) {
+ expireGenericCommand(c,0,UNIT_SECONDS);
+}
+
+/* PEXPIRE key milliseconds */
+void pexpireCommand(client *c) {
+ expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
+}
+
+/* PEXPIREAT key ms_time */
+void pexpireatCommand(client *c) {
+ expireGenericCommand(c,0,UNIT_MILLISECONDS);
+}
+
+/* Implements TTL and PTTL */
+void ttlGenericCommand(client *c, int output_ms) {
+ long long expire, ttl = -1;
+
+ /* If the key does not exist at all, return -2 */
+ if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
+ addReplyLongLong(c,-2);
+ return;
+ }
+ /* The key exists. Return -1 if it has no expire, or the actual
+ * TTL value otherwise. */
+ expire = getExpire(c->db,c->argv[1]);
+ if (expire != -1) {
+ ttl = expire-mstime();
+ if (ttl < 0) ttl = 0;
+ }
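+ /* For the seconds variant we round to the nearest second rather than
+ * truncating: e.g. 1499 ms left reports a TTL of 1, 1500 ms of 2. */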
+ if (ttl == -1) {
+ addReplyLongLong(c,-1);
+ } else {
+ addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
+ }
+}
+
+/* TTL key */
+void ttlCommand(client *c) {
+ ttlGenericCommand(c, 0);
+}
+
+/* PTTL key */
+void pttlCommand(client *c) {
+ ttlGenericCommand(c, 1);
+}
+
+/* PERSIST key */
+void persistCommand(client *c) {
+ if (lookupKeyWrite(c->db,c->argv[1])) {
+ if (removeExpire(c->db,c->argv[1])) {
+ addReply(c,shared.cone);
+ server.dirty++;
+ } else {
+ addReply(c,shared.czero);
+ }
+ } else {
+ addReply(c,shared.czero);
+ }
+}
+
+/* TOUCH key1 [key2 key3 ... keyN] */
+void touchCommand(client *c) {
+ int touched = 0;
+ for (int j = 1; j < c->argc; j++)
+ if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++;
+ addReplyLongLong(c,touched);
+}
+
diff --git a/src/geo.c b/src/geo.c
new file mode 100644
index 000000000..c78fadfcf
--- /dev/null
+++ b/src/geo.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "geo.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+
+/* Things exported from t_zset.c only for geo.c, since it is the only other
+ * part of Redis that requires close zset introspection. */
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+int zslValueLteMax(double value, zrangespec *spec);
+
+/* ====================================================================
+ * This file implements the following commands:
+ *
+ * - geoadd - add coordinates for value to geoset
+ * - georadius - search radius by coordinates in geoset
+ * - georadiusbymember - search radius based on geoset member position
+ * ==================================================================== */
+
+/* ====================================================================
+ * geoArray implementation
+ * ==================================================================== */
+
+/* Create a new array of geoPoints. */
+geoArray *geoArrayCreate(void) {
+ geoArray *ga = zmalloc(sizeof(*ga));
+ /* It gets allocated on first geoArrayAppend() call. */
+ ga->array = NULL;
+ ga->buckets = 0;
+ ga->used = 0;
+ return ga;
+}
+
+/* Add a new entry and return its pointer so that the caller can populate
+ * it with data. */
+geoPoint *geoArrayAppend(geoArray *ga) {
+ if (ga->used == ga->buckets) {
+ ga->buckets = (ga->buckets == 0) ? 8 : ga->buckets*2;
+ ga->array = zrealloc(ga->array,sizeof(geoPoint)*ga->buckets);
+ }
+ geoPoint *gp = ga->array+ga->used;
+ ga->used++;
+ return gp;
+}
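+
+/* The bucket array grows geometrically: the first append allocates 8
+ * slots and each time the array fills it doubles (8, 16, 32, ...), so
+ * a sequence of N appends costs O(N) amortized reallocation work. */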
+
+/* Destroy a geoArray created with geoArrayCreate(). */
+void geoArrayFree(geoArray *ga) {
+ size_t i;
+ for (i = 0; i < ga->used; i++) sdsfree(ga->array[i].member);
+ zfree(ga->array);
+ zfree(ga);
+}
+
+/* ====================================================================
+ * Helpers
+ * ==================================================================== */
+int decodeGeohash(double bits, double *xy) {
+ GeoHashBits hash = { .bits = (uint64_t)bits, .step = GEO_STEP_MAX };
+ return geohashDecodeToLongLatWGS84(hash, xy);
+}
+
+/* Input Argument Helper */
+/* Take a pointer to the longitude arg then use the next arg for latitude.
+ * On parse error C_ERR is returned, otherwise C_OK. */
+int extractLongLatOrReply(client *c, robj **argv, double *xy) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ if (getDoubleFromObjectOrReply(c, argv[i], xy + i, NULL) !=
+ C_OK) {
+ return C_ERR;
+ }
+ }
+ if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX ||
+ xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) {
+ addReplySds(c, sdscatprintf(sdsempty(),
+ "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]));
+ return C_ERR;
+ }
+ return C_OK;
+}
+
+/* Input Argument Helper */
+/* Decode lat/long from a zset member's score.
+ * Returns C_OK on successful decoding, otherwise C_ERR is returned. */
+int longLatFromMember(robj *zobj, robj *member, double *xy) {
+ double score = 0;
+
+ if (zsetScore(zobj, member->ptr, &score) == C_ERR) return C_ERR;
+ if (!decodeGeohash(score, xy)) return C_ERR;
+ return C_OK;
+}
+
+/* Check that the unit argument matches one of the known units, and return
+ * the conversion factor to meters (you need to divide meters by the
+ * conversion factor to convert to the right unit).
+ *
+ * If the unit is not valid, an error is reported to the client, and a value
+ * less than zero is returned. */
+double extractUnitOrReply(client *c, robj *unit) {
+ char *u = unit->ptr;
+
+ if (!strcmp(u, "m")) {
+ return 1;
+ } else if (!strcmp(u, "km")) {
+ return 1000;
+ } else if (!strcmp(u, "ft")) {
+ return 0.3048;
+ } else if (!strcmp(u, "mi")) {
+ return 1609.34;
+ } else {
+ addReplyError(c,
+ "unsupported unit provided. please use m, km, ft, mi");
+ return -1;
+ }
+}
+
+/* Input Argument Helper.
+ * Extract the distance from the specified two arguments starting at 'argv'
+ * that should be in the form: <number> <unit>, and return the distance
+ * in meters on success. *conversion is populated with the coefficient
+ * to use in order to convert meters to the unit.
+ *
+ * On error a value less than zero is returned. */
+double extractDistanceOrReply(client *c, robj **argv,
+ double *conversion) {
+ double distance;
+ if (getDoubleFromObjectOrReply(c, argv[0], &distance,
+ "need numeric radius") != C_OK) {
+ return -1;
+ }
+
+ if (distance < 0) {
+ addReplyError(c,"radius cannot be negative");
+ return -1;
+ }
+
+ double to_meters = extractUnitOrReply(c,argv[1]);
+ if (to_meters < 0) {
+ return -1;
+ }
+
+ if (conversion) *conversion = to_meters;
+ return distance * to_meters;
+}
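+
+/* As an example, the argument pair "5 km" yields distance = 5 and
+ * to_meters = 1000, so the function returns 5000 (meters) and stores
+ * 1000 in *conversion; callers later divide distances computed in
+ * meters by *conversion to report them in the unit the user asked for. */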
+
+/* The default addReplyDouble has too much accuracy. We use this
+ * for returning location distances. "5.2145 meters away" is nicer
+ * than "5.2144992818115 meters away." We provide 4 digits after the dot
+ * so that the returned value is decently accurate even when the unit is
+ * the kilometer. */
+void addReplyDoubleDistance(client *c, double d) {
+ char dbuf[128];
+ int dlen = snprintf(dbuf, sizeof(dbuf), "%.4f", d);
+ addReplyBulkCBuffer(c, dbuf, dlen);
+}
+
+/* Helper function for geoGetPointsInRange(): given a sorted set score
+ * representing a point, another point (the center of our search) and
+ * a radius, append this entry as a geoPoint into the specified geoArray
+ * only if the point is within the search area.
+ *
+ * Returns C_OK if the point is included, or C_ERR if it is outside. */
+int geoAppendIfWithinRadius(geoArray *ga, double lon, double lat, double radius, double score, sds member) {
+ double distance, xy[2];
+
+ if (!decodeGeohash(score,xy)) return C_ERR; /* Can't decode. */
+ /* Note that geohashGetDistanceIfInRadiusWGS84() takes arguments in
+ * reverse order: longitude first, latitude later. */
+ if (!geohashGetDistanceIfInRadiusWGS84(lon,lat, xy[0], xy[1],
+ radius, &distance))
+ {
+ return C_ERR;
+ }
+
+ /* Append the new element. */
+ geoPoint *gp = geoArrayAppend(ga);
+ gp->longitude = xy[0];
+ gp->latitude = xy[1];
+ gp->dist = distance;
+ gp->member = member;
+ gp->score = score;
+ return C_OK;
+}
+
+/* Query a Redis sorted set to extract all the elements between 'min' and
+ * 'max', appending them into the array of geoPoint structures 'ga'.
+ * The function returns the number of elements added to the array.
+ *
+ * Elements which are farther than 'radius' from the specified 'x' and 'y'
+ * coordinates are not included.
+ *
+ * The ability of this function to append to an existing set of points is
+ * important for good performance, because querying by radius is performed
+ * using multiple queries to the sorted set, that we later need to sort
+ * via qsort. Similarly we need to be able to reject points outside the
+ * search radius ASAP, in order to avoid allocating and processing more
+ * points than needed. */
+int geoGetPointsInRange(robj *zobj, double min, double max, double lon, double lat, double radius, geoArray *ga) {
+ /* minex 0 = include min in range; maxex 1 = exclude max in range */
+ /* That's: min <= val < max */
+ zrangespec range = { .min = min, .max = max, .minex = 0, .maxex = 1 };
+ size_t origincount = ga->used;
+ sds member;
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+ unsigned char *vstr = NULL;
+ unsigned int vlen = 0;
+ long long vlong = 0;
+ double score = 0;
+
+ if ((eptr = zzlFirstInRange(zl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ sptr = ziplistNext(zl, eptr);
+ while (eptr) {
+ score = zzlGetScore(sptr);
+
+ /* If we fell out of range, break. */
+ if (!zslValueLteMax(score, &range))
+ break;
+
+ /* We know the element exists. ziplistGet should always succeed */
+ ziplistGet(eptr, &vstr, &vlen, &vlong);
+ member = (vstr == NULL) ? sdsfromlonglong(vlong) :
+ sdsnewlen(vstr,vlen);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,score,member)
+ == C_ERR) sdsfree(member);
+ zzlNext(zl, &eptr, &sptr);
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ zskiplistNode *ln;
+
+ if ((ln = zslFirstInRange(zsl, &range)) == NULL) {
+ /* Nothing exists starting at our min. No results. */
+ return 0;
+ }
+
+ while (ln) {
+ sds ele = ln->ele;
+ /* Abort when the node is no longer in range. */
+ if (!zslValueLteMax(ln->score, &range))
+ break;
+
+ ele = sdsdup(ele);
+ if (geoAppendIfWithinRadius(ga,lon,lat,radius,ln->score,ele)
+ == C_ERR) sdsfree(ele);
+ ln = ln->level[0].forward;
+ }
+ }
+ return ga->used - origincount;
+}
+
+/* Compute the sorted set scores min (inclusive), max (exclusive) we should
+ * query in order to retrieve all the elements inside the specified area
+ * 'hash'. The two scores are returned by reference in *min and *max. */
+void scoresOfGeoHashBox(GeoHashBits hash, GeoHashFix52Bits *min, GeoHashFix52Bits *max) {
+ /* We want to compute the sorted set scores that will include all the
+ * elements inside the specified Geohash 'hash', which has as many
+ * bits as specified by hash.step * 2.
+ *
+ * So if step is, for example, 3, and the hash value in binary
+ * is 101010, since our score is 52 bits we want every element which
+ * is in binary: 101010?????????????????????????????????????????????
+ * Where ? can be 0 or 1.
+ *
+ * To get the min score we just use the initial hash value left
+ * shifted enough to get the 52 bit value. Later we increment the
+ * 6 bit prefix (see the hash.bits++ statement), and get the new
+ * prefix: 101011, which we align again to 52 bits to get the maximum
+ * value (which is excluded from the search). So we get everything
+ * between the two following scores (represented in binary):
+ *
+ * 1010100000000000000000000000000000000000000000000000 (included)
+ * and
+ * 1010110000000000000000000000000000000000000000000000 (excluded).
+ */
+ *min = geohashAlign52Bits(hash);
+ hash.bits++;
+ *max = geohashAlign52Bits(hash);
+}
+
+/* Obtain all members between the min/max of this geohash bounding box.
+ * Populate a geoArray of GeoPoints by calling geoGetPointsInRange().
+ * Return the number of points added to the array. */
+int membersOfGeoHashBox(robj *zobj, GeoHashBits hash, geoArray *ga, double lon, double lat, double radius) {
+ GeoHashFix52Bits min, max;
+
+ scoresOfGeoHashBox(hash,&min,&max);
+ return geoGetPointsInRange(zobj, min, max, lon, lat, radius, ga);
+}
+
+/* Search all eight neighbors + self geohash box */
+int membersOfAllNeighbors(robj *zobj, GeoHashRadius n, double lon, double lat, double radius, geoArray *ga) {
+ GeoHashBits neighbors[9];
+ unsigned int i, count = 0, last_processed = 0;
+ int debugmsg = 0;
+
+ neighbors[0] = n.hash;
+ neighbors[1] = n.neighbors.north;
+ neighbors[2] = n.neighbors.south;
+ neighbors[3] = n.neighbors.east;
+ neighbors[4] = n.neighbors.west;
+ neighbors[5] = n.neighbors.north_east;
+ neighbors[6] = n.neighbors.north_west;
+ neighbors[7] = n.neighbors.south_east;
+ neighbors[8] = n.neighbors.south_west;
+
+ /* For each neighbor (*and* our own hashbox), get all the matching
+ * members and add them to the potential result list. */
+ for (i = 0; i < sizeof(neighbors) / sizeof(*neighbors); i++) {
+ if (HASHISZERO(neighbors[i])) {
+ if (debugmsg) D("neighbors[%d] is zero",i);
+ continue;
+ }
+
+ /* Debugging info. */
+ if (debugmsg) {
+ GeoHashRange long_range, lat_range;
+ geohashGetCoordRange(&long_range,&lat_range);
+ GeoHashArea myarea = {{0}};
+ geohashDecode(long_range, lat_range, neighbors[i], &myarea);
+
+ /* Dump center square. */
+ D("neighbors[%d]:\n",i);
+ D("area.longitude.min: %f\n", myarea.longitude.min);
+ D("area.longitude.max: %f\n", myarea.longitude.max);
+ D("area.latitude.min: %f\n", myarea.latitude.min);
+ D("area.latitude.max: %f\n", myarea.latitude.max);
+ D("\n");
+ }
+
+ /* When a huge radius (in the 5000 km range or more) is used,
+ * adjacent neighbors can be the same, leading to duplicated
+ * elements. Skip every range which is the same as the one
+ * processed previously. */
+ if (last_processed &&
+ neighbors[i].bits == neighbors[last_processed].bits &&
+ neighbors[i].step == neighbors[last_processed].step)
+ {
+ if (debugmsg)
+ D("Skipping processing of %d, same as previous\n",i);
+ continue;
+ }
+ count += membersOfGeoHashBox(zobj, neighbors[i], ga, lon, lat, radius);
+ last_processed = i;
+ }
+ return count;
+}
+
+/* Sort comparators for qsort() */
+static int sort_gp_asc(const void *a, const void *b) {
+ const struct geoPoint *gpa = a, *gpb = b;
+ /* We can't do adist - bdist because they are doubles and
+ * the comparator returns an int. */
+ if (gpa->dist > gpb->dist)
+ return 1;
+ else if (gpa->dist == gpb->dist)
+ return 0;
+ else
+ return -1;
+}
+
+static int sort_gp_desc(const void *a, const void *b) {
+ return -sort_gp_asc(a, b);
+}
+
+/* ====================================================================
+ * Commands
+ * ==================================================================== */
+
+/* GEOADD key long lat name [long2 lat2 name2 ... longN latN nameN] */
+void geoaddCommand(client *c) {
+ /* Check arguments number for sanity. */
+ if ((c->argc - 2) % 3 != 0) {
+ /* Arguments must come in lon/lat/member triples after the key. */
+ addReplyError(c, "syntax error. Try GEOADD key [x1] [y1] [name1] "
+ "[x2] [y2] [name2] ... ");
+ return;
+ }
+
+ int elements = (c->argc - 2) / 3;
+ int argc = 2+elements*2; /* ZADD key score ele ... */
+ robj **argv = zcalloc(argc*sizeof(robj*));
+ argv[0] = createRawStringObject("zadd",4);
+ argv[1] = c->argv[1]; /* key */
+ incrRefCount(argv[1]);
+
+ /* Create the argument vector to call ZADD in order to add all
+ * the score,value pairs to the requested zset, where score is actually
+ * an encoded version of lat,long. */
+ int i;
+ for (i = 0; i < elements; i++) {
+ double xy[2];
+
+ if (extractLongLatOrReply(c, (c->argv+2)+(i*3),xy) == C_ERR) {
+ for (i = 0; i < argc; i++)
+ if (argv[i]) decrRefCount(argv[i]);
+ zfree(argv);
+ return;
+ }
+
+ /* Turn the coordinates into the score of the element. */
+ GeoHashBits hash;
+ geohashEncodeWGS84(xy[0], xy[1], GEO_STEP_MAX, &hash);
+ GeoHashFix52Bits bits = geohashAlign52Bits(hash);
+ robj *score = createObject(OBJ_STRING, sdsfromlonglong(bits));
+ robj *val = c->argv[2 + i * 3 + 2];
+ argv[2+i*2] = score;
+ argv[3+i*2] = val;
+ incrRefCount(val);
+ }
+
+ /* Finally call ZADD that will do the work for us. */
+ replaceClientCommandVector(c,argc,argv);
+ zaddCommand(c);
+}
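+
+/* To illustrate the rewrite above, a hypothetical call like
+ * "GEOADD Sicily 13.361389 38.115556 Palermo" is turned into the
+ * equivalent of "ZADD Sicily <52-bit-score> Palermo", where the score
+ * interleaves the longitude and latitude bits, so all the existing
+ * sorted set machinery (replication, persistence, ZRANGE and friends)
+ * handles geo data unchanged. */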
+
+#define SORT_NONE 0
+#define SORT_ASC 1
+#define SORT_DESC 2
+
+#define RADIUS_COORDS (1<<0) /* Search around coordinates. */
+#define RADIUS_MEMBER (1<<1) /* Search around member. */
+#define RADIUS_NOSTORE (1<<2) /* Do not accept STORE/STOREDIST option. */
+
+/* GEORADIUS key x y radius unit [WITHDIST] [WITHHASH] [WITHCOORD] [ASC|DESC]
+ * [COUNT count] [STORE key] [STOREDIST key]
+ * GEORADIUSBYMEMBER key member radius unit ... options ... */
+void georadiusGeneric(client *c, int flags) {
+ robj *key = c->argv[1];
+ robj *storekey = NULL;
+ int storedist = 0; /* 0 for STORE, 1 for STOREDIST. */
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, key, shared.emptymultibulk)) == NULL ||
+ checkType(c, zobj, OBJ_ZSET)) {
+ return;
+ }
+
+ /* Find long/lat to use for radius search based on inquiry type */
+ int base_args;
+ double xy[2] = { 0 };
+ if (flags & RADIUS_COORDS) {
+ base_args = 6;
+ if (extractLongLatOrReply(c, c->argv + 2, xy) == C_ERR)
+ return;
+ } else if (flags & RADIUS_MEMBER) {
+ base_args = 5;
+ robj *member = c->argv[2];
+ if (longLatFromMember(zobj, member, xy) == C_ERR) {
+ addReplyError(c, "could not decode requested zset member");
+ return;
+ }
+ } else {
+ addReplyError(c, "Unknown georadius search type");
+ return;
+ }
+
+ /* Extract radius and units from arguments */
+ double radius_meters = 0, conversion = 1;
+ if ((radius_meters = extractDistanceOrReply(c, c->argv + base_args - 2,
+ &conversion)) < 0) {
+ return;
+ }
+
+ /* Discover and populate all optional parameters. */
+ int withdist = 0, withhash = 0, withcoords = 0;
+ int sort = SORT_NONE;
+ long long count = 0;
+ if (c->argc > base_args) {
+ int remaining = c->argc - base_args;
+ for (int i = 0; i < remaining; i++) {
+ char *arg = c->argv[base_args + i]->ptr;
+ if (!strcasecmp(arg, "withdist")) {
+ withdist = 1;
+ } else if (!strcasecmp(arg, "withhash")) {
+ withhash = 1;
+ } else if (!strcasecmp(arg, "withcoord")) {
+ withcoords = 1;
+ } else if (!strcasecmp(arg, "asc")) {
+ sort = SORT_ASC;
+ } else if (!strcasecmp(arg, "desc")) {
+ sort = SORT_DESC;
+ } else if (!strcasecmp(arg, "count") && (i+1) < remaining) {
+ if (getLongLongFromObjectOrReply(c, c->argv[base_args+i+1],
+ &count, NULL) != C_OK) return;
+ if (count <= 0) {
+ addReplyError(c,"COUNT must be > 0");
+ return;
+ }
+ i++;
+ } else if (!strcasecmp(arg, "store") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 0;
+ i++;
+ } else if (!strcasecmp(arg, "storedist") &&
+ (i+1) < remaining &&
+ !(flags & RADIUS_NOSTORE))
+ {
+ storekey = c->argv[base_args+i+1];
+ storedist = 1;
+ i++;
+ } else {
+ addReply(c, shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* Trap options not compatible with STORE and STOREDIST. */
+ if (storekey && (withdist || withhash || withcoords)) {
+ addReplyError(c,
+ "STORE option in GEORADIUS is not compatible with "
+ "WITHDIST, WITHHASH and WITHCOORDS options");
+ return;
+ }
+
+ /* COUNT without ordering does not make much sense, force ASC
+ * ordering if COUNT was specified but no sorting was requested. */
+ if (count != 0 && sort == SORT_NONE) sort = SORT_ASC;
+
+ /* Get all neighbor geohash boxes for our radius search */
+ GeoHashRadius georadius =
+ geohashGetAreasByRadiusWGS84(xy[0], xy[1], radius_meters);
+
+ /* Search the zset for all matching points */
+ geoArray *ga = geoArrayCreate();
+ membersOfAllNeighbors(zobj, georadius, xy[0], xy[1], radius_meters, ga);
+
+ /* If no matching results, the user gets an empty reply. */
+ if (ga->used == 0 && storekey == NULL) {
+ addReply(c, shared.emptymultibulk);
+ geoArrayFree(ga);
+ return;
+ }
+
+ long result_length = ga->used;
+ long returned_items = (count == 0 || result_length < count) ?
+ result_length : count;
+ long option_length = 0;
+
+ /* Process [optional] requested sorting */
+ if (sort == SORT_ASC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_asc);
+ } else if (sort == SORT_DESC) {
+ qsort(ga->array, result_length, sizeof(geoPoint), sort_gp_desc);
+ }
+
+ if (storekey == NULL) {
+ /* No target key, return results to user. */
+
+ /* Our options are self-contained nested multibulk replies, so we
+ * only need to track how many of those nested replies we return. */
+ if (withdist)
+ option_length++;
+
+ if (withcoords)
+ option_length++;
+
+ if (withhash)
+ option_length++;
+
+ /* The multibulk len we send is exactly returned_items. The result is
+ * either all strings of just zset members *or* a nested multi-bulk
+ * reply containing the zset member string _and_ all the additional
+ * options the user enabled for this request. */
+ addReplyMultiBulkLen(c, returned_items);
+
+ /* Finally send results back to the caller */
+ int i;
+ for (i = 0; i < returned_items; i++) {
+ geoPoint *gp = ga->array+i;
+ gp->dist /= conversion; /* Fix according to unit. */
+
+ /* If we have options in option_length, return each sub-result
+ * as a nested multi-bulk. Add 1 to account for result value
+ * itself. */
+ if (option_length)
+ addReplyMultiBulkLen(c, option_length + 1);
+
+ addReplyBulkSds(c,gp->member);
+ gp->member = NULL;
+
+ if (withdist)
+ addReplyDoubleDistance(c, gp->dist);
+
+ if (withhash)
+ addReplyLongLong(c, gp->score);
+
+ if (withcoords) {
+ addReplyMultiBulkLen(c, 2);
+ addReplyHumanLongDouble(c, gp->longitude);
+ addReplyHumanLongDouble(c, gp->latitude);
+ }
+ }
+ } else {
+ /* Target key, create a sorted set with the results. */
+ robj *zobj;
+ zset *zs;
+ int i;
+ size_t maxelelen = 0;
+
+ if (returned_items) {
+ zobj = createZsetObject();
+ zs = zobj->ptr;
+ }
+
+ for (i = 0; i < returned_items; i++) {
+ zskiplistNode *znode;
+ geoPoint *gp = ga->array+i;
+ gp->dist /= conversion; /* Fix according to unit. */
+ double score = storedist ? gp->dist : gp->score;
+ size_t elelen = sdslen(gp->member);
+
+ if (maxelelen < elelen) maxelelen = elelen;
+ znode = zslInsert(zs->zsl,score,gp->member);
+ serverAssert(dictAdd(zs->dict,gp->member,&znode->score) == DICT_OK);
+ gp->member = NULL;
+ }
+
+ if (returned_items) {
+ zsetConvertToZiplistIfNeeded(zobj,maxelelen);
+ setKey(c->db,storekey,zobj);
+ decrRefCount(zobj);
+ notifyKeyspaceEvent(NOTIFY_ZSET,"georadiusstore",storekey,
+ c->db->id);
+ server.dirty += returned_items;
+ } else if (dbDelete(c->db,storekey)) {
+ signalModifiedKey(c->db,storekey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
+ server.dirty++;
+ }
+ addReplyLongLong(c, returned_items);
+ }
+ geoArrayFree(ga);
+}
+
+/* GEORADIUS wrapper function. */
+void georadiusCommand(client *c) {
+ georadiusGeneric(c, RADIUS_COORDS);
+}
+
+/* GEORADIUSBYMEMBER wrapper function. */
+void georadiusbymemberCommand(client *c) {
+ georadiusGeneric(c, RADIUS_MEMBER);
+}
+
+/* GEORADIUS_RO wrapper function. */
+void georadiusroCommand(client *c) {
+ georadiusGeneric(c, RADIUS_COORDS|RADIUS_NOSTORE);
+}
+
+/* GEORADIUSBYMEMBER_RO wrapper function. */
+void georadiusbymemberroCommand(client *c) {
+ georadiusGeneric(c, RADIUS_MEMBER|RADIUS_NOSTORE);
+}
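+
+/* A minimal usage sketch for the four wrappers above, with sample data
+ * and replies taken from the Redis documentation GEORADIUS examples
+ * (exact reply formatting may vary):
+ *
+ *   GEOADD Sicily 13.361389 38.115556 "Palermo"
+ *   GEOADD Sicily 15.087269 37.502669 "Catania"
+ *   GEORADIUS Sicily 15 37 200 km WITHDIST ASC
+ *   1) 1) "Catania"
+ *      2) "56.4413"
+ *   2) 1) "Palermo"
+ *      2) "190.4424"
+ */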
+
+/* GEOHASH key ele1 ele2 ... eleN
+ *
+ * Returns an array with the 11 character geohash representation of the
+ * position of each specified element. */
+void geohashCommand(client *c) {
+ char *geoalphabet = "0123456789bcdefghjkmnpqrstuvwxyz";
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+ if (zobj && checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Geohash elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+ addReply(c,shared.nullbulk);
+ } else {
+ /* The internal format we use for geocoding is a bit different
+ * from the standard, since our initial latitude range is
+ * -85.05112878,85.05112878 while the normal geohashing
+ * algorithm uses -90,90. So we have to decode our position and
+ * re-encode it using the standard ranges in order to output a
+ * valid geohash string. */
+
+ /* Decode... */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullbulk);
+ continue;
+ }
+
+ /* Re-encode */
+ GeoHashRange r[2];
+ GeoHashBits hash;
+ r[0].min = -180;
+ r[0].max = 180;
+ r[1].min = -90;
+ r[1].max = 90;
+ geohashEncode(&r[0],&r[1],xy[0],xy[1],26,&hash);
+
+ char buf[12];
+ int i;
+ for (i = 0; i < 11; i++) {
+ /* The hash is 52 bits but 11 chars need 55: pad the last char
+ * with zero bits rather than shifting right by a negative
+ * amount, which is undefined behavior. */
+ int idx = (i == 10) ? 0 : (hash.bits >> (52-((i+1)*5))) & 0x1f;
+ buf[i] = geoalphabet[idx];
+ }
+ buf[11] = '\0';
+ addReplyBulkCBuffer(c,buf,11);
+ }
+ }
+}
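+
+/* For illustration, with the "Sicily" sample data used in the GEORADIUS
+ * example above, the documented output is:
+ *
+ *   GEOHASH Sicily Palermo Catania
+ *   1) "sqc8b49rny0"
+ *   2) "sqdtr74hyu0"
+ */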
+
+/* GEOPOS key ele1 ele2 ... eleN
+ *
+ * Returns an array of two-item arrays representing the x,y position of
+ * each element specified in the arguments. For missing elements NULL is
+ * returned. */
+void geoposCommand(client *c) {
+ int j;
+
+ /* Look up the requested zset */
+ robj *zobj = lookupKeyRead(c->db, c->argv[1]);
+ if (zobj && checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Report elements one after the other, using a null bulk reply for
+ * missing elements. */
+ addReplyMultiBulkLen(c,c->argc-2);
+ for (j = 2; j < c->argc; j++) {
+ double score;
+ if (!zobj || zsetScore(zobj, c->argv[j]->ptr, &score) == C_ERR) {
+ addReply(c,shared.nullmultibulk);
+ } else {
+ /* Decode... */
+ double xy[2];
+ if (!decodeGeohash(score,xy)) {
+ addReply(c,shared.nullmultibulk);
+ continue;
+ }
+ addReplyMultiBulkLen(c,2);
+ addReplyHumanLongDouble(c,xy[0]);
+ addReplyHumanLongDouble(c,xy[1]);
+ }
+ }
+}
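+
+/* Example from the Redis documentation, assuming the "Sicily" sample
+ * data used in the GEORADIUS example above (the exact decimals reflect
+ * the 52 bit precision of the internal geohash):
+ *
+ *   GEOPOS Sicily Palermo
+ *   1) 1) "13.36138933897018433"
+ *      2) "38.11555639549629859"
+ */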
+
+/* GEODIST key ele1 ele2 [unit]
+ *
+ * Return the distance, in meters by default, otherwise according to "unit",
+ * between points ele1 and ele2. If one or more elements are missing NULL
+ * is returned. */
+void geodistCommand(client *c) {
+ double to_meter = 1;
+
+ /* Check if there is the unit to extract, otherwise assume meters. */
+ if (c->argc == 5) {
+ to_meter = extractUnitOrReply(c,c->argv[4]);
+ if (to_meter < 0) return;
+ } else if (c->argc > 5) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Look up the requested zset */
+ robj *zobj = NULL;
+ if ((zobj = lookupKeyReadOrReply(c, c->argv[1], shared.nullbulk))
+ == NULL || checkType(c, zobj, OBJ_ZSET)) return;
+
+ /* Get the scores. We need both otherwise NULL is returned. */
+ double score1, score2, xyxy[4];
+ if (zsetScore(zobj, c->argv[2]->ptr, &score1) == C_ERR ||
+ zsetScore(zobj, c->argv[3]->ptr, &score2) == C_ERR)
+ {
+ addReply(c,shared.nullbulk);
+ return;
+ }
+
+ /* Decode & compute the distance. */
+ if (!decodeGeohash(score1,xyxy) || !decodeGeohash(score2,xyxy+2))
+ addReply(c,shared.nullbulk);
+ else
+ addReplyDoubleDistance(c,
+ geohashGetDistance(xyxy[0],xyxy[1],xyxy[2],xyxy[3]) / to_meter);
+}
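+
+/* Example from the Redis documentation, assuming the "Sicily" sample
+ * data used in the GEORADIUS example above:
+ *
+ *   GEODIST Sicily Palermo Catania
+ *   "166274.1516"
+ *   GEODIST Sicily Palermo Catania km
+ *   "166.2742"
+ */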
diff --git a/src/geo.h b/src/geo.h
new file mode 100644
index 000000000..79d0a6a4a
--- /dev/null
+++ b/src/geo.h
@@ -0,0 +1,22 @@
+#ifndef __GEO_H__
+#define __GEO_H__
+
+#include "server.h"
+
+/* Structures used inside geo.c in order to represent points and array of
+ * points on the earth. */
+typedef struct geoPoint {
+ double longitude;
+ double latitude;
+ double dist;
+ double score;
+ char *member;
+} geoPoint;
+
+typedef struct geoArray {
+ struct geoPoint *array;
+ size_t buckets;
+ size_t used;
+} geoArray;
+
+#endif
diff --git a/src/geohash.c b/src/geohash.c
new file mode 100644
index 000000000..b40282e76
--- /dev/null
+++ b/src/geohash.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "geohash.h"
+
+/**
+ * Hashing works like this:
+ * Divide the world into 4 buckets. Label each one as such:
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,1 | 1,1 |
+ * -----------------
+ * | | |
+ * | | |
+ * | 0,0 | 1,0 |
+ * -----------------
+ */
+
+/* Interleave lower bits of x and y, so the bits of x
+ * are in the even positions and bits from y in the odd;
+ * x and y must initially be less than 2**32 (4294967296).
+ * From: https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
+ */
+static inline uint64_t interleave64(uint32_t xlo, uint32_t ylo) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL};
+ static const unsigned int S[] = {1, 2, 4, 8, 16};
+
+ uint64_t x = xlo;
+ uint64_t y = ylo;
+
+ x = (x | (x << S[4])) & B[4];
+ y = (y | (y << S[4])) & B[4];
+
+ x = (x | (x << S[3])) & B[3];
+ y = (y | (y << S[3])) & B[3];
+
+ x = (x | (x << S[2])) & B[2];
+ y = (y | (y << S[2])) & B[2];
+
+ x = (x | (x << S[1])) & B[1];
+ y = (y | (y << S[1])) & B[1];
+
+ x = (x | (x << S[0])) & B[0];
+ y = (y | (y << S[0])) & B[0];
+
+ return x | (y << 1);
+}
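+
+/* A small worked example of the interleaving: interleave64(3,1) takes
+ * x = 0b11 and y = 0b01; the x bits land in the even positions (bits 0
+ * and 2) and the y bit in an odd position (bit 1), giving 0b111 = 7. */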
+
+/* reverse the interleave process
+ * derived from http://stackoverflow.com/questions/4909263
+ */
+static inline uint64_t deinterleave64(uint64_t interleaved) {
+ static const uint64_t B[] = {0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+ static const unsigned int S[] = {0, 1, 2, 4, 8, 16};
+
+ uint64_t x = interleaved;
+ uint64_t y = interleaved >> 1;
+
+ x = (x | (x >> S[0])) & B[0];
+ y = (y | (y >> S[0])) & B[0];
+
+ x = (x | (x >> S[1])) & B[1];
+ y = (y | (y >> S[1])) & B[1];
+
+ x = (x | (x >> S[2])) & B[2];
+ y = (y | (y >> S[2])) & B[2];
+
+ x = (x | (x >> S[3])) & B[3];
+ y = (y | (y >> S[3])) & B[3];
+
+ x = (x | (x >> S[4])) & B[4];
+ y = (y | (y >> S[4])) & B[4];
+
+ x = (x | (x >> S[5])) & B[5];
+ y = (y | (y >> S[5])) & B[5];
+
+ return x | (y << 32);
+}
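+
+/* Round trip of the example above: deinterleave64(7) compacts the even
+ * bits back into x = 3 and the odd bits into y = 1, returning
+ * x | (y << 32), that is ((uint64_t)1 << 32) | 3. */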
+
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range) {
+ /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+ /* We can't geocode at the north/south pole. */
+ long_range->max = GEO_LONG_MAX;
+ long_range->min = GEO_LONG_MIN;
+ lat_range->max = GEO_LAT_MAX;
+ lat_range->min = GEO_LAT_MIN;
+}
+
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ /* Check basic arguments sanity. */
+ if (hash == NULL || step > 32 || step == 0 ||
+ RANGEPISZERO(lat_range) || RANGEPISZERO(long_range)) return 0;
+
+ /* Return an error when trying to index outside the supported
+ * constraints. */
+ if (longitude > 180 || longitude < -180 ||
+ latitude > 85.05112878 || latitude < -85.05112878) return 0;
+
+ hash->bits = 0;
+ hash->step = step;
+
+ if (latitude < lat_range->min || latitude > lat_range->max ||
+ longitude < long_range->min || longitude > long_range->max) {
+ return 0;
+ }
+
+ double lat_offset =
+ (latitude - lat_range->min) / (lat_range->max - lat_range->min);
+ double long_offset =
+ (longitude - long_range->min) / (long_range->max - long_range->min);
+
+ /* convert to fixed point based on the step size */
+ lat_offset *= (1ULL << step);
+ long_offset *= (1ULL << step);
+ hash->bits = interleave64(lat_offset, long_offset);
+ return 1;
+}
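+
+/* A worked example of the fixed point conversion: encoding the point
+ * (0,0) with step = 1 over the WGS84 ranges gives offsets of exactly
+ * 0.5 on both axes, which scaled by 2^1 become 1 and 1, so hash.bits =
+ * interleave64(1,1) = 3: the "1,1" (north-east) cell of the four
+ * buckets diagram at the top of this file. */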
+
+int geohashEncodeType(double longitude, double latitude, uint8_t step, GeoHashBits *hash) {
+ GeoHashRange r[2] = {{0}};
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashEncode(&r[0], &r[1], longitude, latitude, step, hash);
+}
+
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash) {
+ return geohashEncodeType(longitude, latitude, step, hash);
+}
+
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area) {
+ if (HASHISZERO(hash) || NULL == area || RANGEISZERO(lat_range) ||
+ RANGEISZERO(long_range)) {
+ return 0;
+ }
+
+ area->hash = hash;
+ uint8_t step = hash.step;
+ uint64_t hash_sep = deinterleave64(hash.bits); /* hash = [LAT][LONG] */
+
+ double lat_scale = lat_range.max - lat_range.min;
+ double long_scale = long_range.max - long_range.min;
+
+ uint32_t ilato = hash_sep; /* get lat part of deinterleaved hash */
+ uint32_t ilono = hash_sep >> 32; /* shift over to get long part of hash */
+
+ /* Divide by 2**step.
+ * Then, for the 0-1 coordinate, multiply by the scale and add
+ * the min to get the absolute coordinate. */
+ area->latitude.min =
+ lat_range.min + (ilato * 1.0 / (1ull << step)) * lat_scale;
+ area->latitude.max =
+ lat_range.min + ((ilato + 1) * 1.0 / (1ull << step)) * lat_scale;
+ area->longitude.min =
+ long_range.min + (ilono * 1.0 / (1ull << step)) * long_scale;
+ area->longitude.max =
+ long_range.min + ((ilono + 1) * 1.0 / (1ull << step)) * long_scale;
+
+ return 1;
+}
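+
+/* Continuing the encoding example above: decoding hash.bits = 3 with
+ * step = 1 over the WGS84 ranges deinterleaves to ilato = 1, ilono = 1,
+ * yielding the area longitude [0,180], latitude [0,85.05112878]. */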
+
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area) {
+ GeoHashRange r[2] = {{0}};
+ geohashGetCoordRange(&r[0], &r[1]);
+ return geohashDecode(r[0], r[1], hash, area);
+}
+
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) {
+ return geohashDecodeType(hash, area);
+}
+
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) {
+ if (!xy) return 0;
+ xy[0] = (area->longitude.min + area->longitude.max) / 2;
+ xy[1] = (area->latitude.min + area->latitude.max) / 2;
+ return 1;
+}
+
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy) {
+ GeoHashArea area = {{0}};
+ if (!xy || !geohashDecodeType(hash, &area))
+ return 0;
+ return geohashDecodeAreaToLongLat(&area, xy);
+}
+
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy) {
+ return geohashDecodeToLongLatType(hash, xy);
+}
+
+static void geohash_move_x(GeoHashBits *hash, int8_t d) {
+ if (d == 0)
+ return;
+
+ /* Move the interleaved longitude bits by +/- 1: filling the latitude
+ * positions with ones beforehand makes the carry (or borrow) ripple
+ * only through the longitude bits. */
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;
+ uint64_t y = hash->bits & 0x5555555555555555ULL;
+
+ uint64_t zz = 0x5555555555555555ULL >> (64 - hash->step * 2);
+
+ if (d > 0) {
+ x = x + (zz + 1);
+ } else {
+ x = x | zz;
+ x = x - (zz + 1);
+ }
+
+ x &= (0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2));
+ hash->bits = (x | y);
+}
+
+static void geohash_move_y(GeoHashBits *hash, int8_t d) {
+ if (d == 0)
+ return;
+
+ uint64_t x = hash->bits & 0xaaaaaaaaaaaaaaaaULL;
+ uint64_t y = hash->bits & 0x5555555555555555ULL;
+
+ uint64_t zz = 0xaaaaaaaaaaaaaaaaULL >> (64 - hash->step * 2);
+ if (d > 0) {
+ y = y + (zz + 1);
+ } else {
+ y = y | zz;
+ y = y - (zz + 1);
+ }
+ y &= (0x5555555555555555ULL >> (64 - hash->step * 2));
+ hash->bits = (x | y);
+}
+
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors) {
+ neighbors->east = *hash;
+ neighbors->west = *hash;
+ neighbors->north = *hash;
+ neighbors->south = *hash;
+ neighbors->south_east = *hash;
+ neighbors->south_west = *hash;
+ neighbors->north_east = *hash;
+ neighbors->north_west = *hash;
+
+ geohash_move_x(&neighbors->east, 1);
+ geohash_move_y(&neighbors->east, 0);
+
+ geohash_move_x(&neighbors->west, -1);
+ geohash_move_y(&neighbors->west, 0);
+
+ geohash_move_x(&neighbors->south, 0);
+ geohash_move_y(&neighbors->south, -1);
+
+ geohash_move_x(&neighbors->north, 0);
+ geohash_move_y(&neighbors->north, 1);
+
+ geohash_move_x(&neighbors->north_west, -1);
+ geohash_move_y(&neighbors->north_west, 1);
+
+ geohash_move_x(&neighbors->north_east, 1);
+ geohash_move_y(&neighbors->north_east, 1);
+
+ geohash_move_x(&neighbors->south_east, 1);
+ geohash_move_y(&neighbors->south_east, -1);
+
+ geohash_move_x(&neighbors->south_west, -1);
+ geohash_move_y(&neighbors->south_west, -1);
+}
diff --git a/src/geohash.h b/src/geohash.h
new file mode 100644
index 000000000..ed2ef9336
--- /dev/null
+++ b/src/geohash.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_H_
+#define GEOHASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define HASHISZERO(r) (!(r).bits && !(r).step)
+#define RANGEISZERO(r) (!(r).max && !(r).min)
+#define RANGEPISZERO(r) (r == NULL || RANGEISZERO(*r))
+
+#define GEO_STEP_MAX 26 /* 26*2 = 52 bits. */
+
+/* Limits from EPSG:900913 / EPSG:3785 / OSGEO:41001 */
+#define GEO_LAT_MIN -85.05112878
+#define GEO_LAT_MAX 85.05112878
+#define GEO_LONG_MIN -180
+#define GEO_LONG_MAX 180
+
+typedef enum {
+ GEOHASH_NORTH = 0,
+ GEOHASH_EAST,
+ GEOHASH_WEST,
+ GEOHASH_SOUTH,
+ GEOHASH_SOUTH_WEST,
+ GEOHASH_SOUTH_EAST,
+ GEOHASH_NORTH_WEST,
+ GEOHASH_NORTH_EAST
+} GeoDirection;
+
+typedef struct {
+ uint64_t bits;
+ uint8_t step;
+} GeoHashBits;
+
+typedef struct {
+ double min;
+ double max;
+} GeoHashRange;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashRange longitude;
+ GeoHashRange latitude;
+} GeoHashArea;
+
+typedef struct {
+ GeoHashBits north;
+ GeoHashBits east;
+ GeoHashBits west;
+ GeoHashBits south;
+ GeoHashBits north_east;
+ GeoHashBits south_east;
+ GeoHashBits north_west;
+ GeoHashBits south_west;
+} GeoHashNeighbors;
+
+/* The encode/decode functions below return 1 on success and 0 on
+ * failure (bad arguments or out of range input). */
+void geohashGetCoordRange(GeoHashRange *long_range, GeoHashRange *lat_range);
+int geohashEncode(const GeoHashRange *long_range, const GeoHashRange *lat_range,
+ double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashEncodeType(double longitude, double latitude,
+ uint8_t step, GeoHashBits *hash);
+int geohashEncodeWGS84(double longitude, double latitude, uint8_t step,
+ GeoHashBits *hash);
+int geohashDecode(const GeoHashRange long_range, const GeoHashRange lat_range,
+ const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeType(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area);
+int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy);
+int geohashDecodeToLongLatType(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatWGS84(const GeoHashBits hash, double *xy);
+int geohashDecodeToLongLatMercator(const GeoHashBits hash, double *xy);
+void geohashNeighbors(const GeoHashBits *hash, GeoHashNeighbors *neighbors);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* GEOHASH_H_ */
diff --git a/src/geohash_helper.c b/src/geohash_helper.c
new file mode 100644
index 000000000..e23f17b4e
--- /dev/null
+++ b/src/geohash_helper.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015-2016, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a C++ to C conversion from the ardb project.
+ * This file started out as:
+ * https://github.com/yinqiwen/ardb/blob/d42503/src/geo/geohash_helper.cpp
+ */
+
+#include "fmacros.h"
+#include "geohash_helper.h"
+#include "debugmacro.h"
+#include <math.h>
+
+#define D_R (M_PI / 180.0)
+#define R_MAJOR 6378137.0
+#define R_MINOR 6356752.3142
+#define RATIO (R_MINOR / R_MAJOR)
+#define ECCENT (sqrt(1.0 - (RATIO *RATIO)))
+#define COM (0.5 * ECCENT)
+
+/// @brief The usual PI/180 constant
+const double DEG_TO_RAD = 0.017453292519943295769236907684886;
+/// @brief Earth's quadratic mean radius for WGS-84
+const double EARTH_RADIUS_IN_METERS = 6372797.560856;
+
+const double MERCATOR_MAX = 20037726.37;
+const double MERCATOR_MIN = -20037726.37;
+
+static inline double deg_rad(double ang) { return ang * D_R; }
+static inline double rad_deg(double ang) { return ang / D_R; }
+
+/* This function is used in order to estimate the step (bits precision)
+ * of the 9 search area boxes during radius queries. */
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
+ if (range_meters == 0) return 26;
+ int step = 1;
+ while (range_meters < MERCATOR_MAX) {
+ range_meters *= 2;
+ step++;
+ }
+ step -= 2; /* Make sure range is included in most of the base cases. */
+
+ /* Wider range towards the poles... Note: it is possible to do better
+ * than this approximation by computing the distance between meridians
+ * at this latitude, but this does the trick for now. */
+ if (lat > 66 || lat < -66) {
+ step--;
+ if (lat > 80 || lat < -80) step--;
+ }
+
+ /* Frame to valid range. */
+ if (step < 1) step = 1;
+ if (step > 26) step = 26;
+ return step;
+}
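+
+/* Worked example: for a 200km radius at latitude 37, the loop doubles
+ * 200000m seven times before reaching MERCATOR_MAX, giving step = 8,
+ * then subtracts 2 for a final step of 6: a 64x64 grid whose cells are
+ * large enough for the 3x3 neighborhood to cover the query circle. */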
+
+/* Return the bounding box of the search area centered at latitude,longitude
+ * having a radius of radius_meters. bounds[0] and bounds[2] are the minimum
+ * and maximum longitude, while bounds[1] and bounds[3] are the minimum and
+ * maximum latitude.
+ *
+ * This function does not behave correctly with very large radius values, for
+ * instance for the coordinates 81.634948934258375 30.561509253718668 and a
+ * radius of 7083 kilometers, it reports as bounding box:
+ *
+ * min_lon 7.680495, min_lat -33.119473, max_lon 155.589402, max_lat 94.242491
+ *
+ * However, for instance, a min_lon of 7.680495 is not correct, because the
+ * point -1.27579540014266968 61.33421815228281559 is less than 7000
+ * kilometers away.
+ *
+ * Since this function is currently only used as an optimization, the
+ * optimization is not used for very large radii, however the function
+ * should be fixed. */
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds) {
+ if (!bounds) return 0;
+
+ bounds[0] = longitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude)));
+ bounds[2] = longitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude)));
+ bounds[1] = latitude - rad_deg(radius_meters/EARTH_RADIUS_IN_METERS);
+ bounds[3] = latitude + rad_deg(radius_meters/EARTH_RADIUS_IN_METERS);
+ return 1;
+}
+
+/* Return a set of areas (center + 8) that are able to cover a range query
+ * for the specified position and radius. */
+GeoHashRadius geohashGetAreasByRadius(double longitude, double latitude, double radius_meters) {
+ GeoHashRange long_range, lat_range;
+ GeoHashRadius radius;
+ GeoHashBits hash;
+ GeoHashNeighbors neighbors;
+ GeoHashArea area;
+ double min_lon, max_lon, min_lat, max_lat;
+ double bounds[4];
+ int steps;
+
+ geohashBoundingBox(longitude, latitude, radius_meters, bounds);
+ min_lon = bounds[0];
+ min_lat = bounds[1];
+ max_lon = bounds[2];
+ max_lat = bounds[3];
+
+ steps = geohashEstimateStepsByRadius(radius_meters,latitude);
+
+ geohashGetCoordRange(&long_range,&lat_range);
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+
+ /* Check if the step is enough at the limits of the covered area.
+ * Sometimes when the search area is near an edge of the
+ * area, the estimated step is not small enough, since one of the
+ * north / south / west / east squares is too close to the search
+ * area to cover everything. */
+ int decrease_step = 0;
+ {
+ GeoHashArea north, south, east, west;
+
+ geohashDecode(long_range, lat_range, neighbors.north, &north);
+ geohashDecode(long_range, lat_range, neighbors.south, &south);
+ geohashDecode(long_range, lat_range, neighbors.east, &east);
+ geohashDecode(long_range, lat_range, neighbors.west, &west);
+
+ if (geohashGetDistance(longitude,latitude,longitude,north.latitude.max)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,longitude,south.latitude.min)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,east.longitude.max,latitude)
+ < radius_meters) decrease_step = 1;
+ if (geohashGetDistance(longitude,latitude,west.longitude.min,latitude)
+ < radius_meters) decrease_step = 1;
+ }
+
+ if (steps > 1 && decrease_step) {
+ steps--;
+ geohashEncode(&long_range,&lat_range,longitude,latitude,steps,&hash);
+ geohashNeighbors(&hash,&neighbors);
+ geohashDecode(long_range,lat_range,hash,&area);
+ }
+
+ /* Exclude the search areas that are useless. */
+ if (steps >= 2) {
+ if (area.latitude.min < min_lat) {
+ GZERO(neighbors.south);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.south_east);
+ }
+ if (area.latitude.max > max_lat) {
+ GZERO(neighbors.north);
+ GZERO(neighbors.north_east);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.min < min_lon) {
+ GZERO(neighbors.west);
+ GZERO(neighbors.south_west);
+ GZERO(neighbors.north_west);
+ }
+ if (area.longitude.max > max_lon) {
+ GZERO(neighbors.east);
+ GZERO(neighbors.south_east);
+ GZERO(neighbors.north_east);
+ }
+ }
+ radius.hash = hash;
+ radius.neighbors = neighbors;
+ radius.area = area;
+ return radius;
+}
+
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters) {
+ return geohashGetAreasByRadius(longitude, latitude, radius_meters);
+}
+
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash) {
+ uint64_t bits = hash.bits;
+ bits <<= (52 - hash.step * 2);
+ return bits;
+}
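+
+/* For example a step 1 hash uses only 2 bits, so its bits are shifted
+ * left by 50 positions to scale it to the fixed 52 bit space used for
+ * sorted set scores: geohashAlign52Bits for bits = 3, step = 1 returns
+ * 3ULL << 50. */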
+
+/* Calculate distance using the haversine great circle distance formula. */
+double geohashGetDistance(double lon1d, double lat1d, double lon2d, double lat2d) {
+ double lat1r, lon1r, lat2r, lon2r, u, v;
+ lat1r = deg_rad(lat1d);
+ lon1r = deg_rad(lon1d);
+ lat2r = deg_rad(lat2d);
+ lon2r = deg_rad(lon2d);
+ u = sin((lat2r - lat1r) / 2);
+ v = sin((lon2r - lon1r) / 2);
+ return 2.0 * EARTH_RADIUS_IN_METERS *
+ asin(sqrt(u * u + cos(lat1r) * cos(lat2r) * v * v));
+}
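+
+/* As a sanity check for the formula: the distance from Palermo
+ * (13.361389,38.115556) to Catania (15.087269,37.502669) evaluates to
+ * about 166274.15 meters, matching the GEODIST example in the Redis
+ * documentation. */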
+
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance) {
+ *distance = geohashGetDistance(x1, y1, x2, y2);
+ if (*distance > radius) return 0;
+ return 1;
+}
+
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance) {
+ return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);
+}
diff --git a/src/geohash_helper.h b/src/geohash_helper.h
new file mode 100644
index 000000000..eb0dda38a
--- /dev/null
+++ b/src/geohash_helper.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2013-2014, yinqiwen <yinqiwen@gmail.com>
+ * Copyright (c) 2014, Matt Stancliff <matt@genges.com>.
+ * Copyright (c) 2015, Salvatore Sanfilippo <antirez@gmail.com>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GEOHASH_HELPER_HPP_
+#define GEOHASH_HELPER_HPP_
+
+#include "geohash.h"
+
+#define GZERO(s) do { (s).bits = (s).step = 0; } while(0)
+#define GISZERO(s) (!s.bits && !s.step)
+#define GISNOTZERO(s) (s.bits || s.step)
+
+typedef uint64_t GeoHashFix52Bits;
+typedef uint64_t GeoHashVarBits;
+
+typedef struct {
+ GeoHashBits hash;
+ GeoHashArea area;
+ GeoHashNeighbors neighbors;
+} GeoHashRadius;
+
+int GeoHashBitsComparator(const GeoHashBits *a, const GeoHashBits *b);
+uint8_t geohashEstimateStepsByRadius(double range_meters, double lat);
+int geohashBoundingBox(double longitude, double latitude, double radius_meters,
+ double *bounds);
+GeoHashRadius geohashGetAreasByRadius(double longitude,
+ double latitude, double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusWGS84(double longitude, double latitude,
+ double radius_meters);
+GeoHashRadius geohashGetAreasByRadiusMercator(double longitude, double latitude,
+ double radius_meters);
+GeoHashFix52Bits geohashAlign52Bits(const GeoHashBits hash);
+double geohashGetDistance(double lon1d, double lat1d,
+ double lon2d, double lat2d);
+int geohashGetDistanceIfInRadius(double x1, double y1,
+ double x2, double y2, double radius,
+ double *distance);
+int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
+ double y2, double radius,
+ double *distance);
+
+#endif /* GEOHASH_HELPER_HPP_ */
diff --git a/src/help.h b/src/help.h
index 9f4c979df..c89f1f44b 100644
--- a/src/help.h
+++ b/src/help.h
@@ -1,4 +1,4 @@
-/* Automatically generated by utils/generate-command-help.rb, do not edit. */
+/* Automatically generated by generate-command-help.rb, do not edit. */
#ifndef __REDIS_HELP_H
#define __REDIS_HELP_H
@@ -15,7 +15,10 @@ static char *commandGroups[] = {
"connection",
"server",
"scripting",
- "hyperloglog"
+ "hyperloglog",
+ "cluster",
+ "geo",
+ "stream"
};
struct commandHelp {
@@ -46,10 +49,15 @@ struct commandHelp {
9,
"1.0.0" },
{ "BITCOUNT",
- "key [start] [end]",
+ "key [start end]",
"Count set bits in a string",
1,
"2.6.0" },
+ { "BITFIELD",
+ "key [GET type offset] [SET type offset value] [INCRBY type offset increment] [OVERFLOW WRAP|SAT|FAIL]",
+ "Perform arbitrary bitfield integer operations on strings",
+ 1,
+ "3.2.0" },
{ "BITOP",
"operation destkey key [key ...]",
"Perform bitwise operations between strings",
@@ -75,13 +83,23 @@ struct commandHelp {
"Pop a value from a list, push it to another list and return it; or block until one is available",
2,
"2.2.0" },
+ { "BZPOPMAX",
+ "key [key ...] timeout",
+ "Remove and return the member with the highest score from one or more sorted sets, or block until one is available",
+ 4,
+ "5.0.0" },
+ { "BZPOPMIN",
+ "key [key ...] timeout",
+ "Remove and return the member with the lowest score from one or more sorted sets, or block until one is available",
+ 4,
+ "5.0.0" },
{ "CLIENT GETNAME",
"-",
"Get the current connection name",
9,
"2.6.9" },
{ "CLIENT KILL",
- "ip:port",
+ "[ip:port] [ID client-id] [TYPE normal|master|slave|pubsub] [ADDR ip:port] [SKIPME yes/no]",
"Kill the connection of a client",
9,
"2.4.0" },
@@ -95,11 +113,126 @@ struct commandHelp {
"Stop processing commands from clients for some time",
9,
"2.9.50" },
+ { "CLIENT REPLY",
+ "ON|OFF|SKIP",
+ "Instruct the server whether to reply to commands",
+ 9,
+ "3.2" },
{ "CLIENT SETNAME",
"connection-name",
"Set the current connection name",
9,
"2.6.9" },
+ { "CLUSTER ADDSLOTS",
+ "slot [slot ...]",
+ "Assign new hash slots to receiving node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER COUNT-FAILURE-REPORTS",
+ "node-id",
+ "Return the number of failure reports active for a given node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER COUNTKEYSINSLOT",
+ "slot",
+ "Return the number of local keys in the specified hash slot",
+ 12,
+ "3.0.0" },
+ { "CLUSTER DELSLOTS",
+ "slot [slot ...]",
+ "Set hash slots as unbound in receiving node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER FAILOVER",
+ "[FORCE|TAKEOVER]",
+ "Forces a slave to perform a manual failover of its master.",
+ 12,
+ "3.0.0" },
+ { "CLUSTER FORGET",
+ "node-id",
+ "Remove a node from the nodes table",
+ 12,
+ "3.0.0" },
+ { "CLUSTER GETKEYSINSLOT",
+ "slot count",
+ "Return local key names in the specified hash slot",
+ 12,
+ "3.0.0" },
+ { "CLUSTER INFO",
+ "-",
+ "Provides info about Redis Cluster node state",
+ 12,
+ "3.0.0" },
+ { "CLUSTER KEYSLOT",
+ "key",
+ "Returns the hash slot of the specified key",
+ 12,
+ "3.0.0" },
+ { "CLUSTER MEET",
+ "ip port",
+ "Force a node cluster to handshake with another node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER NODES",
+ "-",
+ "Get Cluster config for the node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER REPLICATE",
+ "node-id",
+ "Reconfigure a node as a slave of the specified master node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER RESET",
+ "[HARD|SOFT]",
+ "Reset a Redis Cluster node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SAVECONFIG",
+ "-",
+ "Forces the node to save cluster state on disk",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SET-CONFIG-EPOCH",
+ "config-epoch",
+ "Set the configuration epoch in a new node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SETSLOT",
+ "slot IMPORTING|MIGRATING|STABLE|NODE [node-id]",
+ "Bind a hash slot to a specific node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SLAVES",
+ "node-id",
+ "List slave nodes of the specified master node",
+ 12,
+ "3.0.0" },
+ { "CLUSTER SLOTS",
+ "-",
+ "Get array of Cluster slot to node mappings",
+ 12,
+ "3.0.0" },
+ { "COMMAND",
+ "-",
+ "Get array of Redis command details",
+ 9,
+ "2.8.13" },
+ { "COMMAND COUNT",
+ "-",
+ "Get total number of Redis commands",
+ 9,
+ "2.8.13" },
+ { "COMMAND GETKEYS",
+ "-",
+ "Extract keys given a full Redis command",
+ 9,
+ "2.8.13" },
+ { "COMMAND INFO",
+ "command-name [command-name ...]",
+ "Get array of specific Redis command details",
+ 9,
+ "2.8.13" },
{ "CONFIG GET",
"parameter",
"Get the value of a configuration parameter",
@@ -181,7 +314,7 @@ struct commandHelp {
7,
"1.2.0" },
{ "EXISTS",
- "key",
+ "key [key ...]",
"Determine if a key exists",
0,
"1.0.0" },
@@ -196,15 +329,45 @@ struct commandHelp {
0,
"1.2.0" },
{ "FLUSHALL",
- "-",
+ "[ASYNC]",
"Remove all keys from all databases",
9,
"1.0.0" },
{ "FLUSHDB",
- "-",
+ "[ASYNC]",
"Remove all keys from the current database",
9,
"1.0.0" },
+ { "GEOADD",
+ "key longitude latitude member [longitude latitude member ...]",
+ "Add one or more geospatial items in the geospatial index represented using a sorted set",
+ 13,
+ "3.2.0" },
+ { "GEODIST",
+ "key member1 member2 [unit]",
+ "Returns the distance between two members of a geospatial index",
+ 13,
+ "3.2.0" },
+ { "GEOHASH",
+ "key member [member ...]",
+ "Returns members of a geospatial index as standard geohash strings",
+ 13,
+ "3.2.0" },
+ { "GEOPOS",
+ "key member [member ...]",
+ "Returns longitude and latitude of members of a geospatial index",
+ 13,
+ "3.2.0" },
+ { "GEORADIUS",
+ "key longitude latitude radius m|km|ft|mi [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count] [ASC|DESC] [STORE key] [STOREDIST key]",
+ "Query a sorted set representing a geospatial index to fetch members matching a given maximum distance from a point",
+ 13,
+ "3.2.0" },
+ { "GEORADIUSBYMEMBER",
+ "key member radius m|km|ft|mi [WITHCOORD] [WITHDIST] [WITHHASH] [COUNT count] [ASC|DESC] [STORE key] [STOREDIST key]",
+ "Query a sorted set representing a geospatial index to fetch members matching a given maximum distance from a member",
+ 13,
+ "3.2.0" },
{ "GET",
"key",
"Get the value of a key",
@@ -290,6 +453,11 @@ struct commandHelp {
"Set the value of a hash field, only if the field does not exist",
5,
"2.0.0" },
+ { "HSTRLEN",
+ "key field",
+ "Get the length of the value of a hash field",
+ 5,
+ "3.2.0" },
{ "HVALS",
"key",
"Get all the values in a hash",
@@ -375,13 +543,43 @@ struct commandHelp {
"Trim a list to the specified range",
2,
"1.0.0" },
+ { "MEMORY DOCTOR",
+ "-",
+ "Outputs memory problems report",
+ 9,
+ "4.0.0" },
+ { "MEMORY HELP",
+ "-",
+ "Show helpful text about the different subcommands",
+ 9,
+ "4.0.0" },
+ { "MEMORY MALLOC-STATS",
+ "-",
+ "Show allocator internal stats",
+ 9,
+ "4.0.0" },
+ { "MEMORY PURGE",
+ "-",
+ "Ask the allocator to release memory",
+ 9,
+ "4.0.0" },
+ { "MEMORY STATS",
+ "-",
+ "Show memory usage details",
+ 9,
+ "4.0.0" },
+ { "MEMORY USAGE",
+ "key [SAMPLES count]",
+ "Estimate the memory usage of a key",
+ 9,
+ "4.0.0" },
{ "MGET",
"key [key ...]",
"Get the values of all the given keys",
1,
"1.0.0" },
{ "MIGRATE",
- "host port key destination-db timeout [COPY] [REPLACE]",
+ "host port key|"" destination-db timeout [COPY] [REPLACE] [KEYS key]",
"Atomically transfer a key from a Redis instance to another one.",
0,
"2.6.0" },
@@ -446,7 +644,7 @@ struct commandHelp {
11,
"2.8.9" },
{ "PING",
- "-",
+ "[message]",
"Ping the server",
8,
"1.0.0" },
@@ -490,6 +688,16 @@ struct commandHelp {
"Return a random key from the keyspace",
0,
"1.0.0" },
+ { "READONLY",
+ "-",
+ "Enables read queries for a connection to a cluster slave node",
+ 12,
+ "3.0.0" },
+ { "READWRITE",
+ "-",
+ "Disables read queries for a connection to a cluster slave node",
+ 12,
+ "3.0.0" },
{ "RENAME",
"key newkey",
"Rename a key",
@@ -501,10 +709,15 @@ struct commandHelp {
0,
"1.0.0" },
{ "RESTORE",
- "key ttl serialized-value",
+ "key ttl serialized-value [REPLACE]",
"Create a key using the provided serialized value, previously obtained using DUMP.",
0,
"2.6.0" },
+ { "ROLE",
+ "-",
+ "Return the role of the instance in the context of replication",
+ 9,
+ "2.8.12" },
{ "RPOP",
"key",
"Remove and get the last element in a list",
@@ -512,7 +725,7 @@ struct commandHelp {
"1.0.0" },
{ "RPOPLPUSH",
"source destination",
- "Remove the last element in a list, append it to another list and return it",
+ "Remove the last element in a list, prepend it to another list and return it",
2,
"1.2.0" },
{ "RPUSH",
@@ -545,8 +758,13 @@ struct commandHelp {
"Get the number of members in a set",
3,
"1.0.0" },
+ { "SCRIPT DEBUG",
+ "YES|SYNC|NO",
+ "Set the debug mode for executed scripts.",
+ 10,
+ "3.2.0" },
{ "SCRIPT EXISTS",
- "script [script ...]",
+ "sha1 [sha1 ...]",
"Check existence of scripts in the script cache.",
10,
"2.6.0" },
@@ -581,7 +799,7 @@ struct commandHelp {
8,
"1.0.0" },
{ "SET",
- "key value [EX seconds] [PX milliseconds] [NX|XX]",
+ "key value [expiration EX seconds|PX milliseconds] [NX|XX]",
"Set the string value of a key",
1,
"1.0.0" },
@@ -606,7 +824,7 @@ struct commandHelp {
1,
"2.2.0" },
{ "SHUTDOWN",
- "[NOSAVE] [SAVE]",
+ "[NOSAVE|SAVE]",
"Synchronously save the dataset to disk and then shut down the server",
9,
"1.0.0" },
@@ -690,6 +908,11 @@ struct commandHelp {
"Add multiple sets and store the resulting set in a key",
3,
"1.0.0" },
+ { "SWAPDB",
+ "index index",
+ "Swaps two Redis databases",
+ 8,
+ "4.0.0" },
{ "SYNC",
"-",
"Internal command used for replication",
@@ -700,6 +923,11 @@ struct commandHelp {
"Return the current server time",
9,
"2.6.0" },
+ { "TOUCH",
+ "key [key ...]",
+ "Alters the last access time of a key(s). Returns the number of existing keys specified.",
+ 0,
+ "3.2.1" },
{ "TTL",
"key",
"Get the time to live for a key",
@@ -710,6 +938,11 @@ struct commandHelp {
"Determine the type stored at key",
0,
"1.0.0" },
+ { "UNLINK",
+ "key [key ...]",
+ "Delete a key asynchronously in another thread. Otherwise it is just as DEL, but non blocking.",
+ 0,
+ "4.0.0" },
{ "UNSUBSCRIBE",
"[channel [channel ...]]",
"Stop listening for messages posted to the given channels",
@@ -720,13 +953,53 @@ struct commandHelp {
"Forget about all watched keys",
7,
"2.2.0" },
+ { "WAIT",
+ "numslaves timeout",
+ "Wait for the synchronous replication of all the write commands sent in the context of the current connection",
+ 0,
+ "3.0.0" },
{ "WATCH",
"key [key ...]",
"Watch the given keys to determine execution of the MULTI/EXEC block",
7,
"2.2.0" },
+ { "XADD",
+ "key ID field string [field string ...]",
+ "Appends a new entry to a stream",
+ 14,
+ "5.0.0" },
+ { "XLEN",
+ "key",
+ "Return the number of entires in a stream",
+ 14,
+ "5.0.0" },
+ { "XPENDING",
+ "key group [start end count] [consumer]",
+ "Return information and entries from a stream consumer group pending entries list, that are messages fetched but never acknowledged.",
+ 14,
+ "5.0.0" },
+ { "XRANGE",
+ "key start end [COUNT count]",
+ "Return a range of elements in a stream, with IDs matching the specified IDs interval",
+ 14,
+ "5.0.0" },
+ { "XREAD",
+ "[COUNT count] [BLOCK milliseconds] STREAMS key [key ...] ID [ID ...]",
+ "Return never seen elements in multiple streams, with IDs greater than the ones reported by the caller for each stream. Can block.",
+ 14,
+ "5.0.0" },
+ { "XREADGROUP",
+ "GROUP group consumer [COUNT count] [BLOCK milliseconds] STREAMS key [key ...] ID [ID ...]",
+ "Return new entries from a stream using a consumer group, or access the history of the pending entries for a given consumer. Can block.",
+ 14,
+ "5.0.0" },
+ { "XREVRANGE",
+ "key end start [COUNT count]",
+ "Return a range of elements in a stream, with IDs matching the specified IDs interval, in reverse order (from greater to smaller IDs) compared to XRANGE",
+ 14,
+ "5.0.0" },
{ "ZADD",
- "key score member [score member ...]",
+ "key [NX|XX] [CH] [INCR] score member [score member ...]",
"Add one or more members to a sorted set, or update its score if it already exists",
4,
"1.2.0" },
@@ -755,6 +1028,16 @@ struct commandHelp {
"Count the number of members in a sorted set between a given lexicographical range",
4,
"2.8.9" },
+ { "ZPOPMAX",
+ "key [count]",
+ "Remove and return members with the highest scores in a sorted set",
+ 4,
+ "5.0.0" },
+ { "ZPOPMIN",
+ "key [count]",
+ "Remove and return members with the lowest scores in a sorted set",
+ 4,
+ "5.0.0" },
{ "ZRANGE",
"key start stop [WITHSCORES]",
"Return a range of members in a sorted set, by index",
@@ -800,6 +1083,11 @@ struct commandHelp {
"Return a range of members in a sorted set, by index, with scores ordered from high to low",
4,
"1.2.0" },
+ { "ZREVRANGEBYLEX",
+ "key max min [LIMIT offset count]",
+ "Return a range of members in a sorted set, by lexicographical range, ordered from higher to lower strings.",
+ 4,
+ "2.8.9" },
{ "ZREVRANGEBYSCORE",
"key max min [WITHSCORES] [LIMIT offset count]",
"Return a range of members in a sorted set, by score, with scores ordered from high to low",
diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index b3542f997..ba3a3ab60 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -29,7 +29,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <stdint.h>
#include <math.h>
@@ -192,6 +192,8 @@ struct hllhdr {
#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)
#define HLL_P 14 /* The greater is P, the smaller the error. */
+#define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for
+ determining the number of leading zeros. */
#define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
#define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
#define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
@@ -384,6 +386,7 @@ static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";
*(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
*((p)+1) = (_l&0xff); \
} while(0)
+#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */
/* ========================= HyperLogLog algorithm ========================= */
@@ -401,7 +404,11 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
uint64_t k;
#if (BYTE_ORDER == LITTLE_ENDIAN)
+ #ifdef USE_ALIGNED_ACCESS
+ memcpy(&k,data,sizeof(uint64_t));
+ #else
k = *((uint64_t*)data);
+ #endif
#else
k = (uint64_t) data[0];
k |= (uint64_t) data[1] << 8;
@@ -422,14 +429,14 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
}
switch(len & 7) {
- case 7: h ^= (uint64_t)data[6] << 48;
- case 6: h ^= (uint64_t)data[5] << 40;
- case 5: h ^= (uint64_t)data[4] << 32;
- case 4: h ^= (uint64_t)data[3] << 24;
- case 3: h ^= (uint64_t)data[2] << 16;
- case 2: h ^= (uint64_t)data[1] << 8;
+ case 7: h ^= (uint64_t)data[6] << 48; /* fall-thru */
+ case 6: h ^= (uint64_t)data[5] << 40; /* fall-thru */
+ case 5: h ^= (uint64_t)data[4] << 32; /* fall-thru */
+ case 4: h ^= (uint64_t)data[3] << 24; /* fall-thru */
+ case 3: h ^= (uint64_t)data[2] << 16; /* fall-thru */
+ case 2: h ^= (uint64_t)data[1] << 8; /* fall-thru */
case 1: h ^= (uint64_t)data[0];
- h *= m;
+ h *= m; /* fall-thru */
};
h ^= h >> r;
@@ -447,7 +454,7 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
/* Count the number of zeroes starting from bit HLL_REGISTERS
* (that is a power of two corresponding to the first bit we don't use
- * as index). The max run can be 64-P+1 bits.
+ * as index). The max run can be 64-P+1 = Q+1 bits.
*
* Note that the final "1" ending the sequence of zeroes must be
* included in the count, so if we find "001" the count is 3, and
@@ -458,8 +465,10 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
* there are high probabilities to find a 1 after a few iterations. */
hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
index = hash & HLL_P_MASK; /* Register index. */
- hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */
- bit = HLL_REGISTERS; /* First bit not used to address the register. */
+ hash >>= HLL_P; /* Remove bits used to address the register. */
+ hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
+ and count will be <= Q+1. */
+ bit = 1;
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
while((hash & bit) == 0) {
count++;
@@ -471,9 +480,8 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
/* ================== Dense representation implementation ================== */
-/* "Add" the element in the dense hyperloglog data structure.
- * Actually nothing is added, but the max 0 pattern counter of the subset
- * the element belongs to is incremented if needed.
+/* Low level function to set the dense HLL register at 'index' to the
+ * specified value if the current value is smaller than 'count'.
*
* 'registers' is expected to have room for HLL_REGISTERS plus an
* additional byte on the right. This requirement is met by sds strings
@@ -482,12 +490,9 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
* The function always succeeds, however if as a result of the operation
* the approximated cardinality changed, 1 is returned. Otherwise 0
* is returned. */
-int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
- uint8_t oldcount, count;
- long index;
+int hllDenseSet(uint8_t *registers, long index, uint8_t count) {
+ uint8_t oldcount;
- /* Update the register if this element produced a longer run of zeroes. */
- count = hllPatLen(ele,elesize,&index);
HLL_DENSE_GET_REGISTER(oldcount,registers,index);
if (count > oldcount) {
HLL_DENSE_SET_REGISTER(registers,index,count);
@@ -497,13 +502,22 @@ int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
}
}
-/* Compute SUM(2^-reg) in the dense representation.
- * PE is an array with a pre-computer table of values 2^-reg indexed by reg.
- * As a side effect the integer pointed by 'ezp' is set to the number
- * of zero registers. */
-double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
- double E = 0;
- int j, ez = 0;
+/* "Add" the element in the dense hyperloglog data structure.
+ * Actually nothing is added, but the max 0 pattern counter of the subset
+ * the element belongs to is incremented if needed.
+ *
+ * This is just a wrapper to hllDenseSet(), performing the hashing of the
+ * element in order to retrieve the index and zero-run count. */
+int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
+ long index;
+ uint8_t count = hllPatLen(ele,elesize,&index);
+ /* Update the register if this element produced a longer run of zeroes. */
+ return hllDenseSet(registers,index,count);
+}
+
+/* Compute the register histogram in the dense representation. */
+void hllDenseRegHisto(uint8_t *registers, int* reghisto) {
+ int j;
/* Redis default is to use 16384 registers 6 bits each. The code works
* with other values by modifying the defines, but for our target value
@@ -514,47 +528,49 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
r10, r11, r12, r13, r14, r15;
for (j = 0; j < 1024; j++) {
/* Handle 16 registers per iteration. */
- r0 = r[0] & 63; if (r0 == 0) ez++;
- r1 = (r[0] >> 6 | r[1] << 2) & 63; if (r1 == 0) ez++;
- r2 = (r[1] >> 4 | r[2] << 4) & 63; if (r2 == 0) ez++;
- r3 = (r[2] >> 2) & 63; if (r3 == 0) ez++;
- r4 = r[3] & 63; if (r4 == 0) ez++;
- r5 = (r[3] >> 6 | r[4] << 2) & 63; if (r5 == 0) ez++;
- r6 = (r[4] >> 4 | r[5] << 4) & 63; if (r6 == 0) ez++;
- r7 = (r[5] >> 2) & 63; if (r7 == 0) ez++;
- r8 = r[6] & 63; if (r8 == 0) ez++;
- r9 = (r[6] >> 6 | r[7] << 2) & 63; if (r9 == 0) ez++;
- r10 = (r[7] >> 4 | r[8] << 4) & 63; if (r10 == 0) ez++;
- r11 = (r[8] >> 2) & 63; if (r11 == 0) ez++;
- r12 = r[9] & 63; if (r12 == 0) ez++;
- r13 = (r[9] >> 6 | r[10] << 2) & 63; if (r13 == 0) ez++;
- r14 = (r[10] >> 4 | r[11] << 4) & 63; if (r14 == 0) ez++;
- r15 = (r[11] >> 2) & 63; if (r15 == 0) ez++;
-
- /* Additional parens will allow the compiler to optimize the
- * code more with a loss of precision that is not very relevant
- * here (floating point math is not commutative!). */
- E += (PE[r0] + PE[r1]) + (PE[r2] + PE[r3]) + (PE[r4] + PE[r5]) +
- (PE[r6] + PE[r7]) + (PE[r8] + PE[r9]) + (PE[r10] + PE[r11]) +
- (PE[r12] + PE[r13]) + (PE[r14] + PE[r15]);
+ r0 = r[0] & 63;
+ r1 = (r[0] >> 6 | r[1] << 2) & 63;
+ r2 = (r[1] >> 4 | r[2] << 4) & 63;
+ r3 = (r[2] >> 2) & 63;
+ r4 = r[3] & 63;
+ r5 = (r[3] >> 6 | r[4] << 2) & 63;
+ r6 = (r[4] >> 4 | r[5] << 4) & 63;
+ r7 = (r[5] >> 2) & 63;
+ r8 = r[6] & 63;
+ r9 = (r[6] >> 6 | r[7] << 2) & 63;
+ r10 = (r[7] >> 4 | r[8] << 4) & 63;
+ r11 = (r[8] >> 2) & 63;
+ r12 = r[9] & 63;
+ r13 = (r[9] >> 6 | r[10] << 2) & 63;
+ r14 = (r[10] >> 4 | r[11] << 4) & 63;
+ r15 = (r[11] >> 2) & 63;
+
+ reghisto[r0]++;
+ reghisto[r1]++;
+ reghisto[r2]++;
+ reghisto[r3]++;
+ reghisto[r4]++;
+ reghisto[r5]++;
+ reghisto[r6]++;
+ reghisto[r7]++;
+ reghisto[r8]++;
+ reghisto[r9]++;
+ reghisto[r10]++;
+ reghisto[r11]++;
+ reghisto[r12]++;
+ reghisto[r13]++;
+ reghisto[r14]++;
+ reghisto[r15]++;
+
r += 12;
}
} else {
- for (j = 0; j < HLL_REGISTERS; j++) {
+ for(j = 0; j < HLL_REGISTERS; j++) {
unsigned long reg;
-
HLL_DENSE_GET_REGISTER(reg,registers,j);
- if (reg == 0) {
- ez++;
- /* Increment E at the end of the loop. */
- } else {
- E += PE[reg]; /* Precomputed 2^(-reg[j]). */
- }
+ reghisto[reg]++;
}
- E += ez; /* Add 2^0 'ez' times. */
}
- *ezp = ez;
- return E;
}
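+
+/* Note on reghisto[]: entry v counts how many registers currently hold
+ * the value v, so for example a freshly created dense HLL yields
+ * reghisto[0] == HLL_REGISTERS. The new estimator needs only these
+ * counts instead of the old floating point SUM(2^-reg). */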
/* ================== Sparse representation implementation ================= */
@@ -563,8 +579,8 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
* representation. Both representations are represented by SDS strings, and
* the input representation is freed as a side effect.
*
- * The function returns REDIS_OK if the sparse representation was valid,
- * otherwise REDIS_ERR is returned if the representation was corrupted. */
+ * The function returns C_OK if the sparse representation was valid,
+ * otherwise C_ERR is returned if the representation was corrupted. */
int hllSparseToDense(robj *o) {
sds sparse = o->ptr, dense;
struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
@@ -573,7 +589,7 @@ int hllSparseToDense(robj *o) {
/* If the representation is already the right one return ASAP. */
hdr = (struct hllhdr*) sparse;
- if (hdr->encoding == HLL_DENSE) return REDIS_OK;
+ if (hdr->encoding == HLL_DENSE) return C_OK;
/* Create a string of the right size filled with zero bytes.
* Note that the cached cardinality is set to 0 as a side effect
@@ -610,18 +626,17 @@ int hllSparseToDense(robj *o) {
* set to HLL_REGISTERS. */
if (idx != HLL_REGISTERS) {
sdsfree(dense);
- return REDIS_ERR;
+ return C_ERR;
}
/* Free the old representation and set the new one. */
sdsfree(o->ptr);
o->ptr = dense;
- return REDIS_OK;
+ return C_OK;
}
-/* "Add" the element in the sparse hyperloglog data structure.
- * Actually nothing is added, but the max 0 pattern counter of the subset
- * the element belongs to is incremented if needed.
+/* Low level function to set the sparse HLL register at 'index' to the
+ * specified value if the current value is smaller than 'count'.
*
* The object 'o' is the String object holding the HLL. The function requires
* a reference to the object in order to be able to enlarge the string if
@@ -635,15 +650,12 @@ int hllSparseToDense(robj *o) {
* sparse to dense: this happens when a register requires to be set to a value
* not representable with the sparse representation, or when the resulting
* size would be greater than server.hll_sparse_max_bytes. */
-int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
+int hllSparseSet(robj *o, long index, uint8_t count) {
struct hllhdr *hdr;
- uint8_t oldcount, count, *sparse, *end, *p, *prev, *next;
- long index, first, span;
+ uint8_t oldcount, *sparse, *end, *p, *prev, *next;
+ long first, span;
long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0;
- /* Update the register if this element produced a longer run of zeroes. */
- count = hllPatLen(ele,elesize,&index);
-
/* If the count is too big to be representable by the sparse representation
* switch to dense representation. */
if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;
@@ -661,7 +673,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
end = p + sdslen(o->ptr) - HLL_HDR_SIZE;
first = 0;
- prev = NULL; /* Points to previos opcode at the end of the loop. */
+ prev = NULL; /* Points to previous opcode at the end of the loop. */
next = NULL; /* Points to the next opcode at the end of the loop. */
span = 0;
while(p < end) {
@@ -752,7 +764,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
* and is either currently represented by a VAL opcode with len > 1,
* by a ZERO opcode with len > 1, or by an XZERO opcode.
*
- * In those cases the original opcode must be split into muliple
+ * In those cases the original opcode must be split into multiple
opcodes. The worst case is an XZERO split in the middle resulting in
* XZERO - VAL - XZERO, so the resulting sequence max length is
* 5 bytes.
@@ -866,7 +878,7 @@ updated:
return 1;
promote: /* Promote to dense representation. */
- if (hllSparseToDense(o) == REDIS_ERR) return -1; /* Corrupted HLL. */
+ if (hllSparseToDense(o) == C_ERR) return -1; /* Corrupted HLL. */
hdr = o->ptr;
/* We need to call hllDenseAdd() to perform the operation after the
@@ -875,82 +887,115 @@ promote: /* Promote to dense representation. */
*
* Note that this in turn means that PFADD will make sure the command
* is propagated to slaves / AOF, so if there is a sparse -> dense
- * convertion, it will be performed in all the slaves as well. */
- int dense_retval = hllDenseAdd(hdr->registers, ele, elesize);
- redisAssert(dense_retval == 1);
+ * conversion, it will be performed in all the slaves as well. */
+ int dense_retval = hllDenseSet(hdr->registers,index,count);
+ serverAssert(dense_retval == 1);
return dense_retval;
}
-/* Compute SUM(2^-reg) in the sparse representation.
- * PE is an array with a pre-computer table of values 2^-reg indexed by reg.
- * As a side effect the integer pointed by 'ezp' is set to the number
- * of zero registers. */
-double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *invalid) {
- double E = 0;
- int ez = 0, idx = 0, runlen, regval;
+/* "Add" the element in the sparse hyperloglog data structure.
+ * Actually nothing is added, but the max 0 pattern counter of the subset
+ * the element belongs to is incremented if needed.
+ *
+ * This function is actually a wrapper for hllSparseSet(): it only performs
+ * the hashing of the element to obtain the index and the zeroes run length. */
+int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
+ long index;
+ uint8_t count = hllPatLen(ele,elesize,&index);
+ /* Update the register if this element produced a longer run of zeroes. */
+ return hllSparseSet(o,index,count);
+}
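The wrapper above defers all the real work to hllPatLen(), which is outside this hunk. A minimal sketch of what it computes follows: the register index comes from the low HLL_P bits of the hash, and the pattern length is the position of the first set bit among the remaining bits, plus one. The hash function, its signature, and the P=14 constant are assumptions carried over from the rest of hyperloglog.c, not part of this patch.

    #include <stdint.h>
    #include <stddef.h>

    #define HLL_P 14                        /* 2^14 = 16384 registers. */
    #define HLL_P_MASK ((1 << HLL_P) - 1)
    #define HLL_Q (64 - HLL_P)              /* Bits left for the pattern. */

    /* Stand-in for the real hash (MurmurHash64A in Redis). */
    uint64_t hashFunction(const void *p, size_t len);

    int patLenSketch(const unsigned char *ele, size_t elesize, long *regp) {
        uint64_t hash = hashFunction(ele, elesize);
        long index = hash & HLL_P_MASK;     /* Register index: low bits. */
        hash >>= HLL_P;                     /* Keep the remaining HLL_Q bits. */
        hash |= ((uint64_t)1 << HLL_Q);     /* Sentinel bit: loop terminates. */
        uint64_t bit = 1;
        int count = 1;                      /* Pattern length counts from 1. */
        while ((hash & bit) == 0) {
            count++;
            bit <<= 1;
        }
        *regp = index;
        return count;
    }

The sentinel bit caps the return value at HLL_Q+1, which is why the register histogram used later in hllCount() needs exactly HLL_Q+2 buckets.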
+
+/* Compute the register histogram in the sparse representation. */
+void hllSparseRegHisto(uint8_t *sparse, int sparselen, int *invalid, int* reghisto) {
+ int idx = 0, runlen, regval;
uint8_t *end = sparse+sparselen, *p = sparse;
while(p < end) {
if (HLL_SPARSE_IS_ZERO(p)) {
runlen = HLL_SPARSE_ZERO_LEN(p);
idx += runlen;
- ez += runlen;
- /* Increment E at the end of the loop. */
+ reghisto[0] += runlen;
p++;
} else if (HLL_SPARSE_IS_XZERO(p)) {
runlen = HLL_SPARSE_XZERO_LEN(p);
idx += runlen;
- ez += runlen;
- /* Increment E at the end of the loop. */
+ reghisto[0] += runlen;
p += 2;
} else {
runlen = HLL_SPARSE_VAL_LEN(p);
regval = HLL_SPARSE_VAL_VALUE(p);
idx += runlen;
- E += PE[regval]*runlen;
+ reghisto[regval] += runlen;
p++;
}
}
if (idx != HLL_REGISTERS && invalid) *invalid = 1;
- E += ez; /* Add 2^0 'ez' times. */
- *ezp = ez;
- return E;
}
/* ========================= HyperLogLog Count ==============================
* This is the core of the algorithm where the approximated count is computed.
- * The function uses the lower level hllDenseSum() and hllSparseSum() functions
- * as helpers to compute the SUM(2^-reg) part of the computation, which is
- * representation-specific, while all the rest is common. */
-
-/* Implements the SUM operation for uint8_t data type which is only used
- * internally as speedup for PFCOUNT with multiple keys. */
-double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
- double E = 0;
- int j, ez = 0;
+ * The function uses the lower level hllDenseRegHisto() and hllSparseRegHisto()
+ * functions as helpers to compute the histogram of register values, the part
+ * of the computation which is representation-specific, while all the rest is
+ * common. */
+
+/* Implements the register histogram calculation for the uint8_t data type,
+ * which is only used internally as a speedup for PFCOUNT with multiple keys. */
+void hllRawRegHisto(uint8_t *registers, int* reghisto) {
uint64_t *word = (uint64_t*) registers;
uint8_t *bytes;
+ int j;
for (j = 0; j < HLL_REGISTERS/8; j++) {
if (*word == 0) {
- ez += 8;
+ reghisto[0] += 8;
} else {
bytes = (uint8_t*) word;
- if (bytes[0]) E += PE[bytes[0]]; else ez++;
- if (bytes[1]) E += PE[bytes[1]]; else ez++;
- if (bytes[2]) E += PE[bytes[2]]; else ez++;
- if (bytes[3]) E += PE[bytes[3]]; else ez++;
- if (bytes[4]) E += PE[bytes[4]]; else ez++;
- if (bytes[5]) E += PE[bytes[5]]; else ez++;
- if (bytes[6]) E += PE[bytes[6]]; else ez++;
- if (bytes[7]) E += PE[bytes[7]]; else ez++;
+ reghisto[bytes[0]]++;
+ reghisto[bytes[1]]++;
+ reghisto[bytes[2]]++;
+ reghisto[bytes[3]]++;
+ reghisto[bytes[4]]++;
+ reghisto[bytes[5]]++;
+ reghisto[bytes[6]]++;
+ reghisto[bytes[7]]++;
}
word++;
}
- E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every
- zero register in the HLL. */
- *ezp = ez;
- return E;
+}
+
+/* Helper function sigma as defined in
+ * "New cardinality estimation algorithms for HyperLogLog sketches"
+ * Otmar Ertl, arXiv:1702.01284 */
+double hllSigma(double x) {
+ if (x == 1.) return INFINITY;
+ double zPrime;
+ double y = 1;
+ double z = x;
+ do {
+ x *= x;
+ zPrime = z;
+ z += x * y;
+ y += y;
+ } while(zPrime != z);
+ return z;
+}
+
+/* Helper function tau as defined in
+ * "New cardinality estimation algorithms for HyperLogLog sketches"
+ * Otmar Ertl, arXiv:1702.01284 */
+double hllTau(double x) {
+ if (x == 0. || x == 1.) return 0.;
+ double zPrime;
+ double y = 1.0;
+ double z = 1 - x;
+ do {
+ x = sqrt(x);
+ zPrime = z;
+ y *= 0.5;
+ z -= pow(1 - x, 2)*y;
+ } while(zPrime != z);
+ return z / 3;
}
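Both loops iterate until the double precision accumulator stops changing. In series form, these are (as sketched from the loop bodies above; sigma diverges at x = 1, which the guard handles explicitly):

    \sigma(x) = x + \sum_{k=1}^{\infty} 2^{k-1} x^{2^k}

    \tau(x) = \frac{1}{3} \left( 1 - x - \sum_{k=1}^{\infty} 2^{-k} \left( 1 - x^{2^{-k}} \right)^{2} \right)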
/* Return the approximated cardinality of the set based on the harmonic
@@ -966,60 +1011,33 @@ double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
* keys (no need to work with 6-bit integers encoding). */
uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
double m = HLL_REGISTERS;
- double E, alpha = 0.7213/(1+1.079/m);
- int j, ez; /* Number of registers equal to 0. */
-
- /* We precompute 2^(-reg[j]) in a small table in order to
- * speedup the computation of SUM(2^-register[0..i]). */
- static int initialized = 0;
- static double PE[64];
- if (!initialized) {
- PE[0] = 1; /* 2^(-reg[j]) is 1 when m is 0. */
- for (j = 1; j < 64; j++) {
- /* 2^(-reg[j]) is the same as 1/2^reg[j]. */
- PE[j] = 1.0/(1ULL << j);
- }
- initialized = 1;
- }
+ double E;
+ int j;
+ int reghisto[HLL_Q+2] = {0};
- /* Compute SUM(2^-register[0..i]). */
+ /* Compute register histogram */
if (hdr->encoding == HLL_DENSE) {
- E = hllDenseSum(hdr->registers,PE,&ez);
+ hllDenseRegHisto(hdr->registers,reghisto);
} else if (hdr->encoding == HLL_SPARSE) {
- E = hllSparseSum(hdr->registers,
- sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid);
+ hllSparseRegHisto(hdr->registers,
+ sdslen((sds)hdr)-HLL_HDR_SIZE,invalid,reghisto);
} else if (hdr->encoding == HLL_RAW) {
- E = hllRawSum(hdr->registers,PE,&ez);
+ hllRawRegHisto(hdr->registers,reghisto);
} else {
- redisPanic("Unknown HyperLogLog encoding in hllCount()");
+ serverPanic("Unknown HyperLogLog encoding in hllCount()");
}
- /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */
- E = (1/E)*alpha*m*m;
-
- /* Use the LINEARCOUNTING algorithm for small cardinalities.
- * For larger values but up to 72000 HyperLogLog raw approximation is
- * used since linear counting error starts to increase. However HyperLogLog
- * shows a strong bias in the range 2.5*16384 - 72000, so we try to
- * compensate for it. */
- if (E < m*2.5 && ez != 0) {
- E = m*log(m/ez); /* LINEARCOUNTING() */
- } else if (m == 16384 && E < 72000) {
- /* We did polynomial regression of the bias for this range, this
- * way we can compute the bias for a given cardinality and correct
- * according to it. Only apply the correction for P=14 that's what
- * we use and the value the correction was verified with. */
- double bias = 5.9119*1.0e-18*(E*E*E*E)
- -1.4253*1.0e-12*(E*E*E)+
- 1.2940*1.0e-7*(E*E)
- -5.2921*1.0e-3*E+
- 83.3216;
- E -= E*(bias/100);
+ /* Estimate the cardinality from the register histogram. See:
+ * "New cardinality estimation algorithms for HyperLogLog sketches"
+ * Otmar Ertl, arXiv:1702.01284 */
+ double z = m * hllTau((m-reghisto[HLL_Q+1])/(double)m);
+ for (j = HLL_Q; j >= 1; --j) {
+ z += reghisto[j];
+ z *= 0.5;
}
- /* We don't apply the correction for E > 1/30 of 2^32 since we use
- * a 64 bit function and 6 bit counters. To apply the correction for
- * 1/30 of 2^64 is not needed since it would require a huge set
- * to approach such a value. */
+ z += m * hllSigma(reghisto[0]/(double)m);
+ E = llroundl(HLL_ALPHA_INF*m*m/z);
+
return (uint64_t) E;
}
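Written out, the new hunk computes Ertl's estimator. A sketch of the closed form, assuming HLL_ALPHA_INF encodes \alpha_\infty = 1/(2 \ln 2) \approx 0.7213475, with m = HLL_REGISTERS, q = HLL_Q, and C_k = reghisto[k] (the number of registers holding value k):

    E = \frac{\alpha_\infty m^2}{\, m \, \tau\!\left(\frac{m - C_{q+1}}{m}\right) 2^{-q} \;+\; \sum_{k=1}^{q} C_k \, 2^{-k} \;+\; m \, \sigma\!\left(\frac{C_0}{m}\right)}

The j loop evaluates the middle sum with Horner's scheme, halving once per step; the q halvings applied to the initial value are where the 2^{-q} factor on the tau term comes from. Unlike the old code, no linear counting switch or P=14 bias polynomial is needed.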
@@ -1039,7 +1057,7 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) {
* The hll object must be already validated via isHLLObjectOrReply()
* or in some other way.
*
- * If the HyperLogLog is sparse and is found to be invalid, REDIS_ERR
+ * If the HyperLogLog is sparse and is found to be invalid, C_ERR
* is returned, otherwise the function always succeeds. */
int hllMerge(uint8_t *max, robj *hll) {
struct hllhdr *hdr = hll->ptr;
@@ -1077,9 +1095,9 @@ int hllMerge(uint8_t *max, robj *hll) {
p++;
}
}
- if (i != HLL_REGISTERS) return REDIS_ERR;
+ if (i != HLL_REGISTERS) return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* ========================== HyperLogLog commands ========================== */
@@ -1108,10 +1126,10 @@ robj *createHLLObject(void) {
p += 2;
aux -= xzero;
}
- redisAssert((p-(uint8_t*)s) == sparselen);
+ serverAssert((p-(uint8_t*)s) == sparselen);
/* Create the actual object. */
- o = createObject(REDIS_STRING,s);
+ o = createObject(OBJ_STRING,s);
hdr = o->ptr;
memcpy(hdr->magic,"HYLL",4);
hdr->encoding = HLL_SPARSE;
@@ -1119,15 +1137,16 @@ robj *createHLLObject(void) {
}
/* Check if the object is a String with a valid HLL representation.
- * Return REDIS_OK if this is true, otherwise reply to the client
- * with an error and return REDIS_ERR. */
-int isHLLObjectOrReply(redisClient *c, robj *o) {
+ * Return C_OK if this is true, otherwise reply to the client
+ * with an error and return C_ERR. */
+int isHLLObjectOrReply(client *c, robj *o) {
struct hllhdr *hdr;
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
- return REDIS_ERR; /* Error already sent. */
+ if (checkType(c,o,OBJ_STRING))
+ return C_ERR; /* Error already sent. */
+ if (!sdsEncodedObject(o)) goto invalid;
if (stringObjectLen(o) < sizeof(*hdr)) goto invalid;
hdr = o->ptr;
@@ -1142,17 +1161,17 @@ int isHLLObjectOrReply(redisClient *c, robj *o) {
stringObjectLen(o) != HLL_DENSE_SIZE) goto invalid;
/* All tests passed. */
- return REDIS_OK;
+ return C_OK;
invalid:
addReplySds(c,
sdsnew("-WRONGTYPE Key is not a valid "
"HyperLogLog string value.\r\n"));
- return REDIS_ERR;
+ return C_ERR;
}
/* PFADD var ele ele ele ... ele => :0 or :1 */
-void pfaddCommand(redisClient *c) {
+void pfaddCommand(client *c) {
robj *o = lookupKeyWrite(c->db,c->argv[1]);
struct hllhdr *hdr;
int updated = 0, j;
@@ -1165,7 +1184,7 @@ void pfaddCommand(redisClient *c) {
dbAdd(c->db,c->argv[1],o);
updated++;
} else {
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[1],o);
}
/* Perform the low level ADD operation for every element. */
@@ -1184,7 +1203,7 @@ void pfaddCommand(redisClient *c) {
hdr = o->ptr;
if (updated) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
server.dirty++;
HLL_INVALIDATE_CACHE(hdr);
}
@@ -1192,7 +1211,7 @@ void pfaddCommand(redisClient *c) {
}
/* PFCOUNT var -> approximated cardinality of set. */
-void pfcountCommand(redisClient *c) {
+void pfcountCommand(client *c) {
robj *o;
struct hllhdr *hdr;
uint64_t card;
@@ -1214,11 +1233,11 @@ void pfcountCommand(redisClient *c) {
/* Check type and size. */
robj *o = lookupKeyRead(c->db,c->argv[j]);
if (o == NULL) continue; /* Assume empty HLL for non existing var.*/
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
 /* Merge this HLL with our 'max' HLL by setting max[i]
* to MAX(max[i],hll[i]). */
- if (hllMerge(registers,o) == REDIS_ERR) {
+ if (hllMerge(registers,o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1239,7 +1258,7 @@ void pfcountCommand(redisClient *c) {
* we would have a key as HLLADD creates it as a side effect. */
addReply(c,shared.czero);
} else {
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[1],o);
/* Check if the cached cardinality is valid. */
@@ -1282,24 +1301,30 @@ void pfcountCommand(redisClient *c) {
}
/* PFMERGE dest src1 src2 src3 ... srcN => OK */
-void pfmergeCommand(redisClient *c) {
+void pfmergeCommand(client *c) {
uint8_t max[HLL_REGISTERS];
struct hllhdr *hdr;
int j;
+ int use_dense = 0; /* Use dense representation as target? */
/* Compute an HLL with M[i] = MAX(M[i]_j).
- * We we the maximum into the max array of registers. We'll write
+ * We store the maximum into the max array of registers. We'll write
* it to the target variable later. */
memset(max,0,sizeof(max));
for (j = 1; j < c->argc; j++) {
/* Check type and size. */
robj *o = lookupKeyRead(c->db,c->argv[j]);
if (o == NULL) continue; /* Assume empty HLL for non existing var. */
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
+
+ /* If at least one involved HLL is dense, use the dense representation
+ * as target ASAP to save time and avoid the conversion step. */
+ hdr = o->ptr;
+ if (hdr->encoding == HLL_DENSE) use_dense = 1;
 /* Merge this HLL with our 'max' HLL by setting max[i]
* to MAX(max[i],hll[i]). */
- if (hllMerge(max,o) == REDIS_ERR) {
+ if (hllMerge(max,o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1320,24 +1345,31 @@ void pfmergeCommand(redisClient *c) {
o = dbUnshareStringValue(c->db,c->argv[1],o);
}
- /* Only support dense objects as destination. */
- if (hllSparseToDense(o) == REDIS_ERR) {
+ /* Convert the destination object to dense representation if at least
+ * one of the inputs was dense. */
+ if (use_dense && hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
/* Write the resulting HLL to the destination HLL registers and
* invalidate the cached value. */
- hdr = o->ptr;
for (j = 0; j < HLL_REGISTERS; j++) {
- HLL_DENSE_SET_REGISTER(hdr->registers,j,max[j]);
+ if (max[j] == 0) continue;
+ hdr = o->ptr;
+ switch(hdr->encoding) {
+ case HLL_DENSE: hllDenseSet(hdr->registers,j,max[j]); break;
+ case HLL_SPARSE: hllSparseSet(o,j,max[j]); break;
+ }
}
+ hdr = o->ptr; /* o->ptr may be different now, as a side effect of the
+ last hllSparseSet() call. */
HLL_INVALIDATE_CACHE(hdr);
signalModifiedKey(c->db,c->argv[1]);
- /* We generate an PFADD event for PFMERGE for semantical simplicity
+ /* We generate a PFADD event for PFMERGE for semantic simplicity
* since in theory this is a mass-add of elements. */
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id);
server.dirty++;
addReply(c,shared.ok);
}
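Taking the register-wise maximum is exactly HLL union: each register stores the largest pattern length observed for its bucket, and the maximum over a union of streams equals the maximum of the per-stream maxima. In symbols:

    M_{\mathrm{dest}}[i] = \max_{j} M_{j}[i], \qquad i = 0, \ldots, \mathrm{HLL\_REGISTERS} - 1

This is why PFMERGE is lossless with respect to later PFCOUNT calls on the destination key.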
@@ -1348,7 +1380,7 @@ void pfmergeCommand(redisClient *c) {
* This command performs a self-test of the HLL registers implementation.
* Something that is not easy to test from within the outside. */
#define HLL_TEST_CYCLES 1000
-void pfselftestCommand(redisClient *c) {
+void pfselftestCommand(client *c) {
unsigned int j, i;
sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE);
struct hllhdr *hdr = (struct hllhdr*) bitcounters, *hdr2;
@@ -1452,7 +1484,7 @@ cleanup:
/* PFDEBUG <subcommand> <key> ... args ...
* Different debugging related operations about the HLL implementation. */
-void pfdebugCommand(redisClient *c) {
+void pfdebugCommand(client *c) {
char *cmd = c->argv[1]->ptr;
struct hllhdr *hdr;
robj *o;
@@ -1463,7 +1495,7 @@ void pfdebugCommand(redisClient *c) {
addReplyError(c,"The specified key does not exist");
return;
}
- if (isHLLObjectOrReply(c,o) != REDIS_OK) return;
+ if (isHLLObjectOrReply(c,o) != C_OK) return;
o = dbUnshareStringValue(c->db,c->argv[2],o);
hdr = o->ptr;
@@ -1472,7 +1504,7 @@ void pfdebugCommand(redisClient *c) {
if (c->argc != 3) goto arityerr;
if (hdr->encoding == HLL_SPARSE) {
- if (hllSparseToDense(o) == REDIS_ERR) {
+ if (hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
@@ -1536,7 +1568,7 @@ void pfdebugCommand(redisClient *c) {
if (c->argc != 3) goto arityerr;
if (hdr->encoding == HLL_SPARSE) {
- if (hllSparseToDense(o) == REDIS_ERR) {
+ if (hllSparseToDense(o) == C_ERR) {
addReplySds(c,sdsnew(invalid_hll_err));
return;
}
diff --git a/src/intset.c b/src/intset.c
index 762bd48c8..198c90aa1 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -261,91 +261,7 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%intrev32ifbe(is->length));
}
-/* How many times bigger should the set length be compared to the requested
- * count of members for us to use the Floyd algorithm instead of
- * the Knuth algorithm */
-#define RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO (2)
-
-/* Copies 'count' random members from the set into the 'values' array.
- * 'values' must be an array of int64_t values, of length 'count'.
- * Returns the amount of items returned. If this amount is less than 'count',
- * then the remaining 'values' are left uninitialized. */
-int intsetRandomMembers(intset *is, int64_t* values, int count) {
-
- /* We don't check that is and values are non-NULL - the caller must
- * play nice. */
-
- int length = intsetLen(is);
-
- if (count > length) {
- /* Return everything in the set */
- count = length;
- }
-
- /* Choose between the Knuth shuffle algorithm, O(1) space, O(length) time,
- * and the Floyd algorithm, O(length) space, O(count) time. */
- if ((RANDOMMEMBERS_ALGORITHM_SELECTION_RATIO * count) > length) {
-
- /* If the count of members requested is almost the length of the set,
- * use the Knuth shuffle algorithm, O(1) space, O(length) time. */
-
- /* First, fill the values array with unique random indexes inside
- * the set. */
- int in, im, rn, rm;
- im = 0;
- for (in = 0; in < length && im < count; in++) {
-
- rn = length - in;
- rm = count - im;
- if (rand() % rn < rm) {
- values[im++] = in;
- }
- }
-
- } else {
-
- /* If the length is considerably more than the count of members
- * requested, use Robert Floyd's algorithm, O(length) space,
- * O(count) time.
- * Based on Jon Bentley's Programming Pearls */
-
- int64_t *is_used = zcalloc(sizeof(int64_t) * length);
- int in, im, r;
-
- r = 0;
- im = 0;
-
- for (in = length - count; in < length && im < count; in++) {
-
- /* Generate a random number r */
- r = rand() % (in + 1);
-
- /* Do we already have the value in r? */
- if (is_used[r]) {
- /* Use in instead of the generated number */
- r = in;
- }
-
- values[im++] = r ;
-
- /* Mark it as used */
- is_used[r] = 1;
- }
-
- zfree(is_used);
- }
-
- /* Replace each random index with the value stored there in the intset */
- uint8_t encoding = intrev32ifbe(is->encoding);
- for (int currentValue = 0; currentValue < count; currentValue++) {
- values[currentValue] =
- _intsetGetEncoded(is, values[currentValue], encoding);
- }
-
- return count;
-}
-
-/* Sets the value to the value at the given position. When this position is
+/* Get the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
if (pos < intrev32ifbe(is->length)) {
@@ -356,7 +272,7 @@ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
}
/* Return intset length */
-uint32_t intsetLen(intset *is) {
+uint32_t intsetLen(const intset *is) {
return intrev32ifbe(is->length);
}
diff --git a/src/intset.h b/src/intset.h
index 7550df303..8119e6636 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -43,9 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
-int intsetRandomMembers(intset *is, int64_t* value, int count);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
-uint32_t intsetLen(intset *is);
+uint32_t intsetLen(const intset *is);
size_t intsetBlobLen(intset *is);
#ifdef REDIS_TEST
diff --git a/src/latency.c b/src/latency.c
index cb116fb90..e8d2af306 100644
--- a/src/latency.c
+++ b/src/latency.c
@@ -33,15 +33,15 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* Dictionary type for latency events. */
int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
- REDIS_NOTUSED(privdata);
+ UNUSED(privdata);
return strcmp(key1,key2) == 0;
}
-unsigned int dictStringHash(const void *key) {
+uint64_t dictStringHash(const void *key) {
return dictGenHashFunction(key, strlen(key));
}
@@ -79,7 +79,7 @@ int THPIsEnabled(void) {
* value of the function is non-zero, the process is being targeted by
* THP support, and is likely to have memory usage / latency issues. */
int THPGetAnonHugePagesSize(void) {
- return zmalloc_get_smap_bytes_by_field("AnonHugePages:");
+ return zmalloc_get_smap_bytes_by_field("AnonHugePages:",-1);
}
/* ---------------------------- Latency API --------------------------------- */
@@ -109,6 +109,8 @@ void latencyAddSample(char *event, mstime_t latency) {
dictAdd(server.latency_events,zstrdup(event),ts);
}
+ if (latency > ts->max) ts->max = latency;
+
/* If the previous sample is in the same second, we update our old sample
* if this latency is > of the old one, or just return. */
prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
@@ -120,7 +122,6 @@ void latencyAddSample(char *event, mstime_t latency) {
ts->samples[ts->idx].time = time(NULL);
ts->samples[ts->idx].latency = latency;
- if (latency > ts->max) ts->max = latency;
ts->idx++;
if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
@@ -151,7 +152,7 @@ int latencyResetEvent(char *event_to_reset) {
/* ------------------------ Latency reporting (doctor) ---------------------- */
-/* Analyze the samples avaialble for a given event and return a structure
+/* Analyze the samples available for a given event and return a structure
 * populated with different metrics: average, MAD, min, max, and so forth.
 * Check the latency.h definition of struct latencyStats for more info.
 * If the specified event has no elements the structure is populated with
@@ -228,6 +229,7 @@ sds createLatencyReport(void) {
int advise_write_load_info = 0; /* Print info about AOF and write load. */
int advise_hz = 0; /* Use higher HZ. */
int advise_large_objects = 0; /* Deletion of large objects. */
+ int advise_mass_eviction = 0; /* Avoid mass eviction of keys. */
int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
int advise_disable_thp = 0; /* AnonHugePages detected. */
int advices = 0;
@@ -247,7 +249,7 @@ sds createLatencyReport(void) {
dictEntry *de;
int eventnum = 0;
- di = dictGetIterator(server.latency_events);
+ di = dictGetSafeIterator(server.latency_events);
while((de = dictNext(di)) != NULL) {
char *event = dictGetKey(de);
struct latencyTimeSeries *ts = dictGetVal(de);
@@ -292,7 +294,7 @@ sds createLatencyReport(void) {
/* Potentially commands. */
if (!strcasecmp(event,"command")) {
- if (server.slowlog_log_slower_than == 0) {
+ if (server.slowlog_log_slower_than < 0) {
advise_slowlog_enabled = 1;
advices++;
} else if (server.slowlog_log_slower_than/1000 >
@@ -364,11 +366,16 @@ sds createLatencyReport(void) {
}
/* Eviction cycle. */
- if (!strcasecmp(event,"eviction-cycle")) {
+ if (!strcasecmp(event,"eviction-del")) {
advise_large_objects = 1;
advices++;
}
+ if (!strcasecmp(event,"eviction-cycle")) {
+ advise_mass_eviction = 1;
+ advices++;
+ }
+
report = sdscatlen(report,"\n",1);
}
dictReleaseIterator(di);
@@ -452,6 +459,10 @@ sds createLatencyReport(void) {
report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
}
+ if (advise_mass_eviction) {
+ report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via sets or sorted sets intersections, STORE option of SORT, Redis Cluster large keys migrations (RESTORE command), may create sudden memory pressure forcing the server to block trying to evict keys. \n");
+ }
+
if (advise_disable_thp) {
report = sdscat(report,"- I detected a non zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', make sure to also add it into /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
}
@@ -464,7 +475,7 @@ sds createLatencyReport(void) {
/* latencyCommand() helper to produce a time-delay reply for all the samples
* in memory for the specified time series. */
-void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts) {
+void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
void *replylen = addDeferredMultiBulkLength(c);
int samples = 0, j;
@@ -482,7 +493,7 @@ void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts
/* latencyCommand() helper to produce the reply for the LATEST subcommand,
* listing the last latency sample for every event type registered so far. */
-void latencyCommandReplyWithLatestEvents(redisClient *c) {
+void latencyCommandReplyWithLatestEvents(client *c) {
dictIterator *di;
dictEntry *de;
@@ -554,7 +565,7 @@ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
* LATENCY DOCTOR: returns an human readable analysis of instance latency.
* LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
*/
-void latencyCommand(redisClient *c) {
+void latencyCommand(client *c) {
struct latencyTimeSeries *ts;
if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
diff --git a/src/latency.h b/src/latency.h
index 240f54b45..0fe26e0e4 100644
--- a/src/latency.h
+++ b/src/latency.h
@@ -86,4 +86,8 @@ int THPIsEnabled(void);
(var) >= server.latency_monitor_threshold) \
latencyAddSample((event),(var));
+/* Remove the time of a nested event from an enclosing one, by shifting the
+ * enclosing event's start time forward (see the usage sketch below). */
+#define latencyRemoveNestedEvent(event_var,nested_var) \
+ event_var += nested_var;
+
#endif /* __LATENCY_H */
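A hedged usage sketch of the new macro: it shifts the start time of the enclosing event forward by the duration of the nested one, so the nested time is not charged twice. latencyStartMonitor()/latencyEndMonitor() are assumed from the surrounding latency.h (start time captured, then latency = now - start), and the event names are illustrative:

    mstime_t command_start, expire_start;
    latencyStartMonitor(command_start);

    latencyStartMonitor(expire_start);
    /* ... nested expire-cycle work ... */
    latencyEndMonitor(expire_start);        /* Now holds a duration. */
    latencyAddSampleIfNeeded("expire-cycle", expire_start);

    /* Exclude the nested duration from the enclosing "command" event. */
    latencyRemoveNestedEvent(command_start, expire_start);

    /* ... rest of the command ... */
    latencyEndMonitor(command_start);
    latencyAddSampleIfNeeded("command", command_start);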
diff --git a/src/lazyfree.c b/src/lazyfree.c
new file mode 100644
index 000000000..3d3159c90
--- /dev/null
+++ b/src/lazyfree.c
@@ -0,0 +1,152 @@
+#include "server.h"
+#include "bio.h"
+#include "atomicvar.h"
+#include "cluster.h"
+
+static size_t lazyfree_objects = 0;
+pthread_mutex_t lazyfree_objects_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Return the number of currently pending objects to free. */
+size_t lazyfreeGetPendingObjectsCount(void) {
+ size_t aux;
+ atomicGet(lazyfree_objects,aux);
+ return aux;
+}
+
+/* Return the amount of work needed in order to free an object.
+ * The return value is not always the actual number of allocations the
+ * object is composed of, but a number proportional to it.
+ *
+ * For strings the function always returns 1.
+ *
+ * For aggregated objects represented by hash tables or other data structures
+ * the function just returns the number of elements the object is composed of.
+ *
+ * Objects composed of single allocations are always reported as having a
+ * single item even if they are actually logically composed of multiple
+ * elements.
+ *
+ * For lists the function returns the number of elements in the quicklist
+ * representing the list. */
+size_t lazyfreeGetFreeEffort(robj *obj) {
+ if (obj->type == OBJ_LIST) {
+ quicklist *ql = obj->ptr;
+ return ql->len;
+ } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HT) {
+ dict *ht = obj->ptr;
+ return dictSize(ht);
+ } else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST){
+ zset *zs = obj->ptr;
+ return zs->zsl->length;
+ } else if (obj->type == OBJ_HASH && obj->encoding == OBJ_ENCODING_HT) {
+ dict *ht = obj->ptr;
+ return dictSize(ht);
+ } else {
+ return 1; /* Everything else is a single allocation. */
+ }
+}
+
+/* Delete a key, value, and associated expiration entry if any, from the DB.
+ * If there are enough allocations to free, the value object may be put into
+ * a lazy free list instead of being freed synchronously. The lazy free list
+ * will be reclaimed in a different bio.c thread. */
+#define LAZYFREE_THRESHOLD 64
+int dbAsyncDelete(redisDb *db, robj *key) {
+ /* Deleting an entry from the expires dict will not free the sds of
+ * the key, because it is shared with the main dictionary. */
+ if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);
+
+ /* If the value is composed of a few allocations, freeing it in a lazy
+ * way is actually just slower... So under a certain limit we just free
+ * the object synchronously. */
+ dictEntry *de = dictUnlink(db->dict,key->ptr);
+ if (de) {
+ robj *val = dictGetVal(de);
+ size_t free_effort = lazyfreeGetFreeEffort(val);
+
+ /* If releasing the object is too much work, do it in the background
+ * by adding the object to the lazy free list.
+ * Note that if the object is shared, it is not possible to reclaim it
+ * now. This rarely happens; however, sometimes the implementation
+ * of parts of the Redis core may call incrRefCount() to protect
+ * objects, and then call dbDelete(). In this case we'll fall
+ * through and reach the dictFreeUnlinkedEntry() call, that will be
+ * equivalent to just calling decrRefCount(). */
+ if (free_effort > LAZYFREE_THRESHOLD && val->refcount == 1) {
+ atomicIncr(lazyfree_objects,1);
+ bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL);
+ dictSetVal(db->dict,de,NULL);
+ }
+ }
+
+ /* Release the key-val pair, or just the key if we set the val
+ * field to NULL in order to lazy free it later. */
+ if (de) {
+ dictFreeUnlinkedEntry(db->dict,de);
+ if (server.cluster_enabled) slotToKeyDel(key);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Free an object, if the object is huge enough, free it in async way. */
+void freeObjAsync(robj *o) {
+ size_t free_effort = lazyfreeGetFreeEffort(o);
+ if (free_effort > LAZYFREE_THRESHOLD && o->refcount == 1) {
+ atomicIncr(lazyfree_objects,1);
+ bioCreateBackgroundJob(BIO_LAZY_FREE,o,NULL,NULL);
+ } else {
+ decrRefCount(o);
+ }
+}
+
+/* Empty a Redis DB asynchronously. What the function actually does is
+ * create a new empty set of hash tables and schedule the old ones for
+ * lazy freeing. */
+void emptyDbAsync(redisDb *db) {
+ dict *oldht1 = db->dict, *oldht2 = db->expires;
+ db->dict = dictCreate(&dbDictType,NULL);
+ db->expires = dictCreate(&keyptrDictType,NULL);
+ atomicIncr(lazyfree_objects,dictSize(oldht1));
+ bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,oldht1,oldht2);
+}
+
+/* Empty the slots-keys map of Redis Cluster by creating a new empty one
+ * and scheduling the old one for lazy freeing. */
+void slotToKeyFlushAsync(void) {
+ rax *old = server.cluster->slots_to_keys;
+
+ server.cluster->slots_to_keys = raxNew();
+ memset(server.cluster->slots_keys_count,0,
+ sizeof(server.cluster->slots_keys_count));
+ atomicIncr(lazyfree_objects,old->numele);
+ bioCreateBackgroundJob(BIO_LAZY_FREE,NULL,NULL,old);
+}
+
+/* Release objects from the lazyfree thread. It's just decrRefCount()
+ * updating the count of objects to release. */
+void lazyfreeFreeObjectFromBioThread(robj *o) {
+ decrRefCount(o);
+ atomicDecr(lazyfree_objects,1);
+}
+
+/* Release a database from the lazyfree thread. The 'ht1' and 'ht2'
+ * pointers are the main and expires hash tables which were substituted
+ * with fresh ones in the main thread when the database was logically
+ * deleted. */
+void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2) {
+ size_t numkeys = dictSize(ht1);
+ dictRelease(ht1);
+ dictRelease(ht2);
+ atomicDecr(lazyfree_objects,numkeys);
+}
+
+/* Release the radix tree mapping Redis Cluster keys to slots in the
+ * lazyfree thread. */
+void lazyfreeFreeSlotsMapFromBioThread(rax *rt) {
+ size_t len = rt->numele;
+ raxFree(rt);
+ atomicDecr(lazyfree_objects,len);
+}
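The recurring pattern in this new file is: bump a shared pending counter, hand the expensive teardown to a background thread, and decrement the counter once the free completes. A minimal stand-alone sketch with plain pthreads follows; bio.c actually uses a fixed worker thread with a job queue rather than a thread per object, so every name below is illustrative:

    #include <pthread.h>
    #include <stdlib.h>

    static size_t pending = 0;
    static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

    static void *free_worker(void *arg) {
        free(arg);                  /* The expensive teardown happens here. */
        pthread_mutex_lock(&pending_lock);
        pending--;                  /* Mirrors atomicDecr(lazyfree_objects,1). */
        pthread_mutex_unlock(&pending_lock);
        return NULL;
    }

    void free_async(void *obj) {
        pthread_mutex_lock(&pending_lock);
        pending++;                  /* Mirrors atomicIncr(lazyfree_objects,1). */
        pthread_mutex_unlock(&pending_lock);
        pthread_t tid;
        pthread_create(&tid, NULL, free_worker, obj);
        pthread_detach(tid);
    }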
diff --git a/src/listpack.c b/src/listpack.c
new file mode 100644
index 000000000..c3070db6d
--- /dev/null
+++ b/src/listpack.c
@@ -0,0 +1,783 @@
+/* Listpack -- A list of strings serialization format
+ *
+ * This file implements the specification you can find at:
+ *
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "listpack.h"
+#include "listpack_malloc.h"
+
+#define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */
+#define LP_HDR_NUMELE_UNKNOWN UINT16_MAX
+#define LP_MAX_INT_ENCODING_LEN 9
+#define LP_MAX_BACKLEN_SIZE 5
+#define LP_MAX_ENTRY_BACKLEN 34359738367ULL
+#define LP_ENCODING_INT 0
+#define LP_ENCODING_STRING 1
+
+#define LP_ENCODING_7BIT_UINT 0
+#define LP_ENCODING_7BIT_UINT_MASK 0x80
+#define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)
+
+#define LP_ENCODING_6BIT_STR 0x80
+#define LP_ENCODING_6BIT_STR_MASK 0xC0
+#define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)
+
+#define LP_ENCODING_13BIT_INT 0xC0
+#define LP_ENCODING_13BIT_INT_MASK 0xE0
+#define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)
+
+#define LP_ENCODING_12BIT_STR 0xE0
+#define LP_ENCODING_12BIT_STR_MASK 0xF0
+#define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)
+
+#define LP_ENCODING_16BIT_INT 0xF1
+#define LP_ENCODING_16BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)
+
+#define LP_ENCODING_24BIT_INT 0xF2
+#define LP_ENCODING_24BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)
+
+#define LP_ENCODING_32BIT_INT 0xF3
+#define LP_ENCODING_32BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)
+
+#define LP_ENCODING_64BIT_INT 0xF4
+#define LP_ENCODING_64BIT_INT_MASK 0xFF
+#define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)
+
+#define LP_ENCODING_32BIT_STR 0xF0
+#define LP_ENCODING_32BIT_STR_MASK 0xFF
+#define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)
+
+#define LP_EOF 0xFF
+
+#define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)
+#define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])
+#define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \
+ ((uint32_t)(p)[2]<<8) | \
+ ((uint32_t)(p)[3]<<16) | \
+ ((uint32_t)(p)[4]<<24))
+
+#define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \
+ ((uint32_t)(p)[1]<<8) | \
+ ((uint32_t)(p)[2]<<16) | \
+ ((uint32_t)(p)[3]<<24))
+
+#define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \
+ ((uint32_t)(p)[5]<<8))
+#define lpSetTotalBytes(p,v) do { \
+ (p)[0] = (v)&0xff; \
+ (p)[1] = ((v)>>8)&0xff; \
+ (p)[2] = ((v)>>16)&0xff; \
+ (p)[3] = ((v)>>24)&0xff; \
+} while(0)
+
+#define lpSetNumElements(p,v) do { \
+ (p)[4] = (v)&0xff; \
+ (p)[5] = ((v)>>8)&0xff; \
+} while(0)
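As a worked example of the first-byte dispatch these constants implement (decoded per the macros above and lpGet() below; multi-byte integer payloads are little endian):

    /*  0x7A             -> 0xxxxxxx : 7 bit uint, value 122, 1 byte total
     *  0x83 'f' 'o' 'o' -> 10xxxxxx : 6 bit string header, length 3
     *  0xC4 0x00        -> 110xxxxx : 13 bit int, (0x04<<8)|0x00 = 1024
     *  0xF1 0x39 0x30   -> 16 bit int, 0x39|(0x30<<8) = 0x3039 = 12345
     *  0xFF             -> EOF terminator */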
+
+/* Convert a string into a signed 64 bit integer.
+ * The function returns 1 if the string could be parsed into a (non-overflowing)
+ * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value
+ * when the function returns success.
+ *
+ * Note that this function demands that the string strictly represents
+ * an int64 value: no spaces or other characters before or after the string
+ * representing the number are accepted, nor leading zeroes, except for
+ * the string "0" representing the zero number.
+ *
+ * Because of its strictness, it is safe to use this function to check if
+ * you can convert a string into a long long, and obtain back the string
+ * from the number without any loss in the string representation.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Credits: this function was adapted from the Redis source code, file
+ * "utils.c", function string2ll(), and is copyright:
+ *
+ * Copyright(C) 2011, Pieter Noordhuis
+ * Copyright(C) 2011, Salvatore Sanfilippo
+ *
+ * The function is released under the BSD 3-clause license.
+ */
+int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {
+ const char *p = s;
+ unsigned long plen = 0;
+ int negative = 0;
+ uint64_t v;
+
+ if (plen == slen)
+ return 0;
+
+ /* Special case: first and only digit is 0. */
+ if (slen == 1 && p[0] == '0') {
+ if (value != NULL) *value = 0;
+ return 1;
+ }
+
+ if (p[0] == '-') {
+ negative = 1;
+ p++; plen++;
+
+ /* Abort on only a negative sign. */
+ if (plen == slen)
+ return 0;
+ }
+
+ /* First digit should be 1-9, otherwise the string should just be 0. */
+ if (p[0] >= '1' && p[0] <= '9') {
+ v = p[0]-'0';
+ p++; plen++;
+ } else if (p[0] == '0' && slen == 1) {
+ *value = 0;
+ return 1;
+ } else {
+ return 0;
+ }
+
+ while (plen < slen && p[0] >= '0' && p[0] <= '9') {
+ if (v > (UINT64_MAX / 10)) /* Overflow. */
+ return 0;
+ v *= 10;
+
+ if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */
+ return 0;
+ v += p[0]-'0';
+
+ p++; plen++;
+ }
+
+ /* Return if not all bytes were used. */
+ if (plen < slen)
+ return 0;
+
+ if (negative) {
+ if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */
+ return 0;
+ if (value != NULL) *value = -v;
+ } else {
+ if (v > INT64_MAX) /* Overflow. */
+ return 0;
+ if (value != NULL) *value = v;
+ }
+ return 1;
+}
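A usage sketch for the strict parser, assuming it is compiled together with this file; each rejected form is one that would not round trip byte-for-byte:

    #include <assert.h>

    void lpStringToInt64_demo(void) {
        int64_t v;
        assert(lpStringToInt64("123", 3, &v) == 1 && v == 123);
        assert(lpStringToInt64("-9", 2, &v) == 1 && v == -9);
        assert(lpStringToInt64("0123", 4, &v) == 0);  /* Leading zero. */
        assert(lpStringToInt64(" 123", 4, &v) == 0);  /* Leading space. */
        assert(lpStringToInt64("12a", 3, &v) == 0);   /* Trailing byte. */
    }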
+
+/* Create a new, empty listpack.
+ * On success the new listpack is returned, otherwise an error is returned. */
+unsigned char *lpNew(void) {
+ unsigned char *lp = lp_malloc(LP_HDR_SIZE+1);
+ if (lp == NULL) return NULL;
+ lpSetTotalBytes(lp,LP_HDR_SIZE+1);
+ lpSetNumElements(lp,0);
+ lp[LP_HDR_SIZE] = LP_EOF;
+ return lp;
+}
+
+/* Free the specified listpack. */
+void lpFree(unsigned char *lp) {
+ lp_free(lp);
+}
+
+/* Given an element 'ele' of size 'size', determine if the element can be
+ * represented inside the listpack encoded as an integer, and return
+ * LP_ENCODING_INT if so. Otherwise return LP_ENCODING_STRING, meaning that
+ * no integer encoding is possible.
+ *
+ * If the LP_ENCODING_INT is returned, the function stores the integer encoded
+ * representation of the element in the 'intenc' buffer.
+ *
+ * Regardless of the returned encoding, 'enclen' is populated by reference to
+ * the number of bytes that the string or integer encoded element will require
+ * in order to be represented. */
+int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {
+ int64_t v;
+ if (lpStringToInt64((const char*)ele, size, &v)) {
+ if (v >= 0 && v <= 127) {
+ /* Single byte 0-127 integer. */
+ intenc[0] = v;
+ *enclen = 1;
+ } else if (v >= -4096 && v <= 4095) {
+ /* 13 bit integer. */
+ if (v < 0) v = ((int64_t)1<<13)+v;
+ intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;
+ intenc[1] = v&0xff;
+ *enclen = 2;
+ } else if (v >= -32768 && v <= 32767) {
+ /* 16 bit integer. */
+ if (v < 0) v = ((int64_t)1<<16)+v;
+ intenc[0] = LP_ENCODING_16BIT_INT;
+ intenc[1] = v&0xff;
+ intenc[2] = v>>8;
+ *enclen = 3;
+ } else if (v >= -8388608 && v <= 8388607) {
+ /* 24 bit integer. */
+ if (v < 0) v = ((int64_t)1<<24)+v;
+ intenc[0] = LP_ENCODING_24BIT_INT;
+ intenc[1] = v&0xff;
+ intenc[2] = (v>>8)&0xff;
+ intenc[3] = v>>16;
+ *enclen = 4;
+ } else if (v >= -2147483648 && v <= 2147483647) {
+ /* 32 bit integer. */
+ if (v < 0) v = ((int64_t)1<<32)+v;
+ intenc[0] = LP_ENCODING_32BIT_INT;
+ intenc[1] = v&0xff;
+ intenc[2] = (v>>8)&0xff;
+ intenc[3] = (v>>16)&0xff;
+ intenc[4] = v>>24;
+ *enclen = 5;
+ } else {
+ /* 64 bit integer. */
+ uint64_t uv = v;
+ intenc[0] = LP_ENCODING_64BIT_INT;
+ intenc[1] = uv&0xff;
+ intenc[2] = (uv>>8)&0xff;
+ intenc[3] = (uv>>16)&0xff;
+ intenc[4] = (uv>>24)&0xff;
+ intenc[5] = (uv>>32)&0xff;
+ intenc[6] = (uv>>40)&0xff;
+ intenc[7] = (uv>>48)&0xff;
+ intenc[8] = uv>>56;
+ *enclen = 9;
+ }
+ return LP_ENCODING_INT;
+ } else {
+ if (size < 64) *enclen = 1+size;
+ else if (size < 4096) *enclen = 2+size;
+ else *enclen = 5+size;
+ return LP_ENCODING_STRING;
+ }
+}
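A classification sketch, again assuming compilation together with this file; the expected results in the comments follow directly from the ranges handled above:

    void lpEncodeGetType_demo(void) {
        unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
        uint64_t enclen;

        /* "1024" fits the 13 bit integer encoding: 2 bytes total. */
        int t = lpEncodeGetType((unsigned char*)"1024", 4, intenc, &enclen);
        /* t == LP_ENCODING_INT, enclen == 2 */

        /* "hello" is not an integer: 6 bit string, 1 header byte + 5. */
        t = lpEncodeGetType((unsigned char*)"hello", 5, intenc, &enclen);
        /* t == LP_ENCODING_STRING, enclen == 6 */
        (void)t;
    }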
+
+/* Store a reverse-encoded variable length field, representing the length
+ * of the previous element of size 'l', in the target buffer 'buf'.
+ * The function returns the number of bytes used to encode it, from
+ * 1 to 5. If 'buf' is NULL the function just returns the number of bytes
+ * needed in order to encode the backlen. */
+unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
+ if (l <= 127) {
+ if (buf) buf[0] = l;
+ return 1;
+ } else if (l < 16383) {
+ if (buf) {
+ buf[0] = l>>7;
+ buf[1] = (l&127)|128;
+ }
+ return 2;
+ } else if (l < 2097151) {
+ if (buf) {
+ buf[0] = l>>14;
+ buf[1] = ((l>>7)&127)|128;
+ buf[2] = (l&127)|128;
+ }
+ return 3;
+ } else if (l < 268435455) {
+ if (buf) {
+ buf[0] = l>>21;
+ buf[1] = ((l>>14)&127)|128;
+ buf[2] = ((l>>7)&127)|128;
+ buf[3] = (l&127)|128;
+ }
+ return 4;
+ } else {
+ if (buf) {
+ buf[0] = l>>28;
+ buf[1] = ((l>>21)&127)|128;
+ buf[2] = ((l>>14)&127)|128;
+ buf[3] = ((l>>7)&127)|128;
+ buf[4] = (l&127)|128;
+ }
+ return 5;
+ }
+}
+
+/* Decode the backlen and returns it. If the encoding looks invalid (more than
+ * 5 bytes are used), UINT64_MAX is returned to report the problem. */
+uint64_t lpDecodeBacklen(unsigned char *p) {
+ uint64_t val = 0;
+ uint64_t shift = 0;
+ do {
+ val |= (uint64_t)(p[0] & 127) << shift;
+ if (!(p[0] & 128)) break;
+ shift += 7;
+ p--;
+ if (shift > 28) return UINT64_MAX;
+ } while(1);
+ return val;
+}
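A round-trip sketch for l = 300, which needs a 2 byte backlen. Note that decoding starts from the last byte and walks left (the p-- above), with the continuation bit set on every byte except the leftmost:

    void lpBacklen_demo(void) {
        unsigned char buf[LP_MAX_BACKLEN_SIZE];
        unsigned long n = lpEncodeBacklen(buf, 300);
        /* n == 2: buf[0] = 300>>7 = 2, buf[1] = (300&127)|128 = 172. */

        uint64_t l = lpDecodeBacklen(buf + n - 1);
        /* l == 300: 44 from the last byte, plus 2<<7 from the first. */
        (void)l;
    }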
+
+/* Encode the string element pointed by 's' of size 'len' in the target
+ * buffer 'buf'. The function should be called with 'buf' always having
+ * enough space for encoding the string. This is done by calling lpEncodeGetType()
+ * before calling this function. */
+void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) {
+ if (len < 64) {
+ buf[0] = len | LP_ENCODING_6BIT_STR;
+ memcpy(buf+1,s,len);
+ } else if (len < 4096) {
+ buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;
+ buf[1] = len & 0xff;
+ memcpy(buf+2,s,len);
+ } else {
+ buf[0] = LP_ENCODING_32BIT_STR;
+ buf[1] = len & 0xff;
+ buf[2] = (len >> 8) & 0xff;
+ buf[3] = (len >> 16) & 0xff;
+ buf[4] = (len >> 24) & 0xff;
+ memcpy(buf+5,s,len);
+ }
+}
+
+/* Return the encoded length of the listpack element pointed by 'p'. If the
+ * element encoding is wrong then 0 is returned. */
+uint32_t lpCurrentEncodedSize(unsigned char *p) {
+ if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
+ if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);
+ if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;
+ if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;
+ if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;
+ if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;
+ if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;
+ if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);
+ if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);
+ if (p[0] == LP_EOF) return 1;
+ return 0;
+}
+
+/* Skip the current entry returning the next. It is invalid to call this
+ * function when the current element is the EOF element at the end of the
+ * listpack. However, unlike lpNext() (which is built on top of this
+ * function), it does not return NULL when the next element is EOF. */
+unsigned char *lpSkip(unsigned char *p) {
+ unsigned long entrylen = lpCurrentEncodedSize(p);
+ entrylen += lpEncodeBacklen(NULL,entrylen);
+ p += entrylen;
+ return p;
+}
+
+/* If 'p' points to an element of the listpack, calling lpNext() will return
+ * the pointer to the next element (the one on the right), or NULL if 'p'
+ * already pointed to the last element of the listpack. */
+unsigned char *lpNext(unsigned char *lp, unsigned char *p) {
+ ((void) lp); /* lp is not used for now. However lpPrev() uses it. */
+ p = lpSkip(p);
+ if (p[0] == LP_EOF) return NULL;
+ return p;
+}
+
+/* If 'p' points to an element of the listpack, calling lpPrev() will return
+ * the pointer to the previous element (the one on the left), or NULL if 'p'
+ * already pointed to the first element of the listpack. */
+unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
+ if (p-lp == LP_HDR_SIZE) return NULL;
+ p--; /* Seek the first backlen byte of the last element. */
+ uint64_t prevlen = lpDecodeBacklen(p);
+ prevlen += lpEncodeBacklen(NULL,prevlen);
+ return p-prevlen+1; /* Seek the first byte of the previous entry. */
+}
+
+/* Return a pointer to the first element of the listpack, or NULL if the
+ * listpack has no elements. */
+unsigned char *lpFirst(unsigned char *lp) {
+ lp += LP_HDR_SIZE; /* Skip the header. */
+ if (lp[0] == LP_EOF) return NULL;
+ return lp;
+}
+
+/* Return a pointer to the last element of the listpack, or NULL if the
+ * listpack has no elements. */
+unsigned char *lpLast(unsigned char *lp) {
+ unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */
+ return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */
+}
+
+/* Return the number of elements inside the listpack. This function attempts
+ * to use the cached value when within range, otherwise a full scan is
+ * needed. As a side effect of calling this function, the listpack header
+ * could be modified, because if the count is found to be already within
+ * the 'numele' header field range, the new value is set. */
+uint32_t lpLength(unsigned char *lp) {
+ uint32_t numele = lpGetNumElements(lp);
+ if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;
+
+ /* Too many elements inside the listpack. We need to scan in order
+ * to get the total number. */
+ uint32_t count = 0;
+ unsigned char *p = lpFirst(lp);
+ while(p) {
+ count++;
+ p = lpNext(lp,p);
+ }
+
+ /* If the count is again within range of the header numele field,
+ * set it. */
+ if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);
+ return count;
+}
+
+/* Return the listpack element pointed by 'p'.
+ *
+ * The function changes behavior depending on the passed 'intbuf' value.
+ * Specifically, if 'intbuf' is NULL:
+ *
+ * If the element is internally encoded as an integer, the function returns
+ * NULL and populates the integer value by reference in 'count'. Otherwise if
+ * the element is encoded as a string a pointer to the string (pointing inside
+ * the listpack itself) is returned, and 'count' is set to the length of the
+ * string.
+ *
+ * If instead 'intbuf' points to a buffer passed by the caller, that must be
+ * at least LP_INTBUF_SIZE bytes, the function always returns the element as
+ * if it was a string (returning the pointer to the string and setting the
+ * 'count' argument to the string length by reference). However if the element
+ * is encoded as an integer, the 'intbuf' buffer is used in order to store
+ * the string representation.
+ *
+ * The user should use one or the other form depending on what the value will
+ * be used for. If there is immediate usage for an integer value returned
+ * by the function, then passing a buffer (and converting the string back
+ * to a number) is of course useless.
+ *
+ * If the function is called against a badly encoded listpack, so that there
+ * is no valid way to parse it, the function behaves as if there was an
+ * integer encoded with value 12345678900000000 + <unrecognized byte>: this
+ * may be a hint that something is wrong. Crashing in this case is not
+ * sensible because of the different requirements of the applications using
+ * this lib.
+ *
+ * Similarly, no error is returned since the listpack can normally be
+ * assumed to be valid, so reporting one would be a very high API cost.
+ * However a function to check the integrity of the listpack at load time
+ * is provided: see lpIsValid(). */
+unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
+ int64_t val;
+ uint64_t uval, negstart, negmax;
+
+ if (LP_ENCODING_IS_7BIT_UINT(p[0])) {
+ negstart = UINT64_MAX; /* 7 bit ints are always positive. */
+ negmax = 0;
+ uval = p[0] & 0x7f;
+ } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
+ *count = LP_ENCODING_6BIT_STR_LEN(p);
+ return p+1;
+ } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
+ uval = ((p[0]&0x1f)<<8) | p[1];
+ negstart = (uint64_t)1<<12;
+ negmax = 8191;
+ } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {
+ uval = (uint64_t)p[1] |
+ (uint64_t)p[2]<<8;
+ negstart = (uint64_t)1<<15;
+ negmax = UINT16_MAX;
+ } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {
+ uval = (uint64_t)p[1] |
+ (uint64_t)p[2]<<8 |
+ (uint64_t)p[3]<<16;
+ negstart = (uint64_t)1<<23;
+ negmax = UINT32_MAX>>8;
+ } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {
+ uval = (uint64_t)p[1] |
+ (uint64_t)p[2]<<8 |
+ (uint64_t)p[3]<<16 |
+ (uint64_t)p[4]<<24;
+ negstart = (uint64_t)1<<31;
+ negmax = UINT32_MAX;
+ } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {
+ uval = (uint64_t)p[1] |
+ (uint64_t)p[2]<<8 |
+ (uint64_t)p[3]<<16 |
+ (uint64_t)p[4]<<24 |
+ (uint64_t)p[5]<<32 |
+ (uint64_t)p[6]<<40 |
+ (uint64_t)p[7]<<48 |
+ (uint64_t)p[8]<<56;
+ negstart = (uint64_t)1<<63;
+ negmax = UINT64_MAX;
+ } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
+ *count = LP_ENCODING_12BIT_STR_LEN(p);
+ return p+2;
+ } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
+ *count = LP_ENCODING_32BIT_STR_LEN(p);
+ return p+5;
+ } else {
+ uval = 12345678900000000ULL + p[0];
+ negstart = UINT64_MAX;
+ negmax = 0;
+ }
+
+ /* We reach this code path only for integer encodings.
+ * Convert the unsigned value to the signed one using two's complement
+ * rule. */
+ if (uval >= negstart) {
+ /* This three steps conversion should avoid undefined behaviors
+ * in the unsigned -> signed conversion. */
+ uval = negmax-uval;
+ val = uval;
+ val = -val-1;
+ } else {
+ val = uval;
+ }
+
+ /* Return the string representation of the integer or the value itself
+ * depending on intbuf being NULL or not. */
+ if (intbuf) {
+ *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",(long long)val);
+ return intbuf;
+ } else {
+ *count = val;
+ return NULL;
+ }
+}
+
+/* Insert, delete or replace the specified element 'ele' of size 'size' at
+ * the specified position 'p', with 'p' being a listpack element pointer
+ * obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or
+ * lpSeek().
+ *
+ * The element is inserted before, after, or replaces the element pointed
+ * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER
+ * or LP_REPLACE.
+ *
+ * If 'ele' is set to NULL, the function removes the element pointed by 'p'
+ * instead of inserting one.
+ *
+ * Returns NULL on out of memory or when the listpack total length would exceed
+ * the max allowed size of 2^32-1, otherwise the new pointer to the listpack
+ * holding the new element is returned (and the old pointer passed is no longer
+ * considered valid).
+ *
+ * If 'newp' is not NULL, at the end of a successful call '*newp' will be set
+ * to the address of the element just added, so that it will be possible to
+ * continue an iteration with lpNext() and lpPrev().
+ *
+ * For deletion operations ('ele' set to NULL) 'newp' is set to the next
+ * element, on the right of the deleted one, or to NULL if the deleted element
+ * was the last one. */
+unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) {
+ unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
+ unsigned char backlen[LP_MAX_BACKLEN_SIZE];
+
+ uint64_t enclen; /* The length of the encoded element. */
+
+ /* An element pointer set to NULL means deletion, which is conceptually
+ * replacing the element with a zero-length element. So whatever we
+ * get passed as 'where', set it to LP_REPLACE. */
+ if (ele == NULL) where = LP_REPLACE;
+
+ /* If we need to insert after the current element, we just jump to the
+ * next element (that could be the EOF one) and handle the case of
+ * inserting before. So the function will actually deal with just two
+ * cases: LP_BEFORE and LP_REPLACE. */
+ if (where == LP_AFTER) {
+ p = lpSkip(p);
+ where = LP_BEFORE;
+ }
+
+ /* Store the offset of the element 'p', so that we can obtain its
+ * address again after a reallocation. */
+ unsigned long poff = p-lp;
+
+ /* Calling lpEncodeGetType() results into the encoded version of the
+ * element to be stored into 'intenc' in case it is representable as
+ * an integer: in that case, the function returns LP_ENCODING_INT.
+ * Otherwise if LP_ENCODING_STR is returned, we'll have to call
+ * lpEncodeString() to actually write the encoded string on place later.
+ *
+ * Whatever the returned encoding is, 'enclen' is populated with the
+ * length of the encoded element. */
+ int enctype;
+ if (ele) {
+ enctype = lpEncodeGetType(ele,size,intenc,&enclen);
+ } else {
+ enctype = -1;
+ enclen = 0;
+ }
+
+ /* We need to also encode the backward-parsable length of the element
+ * and append it to the end: this allows traversal of the listpack from
+ * the end to the start. */
+ unsigned long backlen_size = ele ? lpEncodeBacklen(backlen,enclen) : 0;
+ uint64_t old_listpack_bytes = lpGetTotalBytes(lp);
+ uint32_t replaced_len = 0;
+ if (where == LP_REPLACE) {
+ replaced_len = lpCurrentEncodedSize(p);
+ replaced_len += lpEncodeBacklen(NULL,replaced_len);
+ }
+
+ uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size
+ - replaced_len;
+ if (new_listpack_bytes > UINT32_MAX) return NULL;
+
+ /* We now need to reallocate in order to make space or shrink the
+ * allocation (in case the 'where' value is LP_REPLACE and the new element is
+ * smaller). However we do that before memmoving the memory to
+ * make room for the new element if the final allocation will get
+ * larger, or we do it after if the final allocation will get smaller. */
+
+ unsigned char *dst = lp + poff; /* May be updated after reallocation. */
+
+ /* Realloc before: we need more room. */
+ if (new_listpack_bytes > old_listpack_bytes) {
+ if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
+ dst = lp + poff;
+ }
+
+ /* Setup the listpack relocating the elements to make the exact room
+ * we need to store the new one. */
+ if (where == LP_BEFORE) {
+ memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);
+ } else { /* LP_REPLACE. */
+ long lendiff = (enclen+backlen_size)-replaced_len;
+ memmove(dst+replaced_len+lendiff,
+ dst+replaced_len,
+ old_listpack_bytes-poff-replaced_len);
+ }
+
+ /* Realloc after: we need to free space. */
+ if (new_listpack_bytes < old_listpack_bytes) {
+ if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
+ dst = lp + poff;
+ }
+
+ /* Store the entry. */
+ if (newp) {
+ *newp = dst;
+ /* In case of deletion, set 'newp' to NULL if the next element is
+ * the EOF element. */
+ if (!ele && dst[0] == LP_EOF) *newp = NULL;
+ }
+ if (ele) {
+ if (enctype == LP_ENCODING_INT) {
+ memcpy(dst,intenc,enclen);
+ } else {
+ lpEncodeString(dst,ele,size);
+ }
+ dst += enclen;
+ memcpy(dst,backlen,backlen_size);
+ dst += backlen_size;
+ }
+
+ /* Update header. */
+ if (where != LP_REPLACE || ele == NULL) {
+ uint32_t num_elements = lpGetNumElements(lp);
+ if (num_elements != LP_HDR_NUMELE_UNKNOWN) {
+ if (ele)
+ lpSetNumElements(lp,num_elements+1);
+ else
+ lpSetNumElements(lp,num_elements-1);
+ }
+ }
+ lpSetTotalBytes(lp,new_listpack_bytes);
+ return lp;
+}
+
+/* Append the specified element 'ele' of length 'size' at the end of the
+ * listpack. It is implemented in terms of lpInsert(), so the return value is
+ * the same as lpInsert(). */
+unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {
+ uint64_t listpack_bytes = lpGetTotalBytes(lp);
+ unsigned char *eofptr = lp + listpack_bytes - 1;
+ return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL);
+}
+
+/* Remove the element pointed to by 'p', and return the resulting listpack.
+ * If 'newp' is not NULL, the next element pointer (to the right of the
+ * deleted one) is returned by reference. If the deleted element was the
+ * last one, '*newp' is set to NULL. */
+unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {
+ return lpInsert(lp,NULL,0,p,LP_REPLACE,newp);
+}
+
+/* Return the total number of bytes the listpack is composed of. */
+uint32_t lpBytes(unsigned char *lp) {
+ return lpGetTotalBytes(lp);
+}
+
+/* Seek the specified element and return a pointer to it. Positive indexes
+ * specify the zero-based position of the element, counting from the head
+ * towards the tail; negative indexes specify elements starting from the
+ * tail, where -1 means the last element, -2 the penultimate and so forth.
+ * If the index is out of range, NULL is returned. */
+unsigned char *lpSeek(unsigned char *lp, long index) {
+ int forward = 1; /* Seek forward by default. */
+
+ /* We want to seek from left to right or the other way around
+ * depending on the listpack length and the element position.
+ * However if the listpack length cannot be obtained in constant time,
+ * we always seek from left to right. */
+ uint32_t numele = lpGetNumElements(lp);
+ if (numele != LP_HDR_NUMELE_UNKNOWN) {
+ if (index < 0) index = (long)numele+index;
+ if (index < 0) return NULL; /* Index still < 0 means out of range. */
+ if (index >= numele) return NULL; /* Out of range the other side. */
+ /* We want to scan right-to-left if the element we are looking for
+ * is past the half of the listpack. */
+ if (index > numele/2) {
+ forward = 0;
+            /* Right-to-left scanning always expects a negative index. Convert
+             * our index to negative form. */
+ index -= numele;
+ }
+ } else {
+        /* If the listpack length is unspecified, for negative indexes we
+         * always scan right-to-left. */
+ if (index < 0) forward = 0;
+ }
+
+ /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */
+ if (forward) {
+ unsigned char *ele = lpFirst(lp);
+ while (index > 0 && ele) {
+ ele = lpNext(lp,ele);
+ index--;
+ }
+ return ele;
+ } else {
+ unsigned char *ele = lpLast(lp);
+ while (index < -1 && ele) {
+ ele = lpPrev(lp,ele);
+ index++;
+ }
+ return ele;
+ }
+}
+
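Taken together, the functions above cover the whole write/read cycle of the listpack API. A minimal usage sketch (hypothetical caller code, not part of this commit; assumes <stdio.h> and listpack.h, error handling omitted):

    unsigned char *lp = lpNew();
    lp = lpAppend(lp,(unsigned char*)"foo",3);
    lp = lpAppend(lp,(unsigned char*)"1024",4);

    unsigned char *p = lpSeek(lp,-1);     /* Last element: "1024". */
    int64_t count;
    unsigned char intbuf[LP_INTBUF_SIZE]; /* Used for integer-encoded elements. */
    unsigned char *str = lpGet(p,&count,intbuf);
    printf("%.*s\n",(int)count,str);      /* Prints: 1024 */

    lpFree(lp);

Since every insertion may reallocate the listpack, the pointer returned by lpAppend() must always replace the old 'lp' reference.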
diff --git a/src/listpack.h b/src/listpack.h
new file mode 100644
index 000000000..af67b4b41
--- /dev/null
+++ b/src/listpack.h
@@ -0,0 +1,61 @@
+/* Listpack -- A serialization format for lists of strings
+ *
+ * This file implements the specification you can find at:
+ *
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __LISTPACK_H
+#define __LISTPACK_H
+
+#include <stdint.h>
+
+#define LP_INTBUF_SIZE 21 /* 20 chars for -2^63 + 1 null term = 21. */
+
+/* Possible values for the lpInsert() 'where' argument: */
+#define LP_BEFORE 0
+#define LP_AFTER 1
+#define LP_REPLACE 2
+
+unsigned char *lpNew(void);
+void lpFree(unsigned char *lp);
+unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp);
+unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size);
+unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
+uint32_t lpLength(unsigned char *lp);
+unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
+unsigned char *lpFirst(unsigned char *lp);
+unsigned char *lpLast(unsigned char *lp);
+unsigned char *lpNext(unsigned char *lp, unsigned char *p);
+unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
+uint32_t lpBytes(unsigned char *lp);
+unsigned char *lpSeek(unsigned char *lp, long index);
+
+#endif
diff --git a/src/listpack_malloc.h b/src/listpack_malloc.h
new file mode 100644
index 000000000..401ab6f74
--- /dev/null
+++ b/src/listpack_malloc.h
@@ -0,0 +1,45 @@
+/* Listpack -- A serialization format for lists of strings
+ * https://github.com/antirez/listpack
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the listpack allocator at compile
+ * time. Just point the following macros at what you want to use, and add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
+#ifndef LISTPACK_ALLOC_H
+#define LISTPACK_ALLOC_H
+#include "zmalloc.h"
+#define lp_malloc zmalloc
+#define lp_realloc zrealloc
+#define lp_free zfree
+#endif
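As a sketch of how the selection works, a hypothetical variant of this header falling back to the plain libc allocator would simply be:

    #ifndef LISTPACK_ALLOC_H
    #define LISTPACK_ALLOC_H
    #include <stdlib.h>
    #define lp_malloc malloc
    #define lp_realloc realloc
    #define lp_free free
    #endif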
diff --git a/src/localtime.c b/src/localtime.c
new file mode 100644
index 000000000..3f59a3331
--- /dev/null
+++ b/src/localtime.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <time.h>
+
+/* This is a safe version of localtime() which contains no locks and is
+ * fork() friendly. Even the _r version of localtime() cannot be used safely
+ * in Redis: another thread may be calling localtime() while the main thread
+ * calls fork(). Later, when the child process calls localtime() again, for
+ * instance in order to log something to the Redis log, it may deadlock: in
+ * the copy of the address space of the forked process the lock will never
+ * be released.
+ *
+ * This function takes the timezone 'tz' as argument, and the 'dst' flag
+ * states whether daylight saving time is currently in effect. The caller
+ * should obtain this information by calling tzset() ASAP in main(), which
+ * populates the timezone offset in the 'timezone' global variable. To learn
+ * whether daylight saving time is currently active, one trick is to also
+ * call localtime() in main() ASAP, and read the tm_isdst field of the
+ * returned tm structure. However daylight saving time may switch on or off
+ * during the life of a long running process, so this information should be
+ * refreshed at safe times.
+ *
+ * Note that this function does not work for dates before 1/1/1970: it is
+ * solely designed to handle what time(NULL) may return, and to support
+ * Redis log timestamps; it's not a complete implementation. */
+static int is_leap_year(time_t year) {
+ if (year % 4) return 0; /* A year not divisible by 4 is not leap. */
+ else if (year % 100) return 1; /* If div by 4 and not 100 is surely leap. */
+    else if (year % 400) return 0; /* Div by 100 but not by 400: not leap. */
+    else return 1;                 /* Div by 400: surely leap. */
+}
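A quick sanity check of the rules above (hypothetical test code, assuming <assert.h>):

    assert(is_leap_year(1996) == 1);  /* Divisible by 4, not by 100. */
    assert(is_leap_year(1900) == 0);  /* Divisible by 100, not by 400. */
    assert(is_leap_year(2000) == 1);  /* Divisible by 400. */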
+
+void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst) {
+ const time_t secs_min = 60;
+ const time_t secs_hour = 3600;
+ const time_t secs_day = 3600*24;
+
+ t -= tz; /* Adjust for timezone. */
+ t += 3600*dst; /* Adjust for daylight time. */
+ time_t days = t / secs_day; /* Days passed since epoch. */
+ time_t seconds = t % secs_day; /* Remaining seconds. */
+
+ tmp->tm_isdst = dst;
+ tmp->tm_hour = seconds / secs_hour;
+ tmp->tm_min = (seconds % secs_hour) / secs_min;
+ tmp->tm_sec = (seconds % secs_hour) % secs_min;
+
+    /* 1/1/1970 was a Thursday, that is, day 4 from the POV of the tm
+     * structure where Sunday = 0, so to calculate the day of the week we
+     * have to add 4 and take the modulo by 7. */
+ tmp->tm_wday = (days+4)%7;
+
+ /* Calculate the current year. */
+ tmp->tm_year = 1970;
+ while(1) {
+ /* Leap years have one day more. */
+ time_t days_this_year = 365 + is_leap_year(tmp->tm_year);
+ if (days_this_year > days) break;
+ days -= days_this_year;
+ tmp->tm_year++;
+ }
+    tmp->tm_yday = days; /* Day of the current year (zero-based). */
+
+ /* We need to calculate in which month and day of the month we are. To do
+ * so we need to skip days according to how many days there are in each
+ * month, and adjust for the leap year that has one more day in February. */
+ int mdays[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+ mdays[1] += is_leap_year(tmp->tm_year);
+
+ tmp->tm_mon = 0;
+ while(days >= mdays[tmp->tm_mon]) {
+ days -= mdays[tmp->tm_mon];
+ tmp->tm_mon++;
+ }
+
+ tmp->tm_mday = days+1; /* Add 1 since our 'days' is zero-based. */
+ tmp->tm_year -= 1900; /* Surprisingly tm_year is year-1900. */
+}
+
+#ifdef LOCALTIME_TEST_MAIN
+#include <stdio.h>
+
+int main(void) {
+ /* Obtain timezone and daylight info. */
+    tzset(); /* Now the 'timezone' global is populated. */
+ time_t t = time(NULL);
+ struct tm *aux = localtime(&t);
+ int daylight_active = aux->tm_isdst;
+
+ struct tm tm;
+ char buf[1024];
+
+ nolocks_localtime(&tm,t,timezone,daylight_active);
+ strftime(buf,sizeof(buf),"%d %b %H:%M:%S",&tm);
+ printf("[timezone: %d, dl: %d] %s\n", (int)timezone, (int)daylight_active, buf);
+}
+#endif
diff --git a/src/lzfP.h b/src/lzfP.h
index c6d2e096c..93c27b42d 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -79,7 +79,11 @@
* Unconditionally aligning does not cost very much, so do it if unsure
*/
#ifndef STRICT_ALIGN
-# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
+# if !(defined(__i386) || defined (__amd64))
+# define STRICT_ALIGN 1
+# else
+# define STRICT_ALIGN 0
+# endif
#endif
/*
diff --git a/src/lzf_d.c b/src/lzf_d.c
index c32be8e87..93f43c27c 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -86,6 +86,8 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
switch (ctrl)
{
case 32: *op++ = *ip++; case 31: *op++ = *ip++; case 30: *op++ = *ip++; case 29: *op++ = *ip++;
@@ -97,6 +99,7 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
case 8: *op++ = *ip++; case 7: *op++ = *ip++; case 6: *op++ = *ip++; case 5: *op++ = *ip++;
case 4: *op++ = *ip++; case 3: *op++ = *ip++; case 2: *op++ = *ip++; case 1: *op++ = *ip++;
}
+#pragma GCC diagnostic pop
#endif
}
else /* back reference */
@@ -163,17 +166,17 @@ lzf_decompress (const void *const in_data, unsigned int in_len,
break;
- case 9: *op++ = *ref++;
- case 8: *op++ = *ref++;
- case 7: *op++ = *ref++;
- case 6: *op++ = *ref++;
- case 5: *op++ = *ref++;
- case 4: *op++ = *ref++;
- case 3: *op++ = *ref++;
- case 2: *op++ = *ref++;
- case 1: *op++ = *ref++;
+ case 9: *op++ = *ref++; /* fall-thru */
+ case 8: *op++ = *ref++; /* fall-thru */
+ case 7: *op++ = *ref++; /* fall-thru */
+ case 6: *op++ = *ref++; /* fall-thru */
+ case 5: *op++ = *ref++; /* fall-thru */
+ case 4: *op++ = *ref++; /* fall-thru */
+ case 3: *op++ = *ref++; /* fall-thru */
+ case 2: *op++ = *ref++; /* fall-thru */
+ case 1: *op++ = *ref++; /* fall-thru */
case 0: *op++ = *ref++; /* two octets more */
- *op++ = *ref++;
+ *op++ = *ref++; /* fall-thru */
}
#endif
}
diff --git a/src/memtest.c b/src/memtest.c
index 39fc4fcaa..a455430f5 100644
--- a/src/memtest.c
+++ b/src/memtest.c
@@ -26,7 +26,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
-
+#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -90,7 +90,7 @@ void memtest_progress_step(size_t curr, size_t size, char c) {
/* Test that addressing is fine. Every location is populated with its own
 * address, and finally verified. This test is very fast and may quickly
 * detect gross problems with the memory subsystem. */
-void memtest_addressing(unsigned long *l, size_t bytes) {
+int memtest_addressing(unsigned long *l, size_t bytes, int interactive) {
unsigned long words = bytes/sizeof(unsigned long);
unsigned long j, *p;
@@ -99,48 +99,60 @@ void memtest_addressing(unsigned long *l, size_t bytes) {
for (j = 0; j < words; j++) {
*p = (unsigned long)p;
p++;
- if ((j & 0xffff) == 0) memtest_progress_step(j,words*2,'A');
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j,words*2,'A');
}
/* Test */
p = l;
for (j = 0; j < words; j++) {
if (*p != (unsigned long)p) {
- printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
- (void*) p, *p);
- exit(1);
+ if (interactive) {
+ printf("\n*** MEMORY ADDRESSING ERROR: %p contains %lu\n",
+ (void*) p, *p);
+ exit(1);
+ }
+ return 1;
}
p++;
- if ((j & 0xffff) == 0) memtest_progress_step(j+words,words*2,'A');
+ if ((j & 0xffff) == 0 && interactive)
+ memtest_progress_step(j+words,words*2,'A');
}
+ return 0;
}
/* Fill words stepping a single page at every write, so we continue to
* touch all the pages in the smallest amount of time reducing the
* effectiveness of caches, and making it hard for the OS to transfer
- * pages on the swap. */
-void memtest_fill_random(unsigned long *l, size_t bytes) {
+ * pages on the swap.
+ *
+ * In this test we can't call rand() since the system may be completely
+ * unable to handle library calls, so we have to resort to our own
+ * PRNG that only uses local state. We use an xorshift* PRNG. */
+#define xorshift64star_next() do { \
+ rseed ^= rseed >> 12; \
+ rseed ^= rseed << 25; \
+ rseed ^= rseed >> 27; \
+ rout = rseed * UINT64_C(2685821657736338717); \
+} while(0)
+
+void memtest_fill_random(unsigned long *l, size_t bytes, int interactive) {
unsigned long step = 4096/sizeof(unsigned long);
unsigned long words = bytes/sizeof(unsigned long)/2;
unsigned long iwords = words/step; /* words per iteration */
unsigned long off, w, *l1, *l2;
+ uint64_t rseed = UINT64_C(0xd13133de9afdb566); /* Just a random seed. */
+ uint64_t rout = 0;
assert((bytes & 4095) == 0);
for (off = 0; off < step; off++) {
l1 = l+off;
l2 = l1+words;
for (w = 0; w < iwords; w++) {
-#ifdef MEMTEST_32BIT
- *l1 = *l2 = ((unsigned long) (rand()&0xffff)) |
- (((unsigned long) (rand()&0xffff)) << 16);
-#else
- *l1 = *l2 = ((unsigned long) (rand()&0xffff)) |
- (((unsigned long) (rand()&0xffff)) << 16) |
- (((unsigned long) (rand()&0xffff)) << 32) |
- (((unsigned long) (rand()&0xffff)) << 48);
-#endif
+ xorshift64star_next();
+ *l1 = *l2 = (unsigned long) rout;
l1 += step;
l2 += step;
- if ((w & 0xffff) == 0)
+ if ((w & 0xffff) == 0 && interactive)
memtest_progress_step(w+iwords*off,words,'R');
}
}
@@ -149,7 +161,7 @@ void memtest_fill_random(unsigned long *l, size_t bytes) {
/* Like memtest_fill_random() but uses the two specified values to fill
* memory, in an alternated way (v1|v2|v1|v2|...) */
void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
- unsigned long v2, char sym)
+ unsigned long v2, char sym, int interactive)
{
unsigned long step = 4096/sizeof(unsigned long);
unsigned long words = bytes/sizeof(unsigned long)/2;
@@ -173,13 +185,13 @@ void memtest_fill_value(unsigned long *l, size_t bytes, unsigned long v1,
#endif
l1 += step;
l2 += step;
- if ((w & 0xffff) == 0)
+ if ((w & 0xffff) == 0 && interactive)
memtest_progress_step(w+iwords*off,words,sym);
}
}
}
-void memtest_compare(unsigned long *l, size_t bytes) {
+int memtest_compare(unsigned long *l, size_t bytes, int interactive) {
unsigned long words = bytes/sizeof(unsigned long)/2;
unsigned long w, *l1, *l2;
@@ -188,85 +200,150 @@ void memtest_compare(unsigned long *l, size_t bytes) {
l2 = l1+words;
for (w = 0; w < words; w++) {
if (*l1 != *l2) {
- printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
- (void*)l1, (void*)l2, *l1, *l2);
- exit(1);
+ if (interactive) {
+ printf("\n*** MEMORY ERROR DETECTED: %p != %p (%lu vs %lu)\n",
+ (void*)l1, (void*)l2, *l1, *l2);
+ exit(1);
+ }
+ return 1;
}
l1 ++;
l2 ++;
- if ((w & 0xffff) == 0) memtest_progress_step(w,words,'=');
+ if ((w & 0xffff) == 0 && interactive)
+ memtest_progress_step(w,words,'=');
}
+ return 0;
}
-void memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times) {
+int memtest_compare_times(unsigned long *m, size_t bytes, int pass, int times,
+ int interactive)
+{
int j;
+ int errors = 0;
for (j = 0; j < times; j++) {
- memtest_progress_start("Compare",pass);
- memtest_compare(m,bytes);
- memtest_progress_end();
+ if (interactive) memtest_progress_start("Compare",pass);
+ errors += memtest_compare(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
}
+ return errors;
}
-void memtest_test(size_t megabytes, int passes) {
- size_t bytes = megabytes*1024*1024;
- unsigned long *m = malloc(bytes);
+/* Test the specified memory. The number of bytes must be a multiple of 4096.
+ * If 'interactive' is true the program exits with an error on failure and
+ * prints ASCII art to show progress. When 'interactive' is 0, instead, the
+ * function can be used as an API call: it returns 1 if memory errors were
+ * found or 0 if no errors were detected. */
+int memtest_test(unsigned long *m, size_t bytes, int passes, int interactive) {
int pass = 0;
+ int errors = 0;
- if (m == NULL) {
- fprintf(stderr,"Unable to allocate %zu megabytes: %s",
- megabytes, strerror(errno));
- exit(1);
- }
while (pass != passes) {
pass++;
- memtest_progress_start("Addressing test",pass);
- memtest_addressing(m,bytes);
- memtest_progress_end();
+ if (interactive) memtest_progress_start("Addressing test",pass);
+ errors += memtest_addressing(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
- memtest_progress_start("Random fill",pass);
- memtest_fill_random(m,bytes);
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Random fill",pass);
+ memtest_fill_random(m,bytes,interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
- memtest_progress_start("Solid fill",pass);
- memtest_fill_value(m,bytes,0,(unsigned long)-1,'S');
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Solid fill",pass);
+ memtest_fill_value(m,bytes,0,(unsigned long)-1,'S',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
- memtest_progress_start("Checkerboard fill",pass);
- memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C');
- memtest_progress_end();
- memtest_compare_times(m,bytes,pass,4);
+ if (interactive) memtest_progress_start("Checkerboard fill",pass);
+ memtest_fill_value(m,bytes,ULONG_ONEZERO,ULONG_ZEROONE,'C',interactive);
+ if (interactive) memtest_progress_end();
+ errors += memtest_compare_times(m,bytes,pass,4,interactive);
}
- free(m);
+ return errors;
}
-void memtest_non_destructive_invert(void *addr, size_t size) {
- volatile unsigned long *p = addr;
- size_t words = size / sizeof(unsigned long);
- size_t j;
+/* A version of memtest_test() that tests memory in small pieces
+ * in order to restore the memory content at exit.
+ *
+ * One problem with this approach is that the cache can satisfy accesses
+ * without touching real memory, and we can't test big chunks of memory at
+ * the same time, because we need to back them up on the stack (the
+ * allocator may not be usable, or we may already be in an out of memory
+ * condition). So what we do is trash the cache with useless memory
+ * accesses between the fill and compare cycles. */
+#define MEMTEST_BACKUP_WORDS (1024*(1024/sizeof(long)))
+/* Random accesses of MEMTEST_DECACHE_SIZE are performed at the start and
+ * end of the region between fill and compare cycles in order to trash
+ * the cache. */
+#define MEMTEST_DECACHE_SIZE (1024*8)
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes) {
+ unsigned long backup[MEMTEST_BACKUP_WORDS];
+ unsigned long *p = m;
+ unsigned long *end = (unsigned long*) (((unsigned char*)m)+(bytes-MEMTEST_DECACHE_SIZE));
+ size_t left = bytes;
+ int errors = 0;
- /* Invert */
- for (j = 0; j < words; j++)
- p[j] = ~p[j];
-}
+    if (bytes & 4095) return 0; /* Size must be a multiple of the 4k page. */
+ if (bytes < 4096*2) return 0; /* Can't test a single page. */
+
+ while(left) {
+        /* If we are left with a single final page, go back one page so
+         * that we test two pages, since the code cannot test a single
+         * page but only at least two at a time. */
+ if (left == 4096) {
+ left += 4096;
+ p -= 4096/sizeof(unsigned long);
+ }
-void memtest_non_destructive_swap(void *addr, size_t size) {
- volatile unsigned long *p = addr;
- size_t words = size / sizeof(unsigned long);
- size_t j;
+ int pass = 0;
+ size_t len = (left > sizeof(backup)) ? sizeof(backup) : left;
- /* Swap */
- for (j = 0; j < words; j += 2) {
- unsigned long a, b;
+ /* Always test an even number of pages. */
+ if (len/4096 % 2) len -= 4096;
- a = p[j];
- b = p[j+1];
- p[j] = b;
- p[j+1] = a;
+ memcpy(backup,p,len); /* Backup. */
+ while(pass != passes) {
+ pass++;
+ errors += memtest_addressing(p,len,0);
+ memtest_fill_random(p,len,0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,0,(unsigned long)-1,'S',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ memtest_fill_value(p,len,ULONG_ONEZERO,ULONG_ZEROONE,'C',0);
+ if (bytes >= MEMTEST_DECACHE_SIZE) {
+ memtest_compare_times(m,MEMTEST_DECACHE_SIZE,pass,1,0);
+ memtest_compare_times(end,MEMTEST_DECACHE_SIZE,pass,1,0);
+ }
+ errors += memtest_compare_times(p,len,pass,4,0);
+ }
+ memcpy(p,backup,len); /* Restore. */
+ left -= len;
+ p += len/sizeof(unsigned long);
}
+ return errors;
+}
+
+/* Perform an interactive test allocating the specified number of megabytes. */
+void memtest_alloc_and_test(size_t megabytes, int passes) {
+ size_t bytes = megabytes*1024*1024;
+ unsigned long *m = malloc(bytes);
+
+ if (m == NULL) {
+ fprintf(stderr,"Unable to allocate %zu megabytes: %s",
+ megabytes, strerror(errno));
+ exit(1);
+ }
+ memtest_test(m,bytes,passes,1);
+ free(m);
}
void memtest(size_t megabytes, int passes) {
@@ -274,7 +351,7 @@ void memtest(size_t megabytes, int passes) {
ws.ws_col = 80;
ws.ws_row = 20;
}
- memtest_test(megabytes,passes);
+ memtest_alloc_and_test(megabytes,passes);
printf("\nYour memory passed this test.\n");
printf("Please if you are still in doubt use the following two tools:\n");
printf("1) memtest86: http://www.memtest86.com/\n");
diff --git a/src/module.c b/src/module.c
new file mode 100644
index 000000000..3735cbe24
--- /dev/null
+++ b/src/module.c
@@ -0,0 +1,4703 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "cluster.h"
+#include <dlfcn.h>
+
+#define REDISMODULE_CORE 1
+#include "redismodule.h"
+
+/* --------------------------------------------------------------------------
+ * Private data structures used by the modules system. Those are data
+ * structures that are never exposed to Redis Modules, if not as void
+ * pointers that have an API the module can call with them)
+ * -------------------------------------------------------------------------- */
+
+/* This structure represents a module inside the system. */
+struct RedisModule {
+ void *handle; /* Module dlopen() handle. */
+ char *name; /* Module name. */
+ int ver; /* Module version. We use just progressive integers. */
+ int apiver; /* Module API version as requested during initialization.*/
+ list *types; /* Module data types. */
+};
+typedef struct RedisModule RedisModule;
+
+static dict *modules; /* Hash table of modules. SDS -> RedisModule ptr.*/
+
+/* Entries in the context->amqueue array, representing objects to free
+ * when the callback returns. */
+struct AutoMemEntry {
+ void *ptr;
+ int type;
+};
+
+/* AutoMemEntry type field values. */
+#define REDISMODULE_AM_KEY 0
+#define REDISMODULE_AM_STRING 1
+#define REDISMODULE_AM_REPLY 2
+#define REDISMODULE_AM_FREED 3 /* Explicitly freed by user already. */
+
+/* The pool allocator block. Redis Modules can allocate memory via this special
+ * allocator that will automatically release it all once the callback returns.
+ * This means that it can only be used for ephemeral allocations. However
+ * there are two advantages for modules to use this API:
+ *
+ * 1) The memory is automatically released when the callback returns.
+ * 2) This allocator is faster for many small allocations since whole blocks
+ * are allocated, and small pieces returned to the caller just advancing
+ * the index of the allocation.
+ *
+ * Allocations are always rounded to the size of the void pointer in order
+ * to always return aligned memory chunks. */
+
+#define REDISMODULE_POOL_ALLOC_MIN_SIZE (1024*8)
+#define REDISMODULE_POOL_ALLOC_ALIGN (sizeof(void*))
+
+typedef struct RedisModulePoolAllocBlock {
+ uint32_t size;
+ uint32_t used;
+ struct RedisModulePoolAllocBlock *next;
+ char memory[];
+} RedisModulePoolAllocBlock;
+
+/* This structure represents the context in which Redis modules operate.
+ * Most APIs a module can access get a pointer to the context, so that the
+ * API implementation can hold state across calls, or remember what to free
+ * after the call and so forth.
+ *
+ * Note that not all the context structure is always filled with actual values
+ * but only the fields needed in a given context. */
+
+struct RedisModuleBlockedClient;
+
+struct RedisModuleCtx {
+ void *getapifuncptr; /* NOTE: Must be the first field. */
+ struct RedisModule *module; /* Module reference. */
+ client *client; /* Client calling a command. */
+ struct RedisModuleBlockedClient *blocked_client; /* Blocked client for
+ thread safe context. */
+ struct AutoMemEntry *amqueue; /* Auto memory queue of objects to free. */
+ int amqueue_len; /* Number of slots in amqueue. */
+ int amqueue_used; /* Number of used slots in amqueue. */
+ int flags; /* REDISMODULE_CTX_... flags. */
+ void **postponed_arrays; /* To set with RM_ReplySetArrayLength(). */
+ int postponed_arrays_count; /* Number of entries in postponed_arrays. */
+ void *blocked_privdata; /* Privdata set when unblocking a client. */
+
+ /* Used if there is the REDISMODULE_CTX_KEYS_POS_REQUEST flag set. */
+ int *keys_pos;
+ int keys_count;
+
+ struct RedisModulePoolAllocBlock *pa_head;
+};
+typedef struct RedisModuleCtx RedisModuleCtx;
+
+#define REDISMODULE_CTX_INIT {(void*)(unsigned long)&RM_GetApi, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, 0, NULL, NULL, 0, NULL}
+#define REDISMODULE_CTX_MULTI_EMITTED (1<<0)
+#define REDISMODULE_CTX_AUTO_MEMORY (1<<1)
+#define REDISMODULE_CTX_KEYS_POS_REQUEST (1<<2)
+#define REDISMODULE_CTX_BLOCKED_REPLY (1<<3)
+#define REDISMODULE_CTX_BLOCKED_TIMEOUT (1<<4)
+#define REDISMODULE_CTX_THREAD_SAFE (1<<5)
+#define REDISMODULE_CTX_BLOCKED_DISCONNECTED (1<<6)
+
+/* This represents a Redis key opened with RM_OpenKey(). */
+struct RedisModuleKey {
+ RedisModuleCtx *ctx;
+ redisDb *db;
+ robj *key; /* Key name object. */
+ robj *value; /* Value object, or NULL if the key was not found. */
+ void *iter; /* Iterator. */
+ int mode; /* Opening mode. */
+
+ /* Zset iterator. */
+ uint32_t ztype; /* REDISMODULE_ZSET_RANGE_* */
+ zrangespec zrs; /* Score range. */
+ zlexrangespec zlrs; /* Lex range. */
+ uint32_t zstart; /* Start pos for positional ranges. */
+ uint32_t zend; /* End pos for positional ranges. */
+ void *zcurrent; /* Zset iterator current node. */
+ int zer; /* Zset iterator end reached flag
+ (true if end was reached). */
+};
+typedef struct RedisModuleKey RedisModuleKey;
+
+/* RedisModuleKey 'ztype' values. */
+#define REDISMODULE_ZSET_RANGE_NONE 0 /* This must always be 0. */
+#define REDISMODULE_ZSET_RANGE_LEX 1
+#define REDISMODULE_ZSET_RANGE_SCORE 2
+#define REDISMODULE_ZSET_RANGE_POS 3
+
+/* Function pointer type of a function representing a command inside
+ * a Redis module. */
+struct RedisModuleBlockedClient;
+typedef int (*RedisModuleCmdFunc) (RedisModuleCtx *ctx, void **argv, int argc);
+typedef void (*RedisModuleDisconnectFunc) (RedisModuleCtx *ctx, struct RedisModuleBlockedClient *bc);
+
+/* This struct holds the information about a command registered by a module.*/
+struct RedisModuleCommandProxy {
+ struct RedisModule *module;
+ RedisModuleCmdFunc func;
+ struct redisCommand *rediscmd;
+};
+typedef struct RedisModuleCommandProxy RedisModuleCommandProxy;
+
+#define REDISMODULE_REPLYFLAG_NONE 0
+#define REDISMODULE_REPLYFLAG_TOPARSE (1<<0) /* Protocol must be parsed. */
+#define REDISMODULE_REPLYFLAG_NESTED (1<<1) /* Nested reply object. No proto
+ or struct free. */
+
+/* Reply of the RM_Call() function. The structure is filled in a lazy
+ * way depending on the functions called on the reply object. By default
+ * only the type, proto and protolen fields are filled. */
+typedef struct RedisModuleCallReply {
+ RedisModuleCtx *ctx;
+ int type; /* REDISMODULE_REPLY_... */
+ int flags; /* REDISMODULE_REPLYFLAG_... */
+ size_t len; /* Len of strings or num of elements of arrays. */
+ char *proto; /* Raw reply protocol. An SDS string at top-level object. */
+ size_t protolen;/* Length of protocol. */
+ union {
+ const char *str; /* String pointer for string and error replies. This
+ does not need to be freed, always points inside
+ a reply->proto buffer of the reply object or, in
+ case of array elements, of parent reply objects. */
+ long long ll; /* Reply value for integer reply. */
+ struct RedisModuleCallReply *array; /* Array of sub-reply elements. */
+ } val;
+} RedisModuleCallReply;
+
+/* Structure representing a blocked client. We get a pointer to such
+ * an object when blocking from modules. */
+typedef struct RedisModuleBlockedClient {
+    client *client; /* Pointer to the blocked client, or NULL if the client
+ was destroyed during the life of this object. */
+ RedisModule *module; /* Module blocking the client. */
+ RedisModuleCmdFunc reply_callback; /* Reply callback on normal completion.*/
+ RedisModuleCmdFunc timeout_callback; /* Reply callback on timeout. */
+ RedisModuleDisconnectFunc disconnect_callback; /* Called on disconnection.*/
+ void (*free_privdata)(RedisModuleCtx*,void*);/* privdata cleanup callback.*/
+ void *privdata; /* Module private data that may be used by the reply
+ or timeout callback. It is set via the
+ RedisModule_UnblockClient() API. */
+ client *reply_client; /* Fake client used to accumulate replies
+ in thread safe contexts. */
+ int dbid; /* Database number selected by the original client. */
+} RedisModuleBlockedClient;
+
+static pthread_mutex_t moduleUnblockedClientsMutex = PTHREAD_MUTEX_INITIALIZER;
+static list *moduleUnblockedClients;
+
+/* We need a mutex that is unlocked / relocked in beforeSleep() in order to
+ * allow thread safe contexts to execute commands at a safe moment. */
+static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER;
+
+
+/* Function pointer type for keyspace event notification subscriptions from modules. */
+typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key);
+
+/* Keyspace notification subscriber information.
+ * See RM_SubscribeToKeyspaceEvents() for more information. */
+typedef struct RedisModuleKeyspaceSubscriber {
+ /* The module subscribed to the event */
+ RedisModule *module;
+ /* Notification callback in the module*/
+ RedisModuleNotificationFunc notify_callback;
+ /* A bit mask of the events the module is interested in */
+ int event_mask;
+ /* Active flag set on entry, to avoid reentrant subscribers
+ * calling themselves */
+ int active;
+} RedisModuleKeyspaceSubscriber;
+
+/* The module keyspace notification subscribers list */
+static list *moduleKeyspaceSubscribers;
+
+/* Static client recycled for all notification clients, to avoid allocating
+ * per round. */
+static client *moduleKeyspaceSubscribersClient;
+
+/* --------------------------------------------------------------------------
+ * Prototypes
+ * -------------------------------------------------------------------------- */
+
+void RM_FreeCallReply(RedisModuleCallReply *reply);
+void RM_CloseKey(RedisModuleKey *key);
+void autoMemoryCollect(RedisModuleCtx *ctx);
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap);
+void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx);
+void RM_ZsetRangeStop(RedisModuleKey *kp);
+static void zsetKeyReset(RedisModuleKey *key);
+
+/* --------------------------------------------------------------------------
+ * Heap allocation raw functions
+ * -------------------------------------------------------------------------- */
+
+/* Use like malloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using malloc(). */
+void *RM_Alloc(size_t bytes) {
+ return zmalloc(bytes);
+}
+
+/* Use like calloc(). Memory allocated with this function is reported in
+ * Redis INFO memory, used for keys eviction according to maxmemory settings
+ * and in general is taken into account as memory allocated by Redis.
+ * You should avoid using calloc() directly. */
+void *RM_Calloc(size_t nmemb, size_t size) {
+ return zcalloc(nmemb*size);
+}
+
+/* Use like realloc() for memory obtained with RedisModule_Alloc(). */
+void* RM_Realloc(void *ptr, size_t bytes) {
+ return zrealloc(ptr,bytes);
+}
+
+/* Use like free() for memory obtained by RedisModule_Alloc() and
+ * RedisModule_Realloc(). However you should never try to free with
+ * RedisModule_Free() memory allocated with malloc() inside your module. */
+void RM_Free(void *ptr) {
+ zfree(ptr);
+}
+
+/* Like strdup() but returns memory allocated with RedisModule_Alloc(). */
+char *RM_Strdup(const char *str) {
+ return zstrdup(str);
+}
+
+/* --------------------------------------------------------------------------
+ * Pool allocator
+ * -------------------------------------------------------------------------- */
+
+/* Release the chain of blocks used for pool allocations. */
+void poolAllocRelease(RedisModuleCtx *ctx) {
+ RedisModulePoolAllocBlock *head = ctx->pa_head, *next;
+
+ while(head != NULL) {
+ next = head->next;
+ zfree(head);
+ head = next;
+ }
+ ctx->pa_head = NULL;
+}
+
+/* Return heap allocated memory that will be freed automatically when the
+ * module callback function returns. Mostly suitable for small allocations
+ * that are short lived and must be released when the callback returns
+ * anyway. The returned memory is aligned to the architecture word size
+ * if at least word size bytes are requested, otherwise it is just
+ * aligned to the next power of two, so for example a 3 bytes request is
+ * 4 bytes aligned while a 2 bytes request is 2 bytes aligned.
+ *
+ * There is no realloc-style function: when reallocation is needed, using
+ * the pool allocator is not a good idea in the first place.
+ *
+ * The function returns NULL if `bytes` is 0. */
+void *RM_PoolAlloc(RedisModuleCtx *ctx, size_t bytes) {
+ if (bytes == 0) return NULL;
+ RedisModulePoolAllocBlock *b = ctx->pa_head;
+ size_t left = b ? b->size - b->used : 0;
+
+ /* Fix alignment. */
+ if (left >= bytes) {
+ size_t alignment = REDISMODULE_POOL_ALLOC_ALIGN;
+ while (bytes < alignment && alignment/2 >= bytes) alignment /= 2;
+ if (b->used % alignment)
+ b->used += alignment - (b->used % alignment);
+ left = (b->used > b->size) ? 0 : b->size - b->used;
+ }
+
+ /* Create a new block if needed. */
+ if (left < bytes) {
+ size_t blocksize = REDISMODULE_POOL_ALLOC_MIN_SIZE;
+ if (blocksize < bytes) blocksize = bytes;
+ b = zmalloc(sizeof(*b) + blocksize);
+ b->size = blocksize;
+ b->used = 0;
+ b->next = ctx->pa_head;
+ ctx->pa_head = b;
+ }
+
+ char *retval = b->memory + b->used;
+ b->used += bytes;
+ return retval;
+}
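A sketch of typical use from a module command callback (hypothetical module code, using the module-side RedisModule_PoolAlloc name for this function):

    int MyCmd_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        /* Scratch buffer released automatically when this callback returns. */
        char *tmp = RedisModule_PoolAlloc(ctx,64);
        snprintf(tmp,64,"%d bytes pooled",64);
        return RedisModule_ReplyWithSimpleString(ctx,tmp);
    }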
+
+/* --------------------------------------------------------------------------
+ * Helpers for modules API implementation
+ * -------------------------------------------------------------------------- */
+
+/* Create an empty key of the specified type. 'key' must point to a key
+ * object opened for writing where the .value member is set to NULL because
+ * the key was found to be non-existing.
+ *
+ * On success REDISMODULE_OK is returned and the key is populated with
+ * the value of the specified type. The function fails and returns
+ * REDISMODULE_ERR if:
+ *
+ * 1) The key is not open for writing.
+ * 2) The key is not empty.
+ * 3) The specified type is unknown.
+ */
+int moduleCreateEmptyKey(RedisModuleKey *key, int type) {
+ robj *obj;
+
+    /* The key must be open for writing and non-existing to proceed. */
+ if (!(key->mode & REDISMODULE_WRITE) || key->value)
+ return REDISMODULE_ERR;
+
+ switch(type) {
+ case REDISMODULE_KEYTYPE_LIST:
+ obj = createQuicklistObject();
+ quicklistSetOptions(obj->ptr, server.list_max_ziplist_size,
+ server.list_compress_depth);
+ break;
+ case REDISMODULE_KEYTYPE_ZSET:
+ obj = createZsetZiplistObject();
+ break;
+ case REDISMODULE_KEYTYPE_HASH:
+ obj = createHashObject();
+ break;
+ default: return REDISMODULE_ERR;
+ }
+ dbAdd(key->db,key->key,obj);
+ key->value = obj;
+ return REDISMODULE_OK;
+}
+
+/* This function is called in low-level API implementation functions in order
+ * to check if the value associated with the key remained empty after an
+ * operation that removed elements from an aggregate data type.
+ *
+ * If this happens, the key is deleted from the DB and the key object state
+ * is set to the right one in order to be targeted again by write operations
+ * possibly recreating the key if needed.
+ *
+ * The function returns 1 if the key value object is found empty and is
+ * deleted, otherwise 0 is returned. */
+int moduleDelKeyIfEmpty(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL) return 0;
+ int isempty;
+ robj *o = key->value;
+
+ switch(o->type) {
+ case OBJ_LIST: isempty = listTypeLength(o) == 0; break;
+ case OBJ_SET: isempty = setTypeSize(o) == 0; break;
+ case OBJ_ZSET: isempty = zsetLength(o) == 0; break;
+ case OBJ_HASH : isempty = hashTypeLength(o) == 0; break;
+ default: isempty = 0;
+ }
+
+ if (isempty) {
+ dbDelete(key->db,key->key);
+ key->value = NULL;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Service API exported to modules
+ *
+ * Note that all the exported APIs are called RM_<funcname> in the core
+ * and RedisModule_<funcname> in the module side (defined as function
+ * pointers in redismodule.h). In this way the dynamic linker does not
+ * mess with our global function pointers, overriding them with the symbols
+ * defined in the main executable that have the same names.
+ * -------------------------------------------------------------------------- */
+
+/* Lookup the requested module API and store the function pointer into the
+ * target pointer. The function returns REDISMODULE_ERR if there is no such
+ * named API, otherwise REDISMODULE_OK.
+ *
+ * This function is not meant to be used by module developers; it is only
+ * used implicitly by including redismodule.h. */
+int RM_GetApi(const char *funcname, void **targetPtrPtr) {
+ dictEntry *he = dictFind(server.moduleapi, funcname);
+ if (!he) return REDISMODULE_ERR;
+ *targetPtrPtr = dictGetVal(he);
+ return REDISMODULE_OK;
+}
+
+/* Free the context after the user function was called. */
+void moduleFreeContext(RedisModuleCtx *ctx) {
+ autoMemoryCollect(ctx);
+ poolAllocRelease(ctx);
+ if (ctx->postponed_arrays) {
+ zfree(ctx->postponed_arrays);
+ ctx->postponed_arrays_count = 0;
+ serverLog(LL_WARNING,
+ "API misuse detected in module %s: "
+ "RedisModule_ReplyWithArray(REDISMODULE_POSTPONED_ARRAY_LEN) "
+ "not matched by the same number of RedisModule_SetReplyArrayLen() "
+ "calls.",
+ ctx->module->name);
+ }
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) freeClient(ctx->client);
+}
+
+/* Helper function for when a command callback is called, in order to handle
+ * details needed to correctly replicate commands. */
+void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) {
+ client *c = ctx->client;
+
+ if (c->flags & CLIENT_LUA) return;
+
+    /* Handle the replication of the final EXEC, since whatever a command
+     * emits is always wrapped into MULTI/EXEC. */
+ if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) {
+ robj *propargv[1];
+ propargv[0] = createStringObject("EXEC",4);
+ alsoPropagate(server.execCommand,c->db->id,propargv,1,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(propargv[0]);
+ }
+}
+
+/* This Redis command binds the normal Redis command invocation with commands
+ * exported by modules. */
+void RedisModuleCommandDispatcher(client *c) {
+ RedisModuleCommandProxy *cp = (void*)(unsigned long)c->cmd->getkeys_proc;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ ctx.module = cp->module;
+ ctx.client = c;
+ cp->func(&ctx,(void**)c->argv,c->argc);
+ moduleHandlePropagationAfterCommandCallback(&ctx);
+ moduleFreeContext(&ctx);
+}
+
+/* This function returns the list of keys, with the same interface as the
+ * 'getkeys' function of the native commands, for module commands that exported
+ * the "getkeys-api" flag during the registration. This is done when the
+ * list of keys is not at fixed positions, so that first/last/step cannot
+ * be used.
+ *
+ * In order to accomplish its work, the module command is called, flagging
+ * the context in a way that the command can recognize this is a special
+ * "get keys" call by calling RedisModule_IsKeysPositionRequest(ctx). */
+int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
+ RedisModuleCommandProxy *cp = (void*)(unsigned long)cmd->getkeys_proc;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ ctx.module = cp->module;
+ ctx.client = NULL;
+ ctx.flags |= REDISMODULE_CTX_KEYS_POS_REQUEST;
+ cp->func(&ctx,(void**)argv,argc);
+ int *res = ctx.keys_pos;
+ if (numkeys) *numkeys = ctx.keys_count;
+ moduleFreeContext(&ctx);
+ return res;
+}
+
+/* Return non-zero if a module command, that was declared with the
+ * flag "getkeys-api", is called in a special way to get the keys positions
+ * and not to get executed. Otherwise zero is returned. */
+int RM_IsKeysPositionRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST) != 0;
+}
+
+/* When a module command is called in order to obtain the position of
+ * keys, since it was flagged as "getkeys-api" during the registration,
+ * the command implementation checks for this special call using the
+ * RedisModule_IsKeysPositionRequest() API and uses this function in
+ * order to report keys, like in the following example:
+ *
+ * if (RedisModule_IsKeysPositionRequest(ctx)) {
+ * RedisModule_KeyAtPos(ctx,1);
+ * RedisModule_KeyAtPos(ctx,2);
+ * }
+ *
+ * Note: in the example above the get keys API would not be needed since
+ * keys are at fixed positions. This interface is only used for commands
+ * with a more complex structure. */
+void RM_KeyAtPos(RedisModuleCtx *ctx, int pos) {
+ if (!(ctx->flags & REDISMODULE_CTX_KEYS_POS_REQUEST)) return;
+ if (pos <= 0) return;
+ ctx->keys_pos = zrealloc(ctx->keys_pos,sizeof(int)*(ctx->keys_count+1));
+ ctx->keys_pos[ctx->keys_count++] = pos;
+}
+
+/* Helper for RM_CreateCommand(). Turns a string representing command
+ * flags into the command flags used by the Redis core.
+ *
+ * It returns the set of flags, or -1 if unknown flags are found. */
+int commandFlagsFromString(char *s) {
+ int count, j;
+ int flags = 0;
+ sds *tokens = sdssplitlen(s,strlen(s)," ",1,&count);
+ for (j = 0; j < count; j++) {
+ char *t = tokens[j];
+ if (!strcasecmp(t,"write")) flags |= CMD_WRITE;
+ else if (!strcasecmp(t,"readonly")) flags |= CMD_READONLY;
+ else if (!strcasecmp(t,"admin")) flags |= CMD_ADMIN;
+ else if (!strcasecmp(t,"deny-oom")) flags |= CMD_DENYOOM;
+ else if (!strcasecmp(t,"deny-script")) flags |= CMD_NOSCRIPT;
+ else if (!strcasecmp(t,"allow-loading")) flags |= CMD_LOADING;
+ else if (!strcasecmp(t,"pubsub")) flags |= CMD_PUBSUB;
+ else if (!strcasecmp(t,"random")) flags |= CMD_RANDOM;
+ else if (!strcasecmp(t,"allow-stale")) flags |= CMD_STALE;
+ else if (!strcasecmp(t,"no-monitor")) flags |= CMD_SKIP_MONITOR;
+ else if (!strcasecmp(t,"fast")) flags |= CMD_FAST;
+ else if (!strcasecmp(t,"getkeys-api")) flags |= CMD_MODULE_GETKEYS;
+ else if (!strcasecmp(t,"no-cluster")) flags |= CMD_MODULE_NO_CLUSTER;
+ else break;
+ }
+ sdsfreesplitres(tokens,count);
+ if (j != count) return -1; /* Some token not processed correctly. */
+ return flags;
+}
+
+/* Register a new command in the Redis server, that will be handled by
+ * calling the function pointer 'func' using the RedisModule calling
+ * convention. The function returns REDISMODULE_ERR if the specified command
+ * name is already busy or a set of invalid flags were passed, otherwise
+ * REDISMODULE_OK is returned and the new command is registered.
+ *
+ * This function must be called during the initialization of the module
+ * inside the RedisModule_OnLoad() function. Calling this function outside
+ * of the initialization function is undefined.
+ *
+ * The command function type is the following:
+ *
+ * int MyCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+ *
+ * And is supposed to always return REDISMODULE_OK.
+ *
+ * The set of flags 'strflags' specify the behavior of the command, and should
+ * be passed as a C string composed of space separated words, like for
+ * example "write deny-oom". The set of flags are:
+ *
+ * * **"write"**: The command may modify the data set (it may also read
+ * from it).
+ * * **"readonly"**: The command returns data from keys but never writes.
+ * * **"admin"**: The command is an administrative command (may change
+ * replication or perform similar tasks).
+ * * **"deny-oom"**: The command may use additional memory and should be
+ * denied during out of memory conditions.
+ * * **"deny-script"**: Don't allow this command in Lua scripts.
+ * * **"allow-loading"**: Allow this command while the server is loading data.
+ * Only commands not interacting with the data set
+ * should be allowed to run in this mode. If not sure
+ * don't use this flag.
+ * * **"pubsub"**: The command publishes things on Pub/Sub channels.
+ * * **"random"**: The command may have different outputs even starting
+ * from the same input arguments and key values.
+ * * **"allow-stale"**: The command is allowed to run on slaves that don't
+ * serve stale data. Don't use if you don't know what
+ * this means.
+ * * **"no-monitor"**: Don't propagate the command on monitor. Use this if
+ *                     the command has sensitive data among the arguments.
+ * * **"fast"**: The command time complexity is not greater
+ * than O(log(N)) where N is the size of the collection or
+ * anything else representing the normal scalability
+ * issue with the command.
+ * * **"getkeys-api"**: The command implements the interface to return
+ * the arguments that are keys. Used when start/stop/step
+ * is not enough because of the command syntax.
+ * * **"no-cluster"**: The command should not register in Redis Cluster
+ *                     since it is not designed to work with it: for
+ *                     example, it may be unable to report the position
+ *                     of the keys, it may create key names
+ *                     programmatically, or for any other reason.
+ */
+int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) {
+ int flags = strflags ? commandFlagsFromString((char*)strflags) : 0;
+ if (flags == -1) return REDISMODULE_ERR;
+ if ((flags & CMD_MODULE_NO_CLUSTER) && server.cluster_enabled)
+ return REDISMODULE_ERR;
+
+ struct redisCommand *rediscmd;
+ RedisModuleCommandProxy *cp;
+ sds cmdname = sdsnew(name);
+
+ /* Check if the command name is busy. */
+ if (lookupCommand(cmdname) != NULL) {
+ sdsfree(cmdname);
+ return REDISMODULE_ERR;
+ }
+
+ /* Create a command "proxy", which is a structure that is referenced
+ * in the command table, so that the generic command that works as
+ * binding between modules and Redis, can know what function to call
+ * and what the module is.
+ *
+ * Note that we use the Redis command table 'getkeys_proc' in order to
+ * pass a reference to the command proxy structure. */
+ cp = zmalloc(sizeof(*cp));
+ cp->module = ctx->module;
+ cp->func = cmdfunc;
+ cp->rediscmd = zmalloc(sizeof(*rediscmd));
+ cp->rediscmd->name = cmdname;
+ cp->rediscmd->proc = RedisModuleCommandDispatcher;
+ cp->rediscmd->arity = -1;
+ cp->rediscmd->flags = flags | CMD_MODULE;
+ cp->rediscmd->getkeys_proc = (redisGetKeysProc*)(unsigned long)cp;
+ cp->rediscmd->firstkey = firstkey;
+ cp->rediscmd->lastkey = lastkey;
+ cp->rediscmd->keystep = keystep;
+ cp->rediscmd->microseconds = 0;
+ cp->rediscmd->calls = 0;
+ dictAdd(server.commands,sdsdup(cmdname),cp->rediscmd);
+ dictAdd(server.orig_commands,sdsdup(cmdname),cp->rediscmd);
+ return REDISMODULE_OK;
+}
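For instance, a module's OnLoad function would register a command like this (a sketch with a hypothetical "hello" module; HelloEcho_RedisCommand is assumed to be defined elsewhere):

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        if (RedisModule_Init(ctx,"hello",1,REDISMODULE_APIVER_1)
            == REDISMODULE_ERR) return REDISMODULE_ERR;
        /* firstkey=1, lastkey=1, keystep=1: the only key is argv[1]. */
        if (RedisModule_CreateCommand(ctx,"hello.echo",HelloEcho_RedisCommand,
            "readonly fast",1,1,1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }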
+
+/* Called by RM_Init() to setup the `ctx->module` structure.
+ *
+ * This is an internal function, Redis module developers don't need
+ * to use it. */
+void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int apiver) {
+ RedisModule *module;
+
+ if (ctx->module != NULL) return;
+ module = zmalloc(sizeof(*module));
+ module->name = sdsnew((char*)name);
+ module->ver = ver;
+ module->apiver = apiver;
+ module->types = listCreate();
+ ctx->module = module;
+}
+
+/* Return non-zero if the module name is busy.
+ * Otherwise zero is returned. */
+int RM_IsModuleNameBusy(const char *name) {
+ sds modulename = sdsnew(name);
+ dictEntry *de = dictFind(modules,modulename);
+ sdsfree(modulename);
+ return de != NULL;
+}
+
+/* Return the current UNIX time in milliseconds. */
+long long RM_Milliseconds(void) {
+ return mstime();
+}
+
+/* --------------------------------------------------------------------------
+ * Automatic memory management for modules
+ * -------------------------------------------------------------------------- */
+
+/* Enable automatic memory management. See API.md for more information.
+ *
+ * The function must be called as the first function of a command implementation
+ * that wants to use automatic memory. */
+void RM_AutoMemory(RedisModuleCtx *ctx) {
+ ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
+}
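A sketch of what automatic memory enables (hypothetical command code; without it, the string created below would require an explicit RedisModule_FreeString() call):

    int MyEcho_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        if (argc != 2) return RedisModule_WrongArity(ctx);
        RedisModule_AutoMemory(ctx); /* Must be the first call. */
        RedisModuleString *copy = RedisModule_CreateStringFromString(ctx,argv[1]);
        /* 'copy' is released automatically when this callback returns. */
        return RedisModule_ReplyWithString(ctx,copy);
    }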
+
+/* Add a new object to release automatically when the callback returns. */
+void autoMemoryAdd(RedisModuleCtx *ctx, int type, void *ptr) {
+ if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
+ if (ctx->amqueue_used == ctx->amqueue_len) {
+ ctx->amqueue_len *= 2;
+ if (ctx->amqueue_len < 16) ctx->amqueue_len = 16;
+ ctx->amqueue = zrealloc(ctx->amqueue,sizeof(struct AutoMemEntry)*ctx->amqueue_len);
+ }
+ ctx->amqueue[ctx->amqueue_used].type = type;
+ ctx->amqueue[ctx->amqueue_used].ptr = ptr;
+ ctx->amqueue_used++;
+}
+
+/* Mark an object as freed in the auto release queue, so that users can still
+ * free things manually if they want.
+ *
+ * The function returns 1 if the object was actually found in the auto memory
+ * pool, otherwise 0 is returned. */
+int autoMemoryFreed(RedisModuleCtx *ctx, int type, void *ptr) {
+ if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return 0;
+
+ int count = (ctx->amqueue_used+1)/2;
+ for (int j = 0; j < count; j++) {
+ for (int side = 0; side < 2; side++) {
+ /* For side = 0 check right side of the array, for
+ * side = 1 check the left side instead (zig-zag scanning). */
+ int i = (side == 0) ? (ctx->amqueue_used - 1 - j) : j;
+ if (ctx->amqueue[i].type == type &&
+ ctx->amqueue[i].ptr == ptr)
+ {
+ ctx->amqueue[i].type = REDISMODULE_AM_FREED;
+
+ /* Move the last element into the freed slot, to avoid growing
+ * the queue unnecessarily if we allocate/free in a loop. */
+ if (i != ctx->amqueue_used-1) {
+ ctx->amqueue[i] = ctx->amqueue[ctx->amqueue_used-1];
+ }
+
+ /* Reduce the size of the queue because we either moved the top
+ * element elsewhere or freed it */
+ ctx->amqueue_used--;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Release all the objects in queue. */
+void autoMemoryCollect(RedisModuleCtx *ctx) {
+ if (!(ctx->flags & REDISMODULE_CTX_AUTO_MEMORY)) return;
+ /* Clear the AUTO_MEMORY flag from the context, otherwise the functions
+ * we call to free the resources will try to scan the auto release
+ * queue to mark the entries as freed. */
+ ctx->flags &= ~REDISMODULE_CTX_AUTO_MEMORY;
+ int j;
+ for (j = 0; j < ctx->amqueue_used; j++) {
+ void *ptr = ctx->amqueue[j].ptr;
+ switch(ctx->amqueue[j].type) {
+ case REDISMODULE_AM_STRING: decrRefCount(ptr); break;
+ case REDISMODULE_AM_REPLY: RM_FreeCallReply(ptr); break;
+ case REDISMODULE_AM_KEY: RM_CloseKey(ptr); break;
+ }
+ }
+ ctx->flags |= REDISMODULE_CTX_AUTO_MEMORY;
+ zfree(ctx->amqueue);
+ ctx->amqueue = NULL;
+ ctx->amqueue_len = 0;
+ ctx->amqueue_used = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * String objects APIs
+ * -------------------------------------------------------------------------- */
+
+/* Create a new module string object. The returned string must be freed
+ * with RedisModule_FreeString(), unless automatic memory is enabled.
+ *
+ * The string is created by copying the `len` bytes starting
+ * at `ptr`. No reference is retained to the passed buffer. */
+RedisModuleString *RM_CreateString(RedisModuleCtx *ctx, const char *ptr, size_t len) {
+ RedisModuleString *o = createStringObject(ptr,len);
+ autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+ return o;
+}
+
+/* Create a new module string object from a printf format and arguments.
+ * The returned string must be freed with RedisModule_FreeString(), unless
+ * automatic memory is enabled.
+ *
+ * The string is created using the sds formatter function sdscatvprintf(). */
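+/* For example (the key name format and 'uid' are illustrative):
+ *
+ *     RedisModuleString *s =
+ *         RedisModule_CreateStringPrintf(ctx,"user:%lld",(long long)uid);
+ */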
+RedisModuleString *RM_CreateStringPrintf(RedisModuleCtx *ctx, const char *fmt, ...) {
+ sds s = sdsempty();
+
+ va_list ap;
+ va_start(ap, fmt);
+ s = sdscatvprintf(s, fmt, ap);
+ va_end(ap);
+
+ RedisModuleString *o = createObject(OBJ_STRING, s);
+ autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+
+ return o;
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from a long long
+ * integer instead of taking a buffer and its length.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_CreateStringFromLongLong(RedisModuleCtx *ctx, long long ll) {
+ char buf[LONG_STR_SIZE];
+ size_t len = ll2string(buf,sizeof(buf),ll);
+ return RM_CreateString(ctx,buf,len);
+}
+
+/* Like RedisModule_CreateString(), but creates a string starting from another
+ * RedisModuleString.
+ *
+ * The returned string must be released with RedisModule_FreeString() or by
+ * enabling automatic memory management. */
+RedisModuleString *RM_CreateStringFromString(RedisModuleCtx *ctx, const RedisModuleString *str) {
+ RedisModuleString *o = dupStringObject(str);
+ autoMemoryAdd(ctx,REDISMODULE_AM_STRING,o);
+ return o;
+}
+
+/* Free a module string object obtained with one of the Redis modules API calls
+ * that return new string objects.
+ *
+ * It is possible to call this function even when automatic memory management
+ * is enabled. In that case the string will be released ASAP and removed
+ * from the pool of strings to release at the end. */
+void RM_FreeString(RedisModuleCtx *ctx, RedisModuleString *str) {
+ decrRefCount(str);
+ autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str);
+}
+
+/* Every call to this function makes the string 'str' require
+ * an additional call to RedisModule_FreeString() in order to really
+ * free the string. Note that the automatic freeing of the string obtained
+ * by enabling the module's automatic memory management counts as one
+ * RedisModule_FreeString() call (it is just executed automatically).
+ *
+ * Normally you want to call this function when, at the same time,
+ * the following conditions are true:
+ *
+ * 1) You have automatic memory management enabled.
+ * 2) You want to create string objects.
+ * 3) Those string objects you create need to live *after* the callback
+ * function (for example a command implementation) creating them returns.
+ *
+ * Usually you want this in order to store the created string object
+ * into your own data structure, for example when implementing a new data
+ * type.
+ *
+ * Note that when memory management is turned off, you don't need
+ * any call to RetainString() since creating a string will always result
+ * in a string that lives after the callback function returns, if
+ * no FreeString() call is performed. */
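+/* A sketch of the typical pattern, assuming the module stores the string
+ * in a private data structure (names are illustrative):
+ *
+ *     RedisModuleString *s = RedisModule_CreateString(ctx,buf,len);
+ *     RedisModule_RetainString(ctx,s);
+ *     mydata->stored = s; // 's' now survives the callback return.
+ *     // Release later with RedisModule_FreeString() when done.
+ */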
+void RM_RetainString(RedisModuleCtx *ctx, RedisModuleString *str) {
+ if (!autoMemoryFreed(ctx,REDISMODULE_AM_STRING,str)) {
+ /* Increment the string reference count only if we can't
+ * just remove the object from the list of objects that should
+ * be reclaimed. Why do we do that, instead of just incrementing
+ * the refcount in any case and letting the automatic FreeString()
+ * call at the end bring the refcount back to the desired
+ * value? Because this way we ensure that the object refcount
+ * value is 1 (instead of going to 2 to be dropped later to 1)
+ * after the call to this function. This is needed for functions
+ * like RedisModule_StringAppendBuffer() to work. */
+ incrRefCount(str);
+ }
+}
+
+/* Given a module string object, this function returns the string pointer
+ * and length of the string. The returned pointer and length should only
+ * be used for read-only accesses and never modified. */
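+/* For example (illustrative):
+ *
+ *     size_t len;
+ *     const char *p = RedisModule_StringPtrLen(argv[1],&len);
+ */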
+const char *RM_StringPtrLen(const RedisModuleString *str, size_t *len) {
+ if (str == NULL) {
+ const char *errmsg = "(NULL string reply referenced in module)";
+ if (len) *len = strlen(errmsg);
+ return errmsg;
+ }
+ if (len) *len = sdslen(str->ptr);
+ return str->ptr;
+}
+
+/* --------------------------------------------------------------------------
+ * Higher level string operations
+ * ------------------------------------------------------------------------- */
+
+/* Convert the string into a long long integer, storing it at `*ll`.
+ * Returns REDISMODULE_OK on success. If the string can't be parsed
+ * as a valid, strict long long (no spaces before/after), REDISMODULE_ERR
+ * is returned. */
+int RM_StringToLongLong(const RedisModuleString *str, long long *ll) {
+ return string2ll(str->ptr,sdslen(str->ptr),ll) ? REDISMODULE_OK :
+ REDISMODULE_ERR;
+}
+
+/* Convert the string into a double, storing it at `*d`.
+ * Returns REDISMODULE_OK on success or REDISMODULE_ERR if the string is
+ * not a valid string representation of a double value. */
+int RM_StringToDouble(const RedisModuleString *str, double *d) {
+ int retval = getDoubleFromObject(str,d);
+ return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Compare two string objects, returning -1, 0 or 1 respectively if
+ * a < b, a == b, a > b. Strings are compared byte by byte as two
+ * binary blobs, without any encoding awareness or collation attempt. */
+int RM_StringCompare(RedisModuleString *a, RedisModuleString *b) {
+ return compareStringObjects(a,b);
+}
+
+/* Return the input 'str' object (possibly with a modified encoding) if
+ * the string is unshared, otherwise NULL is returned. */
+RedisModuleString *moduleAssertUnsharedString(RedisModuleString *str) {
+ if (str->refcount != 1) {
+ serverLog(LL_WARNING,
+ "Module attempted to use an in-place string modify operation "
+ "with a string referenced multiple times. Please check the code "
+ "for API usage correctness.");
+ return NULL;
+ }
+ if (str->encoding == OBJ_ENCODING_EMBSTR) {
+ /* Note: here we "leak" the additional allocation that was
+ * used in order to store the embedded string in the object. */
+ str->ptr = sdsnewlen(str->ptr,sdslen(str->ptr));
+ str->encoding = OBJ_ENCODING_RAW;
+ } else if (str->encoding == OBJ_ENCODING_INT) {
+ /* Convert the string from integer to raw encoding. */
+ str->ptr = sdsfromlonglong((long)str->ptr);
+ str->encoding = OBJ_ENCODING_RAW;
+ }
+ return str;
+}
+
+/* Append the specified buffer to the string 'str'. The string must be a
+ * string created by the user that is referenced only a single time, otherwise
+ * REDISMODULE_ERR is returned and the operation is not performed. */
+int RM_StringAppendBuffer(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) {
+ UNUSED(ctx);
+ str = moduleAssertUnsharedString(str);
+ if (str == NULL) return REDISMODULE_ERR;
+ str->ptr = sdscatlen(str->ptr,buf,len);
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Reply APIs
+ *
+ * Most functions always return REDISMODULE_OK, so you can use them with
+ * 'return' in order to exit from the command implementation with:
+ *
+ * if (... some condition ...)
+ * return RM_ReplyWithLongLong(ctx,mycount);
+ * -------------------------------------------------------------------------- */
+
+/* Send an error about the number of arguments given to the command,
+ * citing the command name in the error message.
+ *
+ * Example:
+ *
+ * if (argc != 3) return RedisModule_WrongArity(ctx);
+ */
+int RM_WrongArity(RedisModuleCtx *ctx) {
+ addReplyErrorFormat(ctx->client,
+ "wrong number of arguments for '%s' command",
+ (char*)ctx->client->argv[0]->ptr);
+ return REDISMODULE_OK;
+}
+
+/* Return the client object the `RM_Reply*` functions should target.
+ * Normally this is just `ctx->client`, that is, the client that called
+ * the module command. However in the case of thread safe contexts there
+ * is no directly associated client (since it would not be safe to access
+ * the client from a thread), so instead the blocked client object referenced
+ * in the thread safe context has a fake client that we just use to accumulate
+ * the replies. Later, when the client is unblocked, the accumulated replies
+ * are appended to the actual client.
+ *
+ * The function returns the client pointer depending on the context, or
+ * NULL if there is no potential client. This happens when we are in the
+ * context of a thread safe context that was not initialized with a blocked
+ * client object. Other contexts without associated clients are the ones
+ * initialized to run the timers callbacks. */
+client *moduleGetReplyClient(RedisModuleCtx *ctx) {
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) {
+ if (ctx->blocked_client)
+ return ctx->blocked_client->reply_client;
+ else
+ return NULL;
+ } else {
+ /* If this is a non thread safe context, just return the client
+ * that is running the command if any. This may be NULL as well
+ * in the case of contexts that are not executed with associated
+ * clients, like timer contexts. */
+ return ctx->client;
+ }
+}
+
+/* Send an integer reply to the client, with the specified long long value.
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithLongLong(RedisModuleCtx *ctx, long long ll) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyLongLong(c,ll);
+ return REDISMODULE_OK;
+}
+
+/* Reply with an error or simple string (status message). Used to implement
+ * ReplyWithSimpleString() and ReplyWithError().
+ * The function always returns REDISMODULE_OK. */
+int replyWithStatus(RedisModuleCtx *ctx, const char *msg, char *prefix) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ sds strmsg = sdsnewlen(prefix,1);
+ strmsg = sdscat(strmsg,msg);
+ strmsg = sdscatlen(strmsg,"\r\n",2);
+ addReplySds(c,strmsg);
+ return REDISMODULE_OK;
+}
+
+/* Reply with the error 'err'.
+ *
+ * Note that 'err' must contain the whole error message, including
+ * the initial error code. The function only provides the initial "-", so
+ * the usage is, for example:
+ *
+ * RedisModule_ReplyWithError(ctx,"ERR Wrong Type");
+ *
+ * and not just:
+ *
+ * RedisModule_ReplyWithError(ctx,"Wrong Type");
+ *
+ * The function always returns REDISMODULE_OK.
+ */
+int RM_ReplyWithError(RedisModuleCtx *ctx, const char *err) {
+ return replyWithStatus(ctx,err,"-");
+}
+
+/* Reply with a simple string (+... \r\n in RESP protocol). These replies
+ * are suitable only when sending small non-binary strings with small
+ * overhead, like "OK" or similar replies.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithSimpleString(RedisModuleCtx *ctx, const char *msg) {
+ return replyWithStatus(ctx,msg,"+");
+}
+
+/* Reply with an array type of 'len' elements. Then 'len' other calls
+ * to `ReplyWith*` style functions must follow in order to emit the elements
+ * of the array.
+ *
+ * When producing arrays with a number of elements that is not known beforehand
+ * the function can be called with the special count
+ * REDISMODULE_POSTPONED_ARRAY_LEN, and the actual number of elements can be
+ * later set with RedisModule_ReplySetArrayLength() (which will set the
+ * latest "open" count if there are multiple ones).
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithArray(RedisModuleCtx *ctx, long len) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ if (len == REDISMODULE_POSTPONED_ARRAY_LEN) {
+ ctx->postponed_arrays = zrealloc(ctx->postponed_arrays,sizeof(void*)*
+ (ctx->postponed_arrays_count+1));
+ ctx->postponed_arrays[ctx->postponed_arrays_count] =
+ addDeferredMultiBulkLength(c);
+ ctx->postponed_arrays_count++;
+ } else {
+ addReplyMultiBulkLen(c,len);
+ }
+ return REDISMODULE_OK;
+}
+
+/* When RedisModule_ReplyWithArray() is used with the argument
+ * REDISMODULE_POSTPONED_ARRAY_LEN, because we don't know beforehand the number
+ * of items we are going to output as elements of the array, this function
+ * will take care of setting the array length.
+ *
+ * Since it is possible to have multiple array replies pending with unknown
+ * length, this function guarantees to always set the latest array length
+ * that was created in a postponed way.
+ *
+ * For example in order to output an array like [1,[10,20,30]] we
+ * could write:
+ *
+ * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ * RedisModule_ReplyWithLongLong(ctx,1);
+ * RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ * RedisModule_ReplyWithLongLong(ctx,10);
+ * RedisModule_ReplyWithLongLong(ctx,20);
+ * RedisModule_ReplyWithLongLong(ctx,30);
+ * RedisModule_ReplySetArrayLength(ctx,3); // Set len of 10,20,30 array.
+ * RedisModule_ReplySetArrayLength(ctx,2); // Set len of top array
+ *
+ * Note that in the above example there is no reason to postpone the array
+ * length, since we produce a fixed number of elements, but in practice
+ * the code may use an iterator or other ways of creating the output, so
+ * that it is not easy to calculate the number of elements in advance.
+ */
+void RM_ReplySetArrayLength(RedisModuleCtx *ctx, long len) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return;
+ if (ctx->postponed_arrays_count == 0) {
+ serverLog(LL_WARNING,
+ "API misuse detected in module %s: "
+ "RedisModule_ReplySetArrayLength() called without previous "
+ "RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN) "
+ "call.", ctx->module->name);
+ return;
+ }
+ ctx->postponed_arrays_count--;
+ setDeferredMultiBulkLength(c,
+ ctx->postponed_arrays[ctx->postponed_arrays_count],
+ len);
+ if (ctx->postponed_arrays_count == 0) {
+ zfree(ctx->postponed_arrays);
+ ctx->postponed_arrays = NULL;
+ }
+}
+
+/* Reply with a bulk string, taking as input a C buffer pointer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyBulkCBuffer(c,(char*)buf,len);
+ return REDISMODULE_OK;
+}
+
+/* Reply with a bulk string, taking as input a RedisModuleString object.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyBulk(c,str);
+ return REDISMODULE_OK;
+}
+
+/* Reply to the client with a NULL. In the RESP protocol a NULL is encoded
+ * as the string "$-1\r\n".
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithNull(RedisModuleCtx *ctx) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReply(c,shared.nullbulk);
+ return REDISMODULE_OK;
+}
+
+/* Reply with exactly what a Redis command returned to us via RedisModule_Call().
+ * This function is useful when we use RedisModule_Call() in order to
+ * execute some command, and we want to reply to the client with exactly the
+ * same reply we obtained from that command.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithCallReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ sds proto = sdsnewlen(reply->proto, reply->protolen);
+ addReplySds(c,proto);
+ return REDISMODULE_OK;
+}
+
+/* Send a string reply obtained by converting the double 'd' into a bulk string.
+ * This function is basically equivalent to converting a double into
+ * a string inside a C buffer, and then calling
+ * RedisModule_ReplyWithStringBuffer() with the buffer and length.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplyWithDouble(RedisModuleCtx *ctx, double d) {
+ client *c = moduleGetReplyClient(ctx);
+ if (c == NULL) return REDISMODULE_OK;
+ addReplyDouble(c,d);
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Commands replication API
+ * -------------------------------------------------------------------------- */
+
+/* Helper function to replicate MULTI the first time we replicate something
+ * in the context of a command execution. EXEC will be handled by the
+ * RedisModuleCommandDispatcher() function. */
+void moduleReplicateMultiIfNeeded(RedisModuleCtx *ctx) {
+ /* Skip this if the client explicitly wrapped the command with MULTI, or if
+ * the module command was called by a script. */
+ if (ctx->client->flags & (CLIENT_MULTI|CLIENT_LUA)) return;
+ /* If we already emitted MULTI return ASAP. */
+ if (ctx->flags & REDISMODULE_CTX_MULTI_EMITTED) return;
+ /* If this is a thread safe context, we do not want to wrap commands
+ * executed into MULTI/EXEC; in essence they are executed as single
+ * commands from an external client. */
+ if (ctx->flags & REDISMODULE_CTX_THREAD_SAFE) return;
+ execCommandPropagateMulti(ctx->client);
+ ctx->flags |= REDISMODULE_CTX_MULTI_EMITTED;
+}
+
+/* Replicate the specified command and arguments to slaves and AOF, as an
+ * effect of the execution of the calling command implementation.
+ *
+ * The replicated commands are always wrapped into the MULTI/EXEC that
+ * contains all the commands replicated in a given module command
+ * execution. However the commands replicated with RedisModule_Call()
+ * are the first items, while the ones replicated with RedisModule_Replicate()
+ * will all follow before the EXEC.
+ *
+ * Modules should try to use one interface or the other.
+ *
+ * This command follows exactly the same interface as RedisModule_Call(),
+ * so a set of format specifiers must be passed, followed by arguments
+ * matching the provided format specifiers.
+ *
+ * Please refer to RedisModule_Call() for more information.
+ *
+ * The command returns REDISMODULE_ERR if the format specifiers are invalid
+ * or the command name does not belong to a known command. */
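+/* For example, a command that modifies a key might propagate an equivalent
+ * SET (a sketch; the argv indexes are illustrative):
+ *
+ *     RedisModule_Replicate(ctx,"SET","ss",argv[1],argv[2]);
+ */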
+int RM_Replicate(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+ struct redisCommand *cmd;
+ robj **argv = NULL;
+ int argc = 0, flags = 0, j;
+ va_list ap;
+
+ cmd = lookupCommandByCString((char*)cmdname);
+ if (!cmd) return REDISMODULE_ERR;
+
+ /* Create the client and dispatch the command. */
+ va_start(ap, fmt);
+ argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+ va_end(ap);
+ if (argv == NULL) return REDISMODULE_ERR;
+
+ /* Replicate! */
+ moduleReplicateMultiIfNeeded(ctx);
+ alsoPropagate(cmd,ctx->client->db->id,argv,argc,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+
+ /* Release the argv. */
+ for (j = 0; j < argc; j++) decrRefCount(argv[j]);
+ zfree(argv);
+ server.dirty++;
+ return REDISMODULE_OK;
+}
+
+/* This function will replicate the command exactly as it was invoked
+ * by the client. Note that this function will not wrap the command into
+ * a MULTI/EXEC stanza, so it should not be mixed with other replication
+ * commands.
+ *
+ * Basically this form of replication is useful when you want to propagate
+ * the command to the slaves and AOF file exactly as it was called, since
+ * the command can just be re-executed to deterministically re-create the
+ * new state starting from the old one.
+ *
+ * The function always returns REDISMODULE_OK. */
+int RM_ReplicateVerbatim(RedisModuleCtx *ctx) {
+ alsoPropagate(ctx->client->cmd,ctx->client->db->id,
+ ctx->client->argv,ctx->client->argc,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ server.dirty++;
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * DB and Key APIs -- Generic API
+ * -------------------------------------------------------------------------- */
+
+/* Return the ID of the current client calling the currently active module
+ * command. The returned ID has a few guarantees:
+ *
+ * 1. The ID is different for each different client, so if the same client
+ * executes a module command multiple times, it can be recognized as
+ * having the same ID, while different clients will have different IDs.
+ * 2. The ID increases monotonically. Clients connecting to the server later
+ * are guaranteed to get IDs greater than any past ID previously seen.
+ *
+ * Valid IDs are from 1 to 2^64-1. If 0 is returned it means there is no way
+ * to fetch the ID in the context in which the function was called. */
+unsigned long long RM_GetClientId(RedisModuleCtx *ctx) {
+ if (ctx->client == NULL) return 0;
+ return ctx->client->id;
+}
+
+/* Return the currently selected DB. */
+int RM_GetSelectedDb(RedisModuleCtx *ctx) {
+ return ctx->client->db->id;
+}
+
+/* Return the current context's flags. The flags provide information on the
+ * current request context (whether the client is a Lua script or in a MULTI),
+ * and about the Redis instance in general, e.g. replication and persistence.
+ *
+ * The available flags are:
+ *
+ * * REDISMODULE_CTX_FLAGS_LUA: The command is running in a Lua script
+ *
+ * * REDISMODULE_CTX_FLAGS_MULTI: The command is running inside a transaction
+ *
+ * * REDISMODULE_CTX_FLAGS_MASTER: The Redis instance is a master
+ *
+ * * REDISMODULE_CTX_FLAGS_SLAVE: The Redis instance is a slave
+ *
+ * * REDISMODULE_CTX_FLAGS_READONLY: The Redis instance is read-only
+ *
+ * * REDISMODULE_CTX_FLAGS_CLUSTER: The Redis instance is in cluster mode
+ *
+ * * REDISMODULE_CTX_FLAGS_AOF: The Redis instance has AOF enabled
+ *
+ * * REDISMODULE_CTX_FLAGS_RDB: The instance has RDB enabled
+ *
+ * * REDISMODULE_CTX_FLAGS_MAXMEMORY: The instance has Maxmemory set
+ *
+ * * REDISMODULE_CTX_FLAGS_EVICT: Maxmemory is set and has an eviction
+ * policy that may delete keys
+ *
+ * * REDISMODULE_CTX_FLAGS_OOM: Redis is out of memory according to the
+ * maxmemory setting.
+ *
+ * * REDISMODULE_CTX_FLAGS_OOM_WARNING: Less than 25% of memory remains before
+ * reaching the maxmemory level.
+ */
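+/* For example, a module might refuse a write when Redis is out of memory
+ * (a sketch):
+ *
+ *     if (RedisModule_GetContextFlags(ctx) & REDISMODULE_CTX_FLAGS_OOM)
+ *         return RedisModule_ReplyWithError(ctx,"ERR not enough memory");
+ */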
+int RM_GetContextFlags(RedisModuleCtx *ctx) {
+ int flags = 0;
+ /* Client specific flags */
+ if (ctx->client) {
+ if (ctx->client->flags & CLIENT_LUA)
+ flags |= REDISMODULE_CTX_FLAGS_LUA;
+ if (ctx->client->flags & CLIENT_MULTI)
+ flags |= REDISMODULE_CTX_FLAGS_MULTI;
+ }
+
+ if (server.cluster_enabled)
+ flags |= REDISMODULE_CTX_FLAGS_CLUSTER;
+
+ /* Maxmemory and eviction policy */
+ if (server.maxmemory > 0) {
+ flags |= REDISMODULE_CTX_FLAGS_MAXMEMORY;
+
+ if (server.maxmemory_policy != MAXMEMORY_NO_EVICTION)
+ flags |= REDISMODULE_CTX_FLAGS_EVICT;
+ }
+
+ /* Persistence flags */
+ if (server.aof_state != AOF_OFF)
+ flags |= REDISMODULE_CTX_FLAGS_AOF;
+ if (server.saveparamslen > 0)
+ flags |= REDISMODULE_CTX_FLAGS_RDB;
+
+ /* Replication flags */
+ if (server.masterhost == NULL) {
+ flags |= REDISMODULE_CTX_FLAGS_MASTER;
+ } else {
+ flags |= REDISMODULE_CTX_FLAGS_SLAVE;
+ if (server.repl_slave_ro)
+ flags |= REDISMODULE_CTX_FLAGS_READONLY;
+ }
+
+ /* OOM flag. */
+ float level;
+ int retval = getMaxmemoryState(NULL,NULL,NULL,&level);
+ if (retval == C_ERR) flags |= REDISMODULE_CTX_FLAGS_OOM;
+ if (level > 0.75) flags |= REDISMODULE_CTX_FLAGS_OOM_WARNING;
+
+ return flags;
+}
+
+/* Change the currently selected DB. Returns an error if the id
+ * is out of range.
+ *
+ * Note that the client will retain the currently selected DB even after
+ * the Redis command implemented by the module calling this function
+ * returns.
+ *
+ * If the module command wishes to change something in a different DB and
+ * return back to the original one, it should call RedisModule_GetSelectedDb()
+ * first, in order to be able to restore the old DB number before returning. */
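+/* A sketch of the save and restore pattern described above ('otherdb' is
+ * illustrative):
+ *
+ *     int olddb = RedisModule_GetSelectedDb(ctx);
+ *     RedisModule_SelectDb(ctx,otherdb);
+ *     // ... operate on the other DB ...
+ *     RedisModule_SelectDb(ctx,olddb); // Restore before returning.
+ */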
+int RM_SelectDb(RedisModuleCtx *ctx, int newid) {
+ int retval = selectDb(ctx->client,newid);
+ return (retval == C_OK) ? REDISMODULE_OK : REDISMODULE_ERR;
+}
+
+/* Return a handle representing a Redis key, so that it is possible
+ * to call other APIs with the key handle as argument to perform
+ * operations on the key.
+ *
+ * The return value is the handle representing the key, that must be
+ * closed with RM_CloseKey().
+ *
+ * If the key does not exist and WRITE mode is requested, the handle
+ * is still returned, since it is possible to perform operations on
+ * a not yet existing key (that will be created, for example, after
+ * a list push operation). If the mode is just READ instead, and the
+ * key does not exist, NULL is returned. However it is still safe to
+ * call RedisModule_CloseKey() and RedisModule_KeyType() on a NULL
+ * value. */
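+/* For example (illustrative), checking a key's type and closing the handle:
+ *
+ *     RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],REDISMODULE_READ);
+ *     int type = RedisModule_KeyType(key);
+ *     RedisModule_CloseKey(key); // Safe even if 'key' is NULL.
+ */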
+void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) {
+ RedisModuleKey *kp;
+ robj *value;
+
+ if (mode & REDISMODULE_WRITE) {
+ value = lookupKeyWrite(ctx->client->db,keyname);
+ } else {
+ value = lookupKeyRead(ctx->client->db,keyname);
+ if (value == NULL) {
+ return NULL;
+ }
+ }
+
+ /* Set up the key handle. */
+ kp = zmalloc(sizeof(*kp));
+ kp->ctx = ctx;
+ kp->db = ctx->client->db;
+ kp->key = keyname;
+ incrRefCount(keyname);
+ kp->value = value;
+ kp->iter = NULL;
+ kp->mode = mode;
+ zsetKeyReset(kp);
+ autoMemoryAdd(ctx,REDISMODULE_AM_KEY,kp);
+ return (void*)kp;
+}
+
+/* Close a key handle. */
+void RM_CloseKey(RedisModuleKey *key) {
+ if (key == NULL) return;
+ if (key->mode & REDISMODULE_WRITE) signalModifiedKey(key->db,key->key);
+ /* TODO: if (key->iter) RM_KeyIteratorStop(kp); */
+ RM_ZsetRangeStop(key);
+ decrRefCount(key->key);
+ autoMemoryFreed(key->ctx,REDISMODULE_AM_KEY,key);
+ zfree(key);
+}
+
+/* Return the type of the key. If the key pointer is NULL then
+ * REDISMODULE_KEYTYPE_EMPTY is returned. */
+int RM_KeyType(RedisModuleKey *key) {
+ if (key == NULL || key->value == NULL) return REDISMODULE_KEYTYPE_EMPTY;
+ /* We map between defines so that we are free to change the internal
+ * defines as desired. */
+ switch(key->value->type) {
+ case OBJ_STRING: return REDISMODULE_KEYTYPE_STRING;
+ case OBJ_LIST: return REDISMODULE_KEYTYPE_LIST;
+ case OBJ_SET: return REDISMODULE_KEYTYPE_SET;
+ case OBJ_ZSET: return REDISMODULE_KEYTYPE_ZSET;
+ case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH;
+ case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE;
+ default: return 0;
+ }
+}
+
+/* Return the length of the value associated with the key.
+ * For strings this is the length of the string. For all the other types
+ * it is the number of elements (just counting keys for hashes).
+ *
+ * If the key pointer is NULL or the key is empty, zero is returned. */
+size_t RM_ValueLength(RedisModuleKey *key) {
+ if (key == NULL || key->value == NULL) return 0;
+ switch(key->value->type) {
+ case OBJ_STRING: return stringObjectLen(key->value);
+ case OBJ_LIST: return listTypeLength(key->value);
+ case OBJ_SET: return setTypeSize(key->value);
+ case OBJ_ZSET: return zsetLength(key->value);
+ case OBJ_HASH: return hashTypeLength(key->value);
+ default: return 0;
+ }
+}
+
+/* If the key is open for writing, remove it, and set up the key to
+ * accept new writes as an empty key (that will be created on demand).
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing REDISMODULE_ERR is returned. */
+int RM_DeleteKey(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value) {
+ dbDelete(key->db,key->key);
+ key->value = NULL;
+ }
+ return REDISMODULE_OK;
+}
+
+/* If the key is open for writing, unlink it (that is delete it in a
+ * non-blocking way, not reclaiming memory immediately) and set up the key to
+ * accept new writes as an empty key (that will be created on demand).
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing REDISMODULE_ERR is returned. */
+int RM_UnlinkKey(RedisModuleKey *key) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value) {
+ dbAsyncDelete(key->db,key->key);
+ key->value = NULL;
+ }
+ return REDISMODULE_OK;
+}
+
+/* Return the key expire value, as milliseconds of remaining TTL.
+ * If no TTL is associated with the key or if the key is empty,
+ * REDISMODULE_NO_EXPIRE is returned. */
+mstime_t RM_GetExpire(RedisModuleKey *key) {
+ mstime_t expire = getExpire(key->db,key->key);
+ if (expire == -1 || key->value == NULL) return -1;
+ expire -= mstime();
+ return expire >= 0 ? expire : 0;
+}
+
+/* Set a new expire for the key. If the special expire
+ * REDISMODULE_NO_EXPIRE is set, the expire is cancelled if there was
+ * one (the same as the PERSIST command).
+ *
+ * Note that the expire must be provided as a positive integer representing
+ * the number of milliseconds of TTL the key should have.
+ *
+ * The function returns REDISMODULE_OK on success or REDISMODULE_ERR if
+ * the key was not open for writing or is an empty key. */
+int RM_SetExpire(RedisModuleKey *key, mstime_t expire) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->value == NULL)
+ return REDISMODULE_ERR;
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ expire += mstime();
+ setExpire(key->ctx->client,key->db,key->key,expire);
+ } else {
+ removeExpire(key->db,key->key);
+ }
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for String type
+ * -------------------------------------------------------------------------- */
+
+/* If the key is open for writing, set the specified string 'str' as the
+ * value of the key, deleting the old value if any.
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing or there is an active iterator, REDISMODULE_ERR is returned. */
+int RM_StringSet(RedisModuleKey *key, RedisModuleString *str) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
+ RM_DeleteKey(key);
+ setKey(key->db,key->key,str);
+ key->value = str;
+ return REDISMODULE_OK;
+}
+
+/* Prepare the string value associated with the key for DMA access, and return
+ * a pointer and size (by reference) that the user can use to read or
+ * modify the string in place, accessing it directly via the pointer.
+ *
+ * The 'mode' is composed by bitwise OR-ing the following flags:
+ *
+ * REDISMODULE_READ -- Read access
+ * REDISMODULE_WRITE -- Write access
+ *
+ * If the DMA is not requested for writing, the pointer returned should
+ * only be accessed in a read-only fashion.
+ *
+ * On error (wrong type) NULL is returned.
+ *
+ * DMA access rules:
+ *
+ * 1. No other key writing function should be called from the moment
+ * the pointer is obtained, for as long as we want to use DMA access
+ * to read or modify the string.
+ *
+ * 2. Each time RM_StringTruncate() is called, to continue with the DMA
+ * access, RM_StringDMA() should be called again to re-obtain
+ * a new pointer and length.
+ *
+ * 3. If the returned pointer is not NULL, but the length is zero, no
+ * byte can be touched (the string is empty, or the key itself is empty),
+ * so a RM_StringTruncate() call should be used if there is a need to
+ * enlarge the string, and StringDMA() called again later to get the
+ * new pointer.
+ */
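+/* A sketch that enlarges a string key and fills it via DMA (assumes 'key'
+ * was opened with REDISMODULE_WRITE):
+ *
+ *     size_t len;
+ *     RedisModule_StringTruncate(key,1024);
+ *     char *p = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE);
+ *     if (p) memset(p,0,len);
+ */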
+char *RM_StringDMA(RedisModuleKey *key, size_t *len, int mode) {
+ /* We need to return *some* pointer for empty keys, so we just return
+ * a string literal pointer, which has the advantage of being mapped into
+ * a read only memory page, so the module will segfault if a write
+ * attempt is performed. */
+ char *emptystring = "<dma-empty-string>";
+ if (key->value == NULL) {
+ *len = 0;
+ return emptystring;
+ }
+
+ if (key->value->type != OBJ_STRING) return NULL;
+
+ /* For write access, and even for read access if the object is encoded,
+ * we unshare the string (that has the side effect of decoding it). */
+ if ((mode & REDISMODULE_WRITE) || key->value->encoding != OBJ_ENCODING_RAW)
+ key->value = dbUnshareStringValue(key->db, key->key, key->value);
+
+ *len = sdslen(key->value->ptr);
+ return key->value->ptr;
+}
+
+/* If the key is open for writing and is of string type, resize it, padding
+ * with zero bytes if the new length is greater than the old one.
+ *
+ * After this call, RM_StringDMA() must be called again to continue
+ * DMA access with the new pointer.
+ *
+ * The function returns REDISMODULE_OK on success, and REDISMODULE_ERR on
+ * error, that is, when the key is not open for writing, is not a string,
+ * or resizing to more than 512 MB is requested.
+ *
+ * If the key is empty, a string key is created with the new string value
+ * unless the new length value requested is zero. */
+int RM_StringTruncate(RedisModuleKey *key, size_t newlen) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_STRING) return REDISMODULE_ERR;
+ if (newlen > 512*1024*1024) return REDISMODULE_ERR;
+
+ /* Empty key and new len set to 0. Just return REDISMODULE_OK without
+ * doing anything. */
+ if (key->value == NULL && newlen == 0) return REDISMODULE_OK;
+
+ if (key->value == NULL) {
+ /* Empty key: create it with the new size. */
+ robj *o = createObject(OBJ_STRING,sdsnewlen(NULL, newlen));
+ setKey(key->db,key->key,o);
+ key->value = o;
+ decrRefCount(o);
+ } else {
+ /* Unshare and resize. */
+ key->value = dbUnshareStringValue(key->db, key->key, key->value);
+ size_t curlen = sdslen(key->value->ptr);
+ if (newlen > curlen) {
+ key->value->ptr = sdsgrowzero(key->value->ptr,newlen);
+ } else if (newlen < curlen) {
+ sdsrange(key->value->ptr,0,newlen-1);
+ /* If the string is too wasteful, reallocate it. */
+ if (sdslen(key->value->ptr) < sdsavail(key->value->ptr))
+ key->value->ptr = sdsRemoveFreeSpace(key->value->ptr);
+ }
+ }
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for List type
+ * -------------------------------------------------------------------------- */
+
+/* Push an element into a list, on head or tail depending on 'where' argument.
+ * If the key pointer refers to an empty key opened for writing, the key
+ * is created. On error (key opened for read-only operations or of the wrong
+ * type) REDISMODULE_ERR is returned, otherwise REDISMODULE_OK is returned. */
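+/* For example (illustrative), pushing an argument on the tail of a list:
+ *
+ *     RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],REDISMODULE_WRITE);
+ *     RedisModule_ListPush(key,REDISMODULE_LIST_TAIL,argv[2]);
+ *     RedisModule_CloseKey(key);
+ */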
+int RM_ListPush(RedisModuleKey *key, int where, RedisModuleString *ele) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_LIST) return REDISMODULE_ERR;
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_LIST);
+ listTypePush(key->value, ele,
+ (where == REDISMODULE_LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL);
+ return REDISMODULE_OK;
+}
+
+/* Pop an element from the list, and return it as a module string object
+ * that the user should free with RM_FreeString() or by enabling
+ * automatic memory. 'where' specifies if the element should be popped from
+ * the head or the tail. The command returns NULL if:
+ * 1) The list is empty.
+ * 2) The key was not open for writing.
+ * 3) The key is not a list. */
+RedisModuleString *RM_ListPop(RedisModuleKey *key, int where) {
+ if (!(key->mode & REDISMODULE_WRITE) ||
+ key->value == NULL ||
+ key->value->type != OBJ_LIST) return NULL;
+ robj *ele = listTypePop(key->value,
+ (where == REDISMODULE_LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL);
+ robj *decoded = getDecodedObject(ele);
+ decrRefCount(ele);
+ moduleDelKeyIfEmpty(key);
+ autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,decoded);
+ return decoded;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Sorted Set type
+ * -------------------------------------------------------------------------- */
+
+/* Convert between the public flags of the Modules API and our private flags,
+ * so that we have everything decoupled. */
+int RM_ZsetAddFlagsToCoreFlags(int flags) {
+ int retflags = 0;
+ if (flags & REDISMODULE_ZADD_XX) retflags |= ZADD_XX;
+ if (flags & REDISMODULE_ZADD_NX) retflags |= ZADD_NX;
+ return retflags;
+}
+
+/* See previous function comment. */
+int RM_ZsetAddFlagsFromCoreFlags(int flags) {
+ int retflags = 0;
+ if (flags & ZADD_ADDED) retflags |= REDISMODULE_ZADD_ADDED;
+ if (flags & ZADD_UPDATED) retflags |= REDISMODULE_ZADD_UPDATED;
+ if (flags & ZADD_NOP) retflags |= REDISMODULE_ZADD_NOP;
+ return retflags;
+}
+
+/* Add a new element into a sorted set, with the specified 'score'.
+ * If the element already exists, the score is updated.
+ *
+ * A new sorted set is created as the value if the key is an empty key
+ * open for writing.
+ *
+ * Additional flags can be passed to the function via a pointer; the flags
+ * are used both to receive input and to communicate state back when the
+ * function returns. 'flagsptr' can be NULL if no special flags are used.
+ *
+ * The input flags are:
+ *
+ * REDISMODULE_ZADD_XX: Element must already exist. Do nothing otherwise.
+ * REDISMODULE_ZADD_NX: Element must not exist. Do nothing otherwise.
+ *
+ * The output flags are:
+ *
+ * REDISMODULE_ZADD_ADDED: The new element was added to the sorted set.
+ * REDISMODULE_ZADD_UPDATED: The score of the element was updated.
+ * REDISMODULE_ZADD_NOP: No operation was performed because of the XX or NX flags.
+ *
+ * On success the function returns REDISMODULE_OK. On the following errors
+ * REDISMODULE_ERR is returned:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ * * 'score' double value is not a number (NaN).
+ */
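+/* For example (illustrative), adding an element only if it does not
+ * already exist:
+ *
+ *     int flags = REDISMODULE_ZADD_NX;
+ *     RedisModule_ZsetAdd(key,1.0,argv[2],&flags);
+ *     if (flags & REDISMODULE_ZADD_NOP) {
+ *         // The element already existed, nothing was done.
+ *     }
+ */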
+int RM_ZsetAdd(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr) {
+ int flags = 0;
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+ if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+ if (zsetAdd(key->value,score,ele->ptr,&flags,NULL) == 0) {
+ if (flagsptr) *flagsptr = 0;
+ return REDISMODULE_ERR;
+ }
+ if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+ return REDISMODULE_OK;
+}
+
+/* This function works exactly like RM_ZsetAdd(), but instead of setting
+ * a new score, the score of the existing element is incremented, or if the
+ * element does not already exist, it is added assuming the old score was
+ * zero.
+ *
+ * The input and output flags, and the return value, have the same exact
+ * meaning, with the only difference that this function will return
+ * REDISMODULE_ERR even when 'score' is a valid double number, but adding it
+ * to the existing score results in a NaN (not a number) condition.
+ *
+ * This function has an additional argument 'newscore': if not NULL, it is
+ * filled with the new score of the element after the increment, when no
+ * error is returned. */
+int RM_ZsetIncrby(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore) {
+ int flags = 0;
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_ZSET);
+ if (flagsptr) flags = RM_ZsetAddFlagsToCoreFlags(*flagsptr);
+ flags |= ZADD_INCR;
+ if (zsetAdd(key->value,score,ele->ptr,&flags,newscore) == 0) {
+ if (flagsptr) *flagsptr = 0;
+ return REDISMODULE_ERR;
+ }
+ /* zsetAdd() may signal back that the resulting score is not a number. */
+ if (flagsptr && (*flagsptr & ZADD_NAN)) {
+ *flagsptr = 0;
+ return REDISMODULE_ERR;
+ }
+ if (flagsptr) *flagsptr = RM_ZsetAddFlagsFromCoreFlags(flags);
+ return REDISMODULE_OK;
+}
+
+/* Remove the specified element from the sorted set.
+ * The function returns REDISMODULE_OK on success, and REDISMODULE_ERR
+ * on one of the following conditions:
+ *
+ * * The key was not opened for writing.
+ * * The key is of the wrong type.
+ *
+ * The return value does NOT indicate whether the element was really
+ * removed (because it existed) or not, only if the function was executed
+ * successfully.
+ *
+ * In order to know if the element was removed, the additional argument
+ * 'deleted' must be passed: the pointed integer is populated by reference,
+ * set to 1 or 0 depending on the outcome of the operation.
+ * The 'deleted' argument can be NULL if the caller is not interested
+ * in knowing if the element was really removed.
+ *
+ * Empty keys will be handled correctly by doing nothing. */
+int RM_ZsetRem(RedisModuleKey *key, RedisModuleString *ele, int *deleted) {
+ if (!(key->mode & REDISMODULE_WRITE)) return REDISMODULE_ERR;
+ if (key->value && key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+ if (key->value != NULL && zsetDel(key->value,ele->ptr)) {
+ if (deleted) *deleted = 1;
+ } else {
+ if (deleted) *deleted = 0;
+ }
+ return REDISMODULE_OK;
+}
+
+/* On success retrieve the double score associated with the sorted set element
+ * 'ele' and return REDISMODULE_OK. Otherwise REDISMODULE_ERR is returned
+ * to signal one of the following conditions:
+ *
+ * * There is no such element 'ele' in the sorted set.
+ * * The key is not a sorted set.
+ * * The key is an open empty key.
+ */
+int RM_ZsetScore(RedisModuleKey *key, RedisModuleString *ele, double *score) {
+ if (key->value == NULL) return REDISMODULE_ERR;
+ if (key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+ if (zsetScore(key->value,ele->ptr,score) == C_ERR) return REDISMODULE_ERR;
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Sorted Set iterator
+ * -------------------------------------------------------------------------- */
+
+void zsetKeyReset(RedisModuleKey *key) {
+ key->ztype = REDISMODULE_ZSET_RANGE_NONE;
+ key->zcurrent = NULL;
+ key->zer = 1;
+}
+
+/* Stop a sorted set iteration. */
+void RM_ZsetRangeStop(RedisModuleKey *key) {
+ /* Free resources if needed. */
+ if (key->ztype == REDISMODULE_ZSET_RANGE_LEX)
+ zslFreeLexRange(&key->zlrs);
+ /* Set up sensible values so that misused iteration API calls, when an
+ * iterator is not active, will result in something more sensible
+ * than crashing. */
+ zsetKeyReset(key);
+}
+
+/* Return the "End of range" flag value to signal the end of the iteration. */
+int RM_ZsetRangeEndReached(RedisModuleKey *key) {
+ return key->zer;
+}
+
+/* Helper function for RM_ZsetFirstInScoreRange() and RM_ZsetLastInScoreRange().
+ * Set up the sorted set iteration according to the specified score range
+ * (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR. */
+int zsetInitScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex, int first) {
+ if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+ RM_ZsetRangeStop(key);
+ key->ztype = REDISMODULE_ZSET_RANGE_SCORE;
+ key->zer = 0;
+
+ /* Set up the range structure used by the sorted set core implementation
+ * in order to seek at the specified element. */
+ zrangespec *zrs = &key->zrs;
+ zrs->min = min;
+ zrs->max = max;
+ zrs->minex = minex;
+ zrs->maxex = maxex;
+
+ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+ key->zcurrent = first ? zzlFirstInRange(key->value->ptr,zrs) :
+ zzlLastInRange(key->value->ptr,zrs);
+ } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = key->value->ptr;
+ zskiplist *zsl = zs->zsl;
+ key->zcurrent = first ? zslFirstInRange(zsl,zrs) :
+ zslLastInRange(zsl,zrs);
+ } else {
+ serverPanic("Unsupported zset encoding");
+ }
+ if (key->zcurrent == NULL) key->zer = 1;
+ return REDISMODULE_OK;
+}
+
+/* Set up a sorted set iterator seeking the first element in the specified
+ * range. Returns REDISMODULE_OK if the iterator was correctly initialized
+ * otherwise REDISMODULE_ERR is returned in the following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ *
+ * The range is specified according to the two double values 'min' and 'max'.
+ * Both can be infinite using the following two macros:
+ *
+ * REDISMODULE_POSITIVE_INFINITE for positive infinite value
+ * REDISMODULE_NEGATIVE_INFINITE for negative infinite value
+ *
+ * 'minex' and 'maxex' parameters, if true, respectively setup a range
+ * where the min and max value are exclusive (not included) instead of
+ * inclusive. */
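+/* A typical iteration over a score range might look like this sketch:
+ *
+ *     RedisModule_ZsetFirstInScoreRange(key,0,100,0,0);
+ *     while (!RedisModule_ZsetRangeEndReached(key)) {
+ *         double score;
+ *         RedisModuleString *ele =
+ *             RedisModule_ZsetRangeCurrentElement(key,&score);
+ *         // ... use ele and score ...
+ *         RedisModule_ZsetRangeNext(key);
+ *     }
+ *     RedisModule_ZsetRangeStop(key);
+ */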
+int RM_ZsetFirstInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+ return zsetInitScoreRange(key,min,max,minex,maxex,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInScoreRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInScoreRange(RedisModuleKey *key, double min, double max, int minex, int maxex) {
+ return zsetInitScoreRange(key,min,max,minex,maxex,0);
+}
+
+/* Helper function for RM_ZsetFirstInLexRange() and RM_ZsetLastInLexRange().
+ * Set up the sorted set iteration according to the specified lexicographical
+ * range (see the functions calling it for more info). If 'first' is true the
+ * first element in the range is used as a starting point for the iterator
+ * otherwise the last. Return REDISMODULE_OK on success otherwise
+ * REDISMODULE_ERR.
+ *
+ * Note that this function takes 'min' and 'max' in the same form as the
+ * Redis ZRANGEBYLEX command. */
+int zsetInitLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max, int first) {
+ if (!key->value || key->value->type != OBJ_ZSET) return REDISMODULE_ERR;
+
+ RM_ZsetRangeStop(key);
+ key->zer = 0;
+
+ /* Set up the range structure used by the sorted set core implementation
+ * in order to seek at the specified element. */
+ zlexrangespec *zlrs = &key->zlrs;
+ if (zslParseLexRange(min, max, zlrs) == C_ERR) return REDISMODULE_ERR;
+
+ /* Set the range type to lex only after successfully parsing the range,
+ * otherwise we don't want the zlexrangespec to be freed. */
+ key->ztype = REDISMODULE_ZSET_RANGE_LEX;
+
+ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+ key->zcurrent = first ? zzlFirstInLexRange(key->value->ptr,zlrs) :
+ zzlLastInLexRange(key->value->ptr,zlrs);
+ } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = key->value->ptr;
+ zskiplist *zsl = zs->zsl;
+ key->zcurrent = first ? zslFirstInLexRange(zsl,zlrs) :
+ zslLastInLexRange(zsl,zlrs);
+ } else {
+ serverPanic("Unsupported zset encoding");
+ }
+ if (key->zcurrent == NULL) key->zer = 1;
+
+ return REDISMODULE_OK;
+}
+
+/* Set up a sorted set iterator seeking the first element in the specified
+ * lexicographical range. Returns REDISMODULE_OK if the iterator was correctly
+ * initialized otherwise REDISMODULE_ERR is returned in the
+ * following conditions:
+ *
+ * 1. The value stored at key is not a sorted set or the key is empty.
+ * 2. The lexicographical range 'min' and 'max' format is invalid.
+ *
+ * 'min' and 'max' should be provided as two RedisModuleString objects
+ * in the same format as the parameters passed to the ZRANGEBYLEX command.
+ * The function does not take ownership of the objects, so they can be released
+ * ASAP after the iterator is set up. */
+int RM_ZsetFirstInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+ return zsetInitLexRange(key,min,max,1);
+}
+
+/* Exactly like RedisModule_ZsetFirstInLexRange() but the last element of
+ * the range is selected for the start of the iteration instead. */
+int RM_ZsetLastInLexRange(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) {
+ return zsetInitLexRange(key,min,max,0);
+}
+
+/* Return the current sorted set element of an active sorted set iterator
+ * or NULL if the range specified in the iterator does not include any
+ * element. */
+RedisModuleString *RM_ZsetRangeCurrentElement(RedisModuleKey *key, double *score) {
+ RedisModuleString *str;
+
+ if (key->zcurrent == NULL) return NULL;
+ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *eptr, *sptr;
+ eptr = key->zcurrent;
+ sds ele = ziplistGetObject(eptr);
+ if (score) {
+ sptr = ziplistNext(key->value->ptr,eptr);
+ *score = zzlGetScore(sptr);
+ }
+ str = createObject(OBJ_STRING,ele);
+ } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+ zskiplistNode *ln = key->zcurrent;
+ if (score) *score = ln->score;
+ str = createStringObject(ln->ele,sdslen(ln->ele));
+ } else {
+ serverPanic("Unsupported zset encoding");
+ }
+ autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,str);
+ return str;
+}
+
+/* Go to the next element of the sorted set iterator. Returns 1 if there was
+ * a next element, 0 if we are already at the last element or the range
+ * does not include any item at all. */
+int RM_ZsetRangeNext(RedisModuleKey *key) {
+ if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+
+ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = key->value->ptr;
+ unsigned char *eptr = key->zcurrent;
+ unsigned char *next;
+ next = ziplistNext(zl,eptr); /* Skip element. */
+ if (next) next = ziplistNext(zl,next); /* Skip score. */
+ if (next == NULL) {
+ key->zer = 1;
+ return 0;
+ } else {
+ /* Are we still within the range? */
+ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+ /* Fetch the next element score for the
+ * range check. */
+ unsigned char *saved_next = next;
+ next = ziplistNext(zl,next); /* Skip next element. */
+ double score = zzlGetScore(next); /* Obtain the next score. */
+ if (!zslValueLteMax(score,&key->zrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ next = saved_next;
+ } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zzlLexValueLteMax(next,&key->zlrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ }
+ key->zcurrent = next;
+ return 1;
+ }
+ } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+ zskiplistNode *ln = key->zcurrent, *next = ln->level[0].forward;
+ if (next == NULL) {
+ key->zer = 1;
+ return 0;
+ } else {
+ /* Are we still within the range? */
+ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
+ !zslValueLteMax(next->score,&key->zrs))
+ {
+ key->zer = 1;
+ return 0;
+ } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zslLexValueLteMax(next->ele,&key->zlrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ }
+ key->zcurrent = next;
+ return 1;
+ }
+ } else {
+ serverPanic("Unsupported zset encoding");
+ }
+}
+
+/* Go to the previous element of the sorted set iterator. Returns 1 if there was
+ * a previous element, 0 if we are already at the first element or the range
+ * does not include any item at all. */
+int RM_ZsetRangePrev(RedisModuleKey *key) {
+ if (!key->ztype || !key->zcurrent) return 0; /* No active iterator. */
+
+ if (key->value->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = key->value->ptr;
+ unsigned char *eptr = key->zcurrent;
+ unsigned char *prev;
+ prev = ziplistPrev(zl,eptr); /* Go back to previous score. */
+ if (prev) prev = ziplistPrev(zl,prev); /* Back to previous ele. */
+ if (prev == NULL) {
+ key->zer = 1;
+ return 0;
+ } else {
+ /* Are we still within the range? */
+ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE) {
+ /* Fetch the previous element score for the
+ * range check. */
+ unsigned char *saved_prev = prev;
+ prev = ziplistNext(zl,prev); /* Skip element to get the score.*/
+ double score = zzlGetScore(prev); /* Obtain the prev score. */
+ if (!zslValueGteMin(score,&key->zrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ prev = saved_prev;
+ } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zzlLexValueGteMin(prev,&key->zlrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ }
+ key->zcurrent = prev;
+ return 1;
+ }
+ } else if (key->value->encoding == OBJ_ENCODING_SKIPLIST) {
+ zskiplistNode *ln = key->zcurrent, *prev = ln->backward;
+ if (prev == NULL) {
+ key->zer = 1;
+ return 0;
+ } else {
+ /* Are we still within the range? */
+ if (key->ztype == REDISMODULE_ZSET_RANGE_SCORE &&
+ !zslValueGteMin(prev->score,&key->zrs))
+ {
+ key->zer = 1;
+ return 0;
+ } else if (key->ztype == REDISMODULE_ZSET_RANGE_LEX) {
+ if (!zslLexValueGteMin(prev->ele,&key->zlrs)) {
+ key->zer = 1;
+ return 0;
+ }
+ }
+ key->zcurrent = prev;
+ return 1;
+ }
+ } else {
+ serverPanic("Unsupported zset encoding");
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Key API for Hash type
+ * -------------------------------------------------------------------------- */
+
+/* Set the specified hash field to the specified value.
+ * If the key is an empty key open for writing, it is created with an empty
+ * hash value, in order to set the specified field.
+ *
+ * The function is variadic and the user must specify pairs of field
+ * names and values, both as RedisModuleString pointers (unless the
+ * CFIELDS option is set, see later).
+ *
+ * Example, to set the hash field argv[1] to the value argv[2]:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],argv[2],NULL);
+ *
+ * The function can also be used in order to delete fields (if they exist)
+ * by setting them to the specified value of REDISMODULE_HASH_DELETE:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[1],
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * The behavior of the command changes with the specified flags, that can be
+ * set to REDISMODULE_HASH_NONE if no special behavior is needed.
+ *
+ * REDISMODULE_HASH_NX: The operation is performed only if the field was not
+ * already existing in the hash.
+ * REDISMODULE_HASH_XX: The operation is performed only if the field was
+ * already existing, so that a new value could be
+ * associated with an existing field, but no new fields
+ * are created.
+ * REDISMODULE_HASH_CFIELDS: The field names passed are null terminated C
+ * strings instead of RedisModuleString objects.
+ *
+ * Unless NX is specified, the command overwrites the old field value with
+ * the new one.
+ *
+ * When using REDISMODULE_HASH_CFIELDS, field names are reported using
+ * normal C strings, so for example to delete the field "foo" the following
+ * code can be used:
+ *
+ * RedisModule_HashSet(key,REDISMODULE_HASH_CFIELDS,"foo",
+ * REDISMODULE_HASH_DELETE,NULL);
+ *
+ * Return value:
+ *
+ * The number of fields updated (which may be less than the number of fields
+ * specified because of the XX or NX options).
+ *
+ * In the following cases the return value is always zero:
+ *
+ * * The key was not open for writing.
+ * * The key was associated with a non-hash value.
+ */
+int RM_HashSet(RedisModuleKey *key, int flags, ...) {
+ va_list ap;
+ if (!(key->mode & REDISMODULE_WRITE)) return 0;
+ if (key->value && key->value->type != OBJ_HASH) return 0;
+ if (key->value == NULL) moduleCreateEmptyKey(key,REDISMODULE_KEYTYPE_HASH);
+
+ int updated = 0;
+ va_start(ap, flags);
+ while(1) {
+ RedisModuleString *field, *value;
+ /* Get the field and value objects. */
+ if (flags & REDISMODULE_HASH_CFIELDS) {
+ char *cfield = va_arg(ap,char*);
+ if (cfield == NULL) break;
+ field = createRawStringObject(cfield,strlen(cfield));
+ } else {
+ field = va_arg(ap,RedisModuleString*);
+ if (field == NULL) break;
+ }
+ value = va_arg(ap,RedisModuleString*);
+
+ /* Handle XX and NX */
+ if (flags & (REDISMODULE_HASH_XX|REDISMODULE_HASH_NX)) {
+ int exists = hashTypeExists(key->value, field->ptr);
+ if (((flags & REDISMODULE_HASH_XX) && !exists) ||
+ ((flags & REDISMODULE_HASH_NX) && exists))
+ {
+ if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+ continue;
+ }
+ }
+
+ /* Handle deletion if value is REDISMODULE_HASH_DELETE. */
+ if (value == REDISMODULE_HASH_DELETE) {
+ updated += hashTypeDelete(key->value, field->ptr);
+ if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+ continue;
+ }
+
+ int low_flags = HASH_SET_COPY;
+ /* If CFIELDS is active, we can pass the ownership of the
+ * SDS object to the low level function that sets the field
+ * to avoid a useless copy. */
+ if (flags & REDISMODULE_HASH_CFIELDS)
+ low_flags |= HASH_SET_TAKE_FIELD;
+
+ robj *argv[2] = {field,value};
+ hashTypeTryConversion(key->value,argv,0,1);
+ updated += hashTypeSet(key->value, field->ptr, value->ptr, low_flags);
+
+ /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(),
+ * however we still have to release the 'field' object shell. */
+ if (flags & REDISMODULE_HASH_CFIELDS) {
+ field->ptr = NULL; /* Prevent the SDS string from being freed. */
+ decrRefCount(field);
+ }
+ }
+ va_end(ap);
+ moduleDelKeyIfEmpty(key);
+ return updated;
+}
+
+/* Get fields from a hash value. This function is called using a variable
+ * number of arguments, alternating a field name (as a RedisModuleString
+ * pointer) with a pointer to a RedisModuleString pointer, that is set to the
+ * value of the field if the field exists, or NULL if the field does not exist.
+ * At the end of the field/value-ptr pairs, NULL must be specified as last
+ * argument to signal the end of the arguments in the variadic function.
+ *
+ * This is an example usage:
+ *
+ * RedisModuleString *first, *second;
+ * RedisModule_HashGet(mykey,REDISMODULE_HASH_NONE,argv[1],&first,
+ * argv[2],&second,NULL);
+ *
+ * As with RedisModule_HashSet() the behavior of the command can be specified
+ * by passing flags different than REDISMODULE_HASH_NONE:
+ *
+ * REDISMODULE_HASH_CFIELDS: field names are null terminated C strings.
+ *
+ * REDISMODULE_HASH_EXISTS: instead of setting the value of the field
+ * expecting a RedisModuleString pointer to pointer, the function just
+ * reports if the field exists or not and expects an integer pointer
+ * as the second element of each pair.
+ *
+ * Example of REDISMODULE_HASH_CFIELDS:
+ *
+ *     RedisModuleString *username, *hashedpass;
+ *     RedisModule_HashGet(mykey,REDISMODULE_HASH_CFIELDS,
+ *                         "username",&username,"hp",&hashedpass,NULL);
+ *
+ * Example of REDISMODULE_HASH_EXISTS:
+ *
+ *     int exists;
+ *     RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,argv[1],&exists,NULL);
+ *
+ * The function returns REDISMODULE_OK on success and REDISMODULE_ERR if
+ * the key is not a hash value.
+ *
+ * Memory management:
+ *
+ * The returned RedisModuleString objects should be released with
+ * RedisModule_FreeString(), or by enabling automatic memory management.
+ */
+int RM_HashGet(RedisModuleKey *key, int flags, ...) {
+ va_list ap;
+ if (key->value && key->value->type != OBJ_HASH) return REDISMODULE_ERR;
+
+ va_start(ap, flags);
+ while(1) {
+ RedisModuleString *field, **valueptr;
+ int *existsptr;
+ /* Get the field object and the value pointer to pointer. */
+ if (flags & REDISMODULE_HASH_CFIELDS) {
+ char *cfield = va_arg(ap,char*);
+ if (cfield == NULL) break;
+ field = createRawStringObject(cfield,strlen(cfield));
+ } else {
+ field = va_arg(ap,RedisModuleString*);
+ if (field == NULL) break;
+ }
+
+ /* Query the hash for existence or value object. */
+ if (flags & REDISMODULE_HASH_EXISTS) {
+ existsptr = va_arg(ap,int*);
+ if (key->value)
+ *existsptr = hashTypeExists(key->value,field->ptr);
+ else
+ *existsptr = 0;
+ } else {
+ valueptr = va_arg(ap,RedisModuleString**);
+ if (key->value) {
+ *valueptr = hashTypeGetValueObject(key->value,field->ptr);
+ if (*valueptr) {
+ robj *decoded = getDecodedObject(*valueptr);
+ decrRefCount(*valueptr);
+ *valueptr = decoded;
+ }
+ if (*valueptr)
+ autoMemoryAdd(key->ctx,REDISMODULE_AM_STRING,*valueptr);
+ } else {
+ *valueptr = NULL;
+ }
+ }
+
+ /* Cleanup */
+ if (flags & REDISMODULE_HASH_CFIELDS) decrRefCount(field);
+ }
+ va_end(ap);
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Redis <-> Modules generic Call() API
+ * -------------------------------------------------------------------------- */
+
+/* Create a new RedisModuleCallReply object. The processing of the reply
+ * is lazy, the object is just populated with the raw protocol and later
+ * is processed as needed. Initially we just make sure to set the right
+ * reply type, which is extremely cheap to do. */
+RedisModuleCallReply *moduleCreateCallReplyFromProto(RedisModuleCtx *ctx, sds proto) {
+ RedisModuleCallReply *reply = zmalloc(sizeof(*reply));
+ reply->ctx = ctx;
+ reply->proto = proto;
+ reply->protolen = sdslen(proto);
+ reply->flags = REDISMODULE_REPLYFLAG_TOPARSE; /* Lazy parsing. */
+ switch(proto[0]) {
+ case '$':
+ case '+': reply->type = REDISMODULE_REPLY_STRING; break;
+ case '-': reply->type = REDISMODULE_REPLY_ERROR; break;
+ case ':': reply->type = REDISMODULE_REPLY_INTEGER; break;
+ case '*': reply->type = REDISMODULE_REPLY_ARRAY; break;
+ default: reply->type = REDISMODULE_REPLY_UNKNOWN; break;
+ }
+ if ((proto[0] == '*' || proto[0] == '$') && proto[1] == '-')
+ reply->type = REDISMODULE_REPLY_NULL;
+ return reply;
+}
+
+void moduleParseCallReply_Int(RedisModuleCallReply *reply);
+void moduleParseCallReply_BulkString(RedisModuleCallReply *reply);
+void moduleParseCallReply_SimpleString(RedisModuleCallReply *reply);
+void moduleParseCallReply_Array(RedisModuleCallReply *reply);
+
+/* Do nothing if REDISMODULE_REPLYFLAG_TOPARSE is false, otherwise
+ * use the protocol of the reply in reply->proto in order to fill the
+ * reply with parsed data according to the reply type. */
+void moduleParseCallReply(RedisModuleCallReply *reply) {
+ if (!(reply->flags & REDISMODULE_REPLYFLAG_TOPARSE)) return;
+ reply->flags &= ~REDISMODULE_REPLYFLAG_TOPARSE;
+
+ switch(reply->proto[0]) {
+ case ':': moduleParseCallReply_Int(reply); break;
+ case '$': moduleParseCallReply_BulkString(reply); break;
+ case '-': /* handled by next item. */
+ case '+': moduleParseCallReply_SimpleString(reply); break;
+ case '*': moduleParseCallReply_Array(reply); break;
+ }
+}
+
+void moduleParseCallReply_Int(RedisModuleCallReply *reply) {
+ char *proto = reply->proto;
+ char *p = strchr(proto+1,'\r');
+
+ string2ll(proto+1,p-proto-1,&reply->val.ll);
+ reply->protolen = p-proto+2;
+ reply->type = REDISMODULE_REPLY_INTEGER;
+}
+
+void moduleParseCallReply_BulkString(RedisModuleCallReply *reply) {
+ char *proto = reply->proto;
+ char *p = strchr(proto+1,'\r');
+ long long bulklen;
+
+ string2ll(proto+1,p-proto-1,&bulklen);
+ if (bulklen == -1) {
+ reply->protolen = p-proto+2;
+ reply->type = REDISMODULE_REPLY_NULL;
+ } else {
+ reply->val.str = p+2;
+ reply->len = bulklen;
+ reply->protolen = p-proto+2+bulklen+2;
+ reply->type = REDISMODULE_REPLY_STRING;
+ }
+}
+
+void moduleParseCallReply_SimpleString(RedisModuleCallReply *reply) {
+ char *proto = reply->proto;
+ char *p = strchr(proto+1,'\r');
+
+ reply->val.str = proto+1;
+ reply->len = p-proto-1;
+ reply->protolen = p-proto+2;
+ reply->type = proto[0] == '+' ? REDISMODULE_REPLY_STRING :
+ REDISMODULE_REPLY_ERROR;
+}
+
+void moduleParseCallReply_Array(RedisModuleCallReply *reply) {
+ char *proto = reply->proto;
+ char *p = strchr(proto+1,'\r');
+ long long arraylen, j;
+
+ string2ll(proto+1,p-proto-1,&arraylen);
+ p += 2;
+
+ if (arraylen == -1) {
+ reply->protolen = p-proto;
+ reply->type = REDISMODULE_REPLY_NULL;
+ return;
+ }
+
+ reply->val.array = zmalloc(sizeof(RedisModuleCallReply)*arraylen);
+ reply->len = arraylen;
+ for (j = 0; j < arraylen; j++) {
+ RedisModuleCallReply *ele = reply->val.array+j;
+ ele->flags = REDISMODULE_REPLYFLAG_NESTED |
+ REDISMODULE_REPLYFLAG_TOPARSE;
+ ele->proto = p;
+ ele->ctx = reply->ctx;
+ moduleParseCallReply(ele);
+ p += ele->protolen;
+ }
+ reply->protolen = p-proto;
+ reply->type = REDISMODULE_REPLY_ARRAY;
+}
+
+/* Free a Call reply and all the nested replies it contains if it's an
+ * array. */
+void RM_FreeCallReply_Rec(RedisModuleCallReply *reply, int freenested){
+ /* Don't free nested replies by default: the user must always free the
+ * toplevel reply. However be gentle and don't crash if the module
+ * misuses the API. */
+ if (!freenested && reply->flags & REDISMODULE_REPLYFLAG_NESTED) return;
+
+ if (!(reply->flags & REDISMODULE_REPLYFLAG_TOPARSE)) {
+ if (reply->type == REDISMODULE_REPLY_ARRAY) {
+ size_t j;
+ for (j = 0; j < reply->len; j++)
+ RM_FreeCallReply_Rec(reply->val.array+j,1);
+ zfree(reply->val.array);
+ }
+ }
+
+ /* For nested replies, we don't free reply->proto (which if not NULL
+ * references the parent reply->proto buffer), nor the structure
+ * itself which is allocated as an array of structures, and is freed
+ * when the array value is released. */
+ if (!(reply->flags & REDISMODULE_REPLYFLAG_NESTED)) {
+ if (reply->proto) sdsfree(reply->proto);
+ zfree(reply);
+ }
+}
+
+/* Wrapper for the recursive free reply function. This is needed in order
+ * to have the first level function return on nested replies, but only
+ * if called by the module API. */
+void RM_FreeCallReply(RedisModuleCallReply *reply) {
+
+ RedisModuleCtx *ctx = reply->ctx;
+ RM_FreeCallReply_Rec(reply,0);
+ autoMemoryFreed(ctx,REDISMODULE_AM_REPLY,reply);
+}
+
+/* Return the reply type. */
+int RM_CallReplyType(RedisModuleCallReply *reply) {
+ if (!reply) return REDISMODULE_REPLY_UNKNOWN;
+ return reply->type;
+}
+
+/* Return the reply type length, where applicable. */
+size_t RM_CallReplyLength(RedisModuleCallReply *reply) {
+ moduleParseCallReply(reply);
+ switch(reply->type) {
+ case REDISMODULE_REPLY_STRING:
+ case REDISMODULE_REPLY_ERROR:
+ case REDISMODULE_REPLY_ARRAY:
+ return reply->len;
+ default:
+ return 0;
+ }
+}
+
+/* Return the 'idx'-th nested call reply element of an array reply, or NULL
+ * if the reply type is wrong or the index is out of range.
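+ *
+ * For example (an illustrative sketch, not from the original comment), to
+ * walk all the elements of an array reply:
+ *
+ *     size_t j, len = RedisModule_CallReplyLength(reply);
+ *     for (j = 0; j < len; j++) {
+ *         RedisModuleCallReply *ele =
+ *             RedisModule_CallReplyArrayElement(reply,j);
+ *         if (RedisModule_CallReplyType(ele) == REDISMODULE_REPLY_STRING) {
+ *             size_t elen;
+ *             const char *ptr = RedisModule_CallReplyStringPtr(ele,&elen);
+ *         }
+ *     }
+ */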
+RedisModuleCallReply *RM_CallReplyArrayElement(RedisModuleCallReply *reply, size_t idx) {
+ moduleParseCallReply(reply);
+ if (reply->type != REDISMODULE_REPLY_ARRAY) return NULL;
+ if (idx >= reply->len) return NULL;
+ return reply->val.array+idx;
+}
+
+/* Return the long long of an integer reply. */
+long long RM_CallReplyInteger(RedisModuleCallReply *reply) {
+ moduleParseCallReply(reply);
+ if (reply->type != REDISMODULE_REPLY_INTEGER) return LLONG_MIN;
+ return reply->val.ll;
+}
+
+/* Return the pointer and length of a string or error reply. */
+const char *RM_CallReplyStringPtr(RedisModuleCallReply *reply, size_t *len) {
+ moduleParseCallReply(reply);
+ if (reply->type != REDISMODULE_REPLY_STRING &&
+ reply->type != REDISMODULE_REPLY_ERROR) return NULL;
+ if (len) *len = reply->len;
+ return reply->val.str;
+}
+
+/* Return a new string object from a call reply of type string, error or
+ * integer. Otherwise (wrong reply type) return NULL. */
+RedisModuleString *RM_CreateStringFromCallReply(RedisModuleCallReply *reply) {
+ moduleParseCallReply(reply);
+ switch(reply->type) {
+ case REDISMODULE_REPLY_STRING:
+ case REDISMODULE_REPLY_ERROR:
+ return RM_CreateString(reply->ctx,reply->val.str,reply->len);
+ case REDISMODULE_REPLY_INTEGER: {
+ char buf[64];
+ int len = ll2string(buf,sizeof(buf),reply->val.ll);
+ return RM_CreateString(reply->ctx,buf,len);
+ }
+ default: return NULL;
+ }
+}
+
+/* Returns an array of robj pointers, and populates *argcp with the number
+ * of items, by parsing the format specifier "fmt" as described for
+ * the RM_Call(), RM_Replicate() and other module APIs.
+ *
+ * The integer pointed to by 'flags' is populated with flags according
+ * to special modifiers in "fmt". For now only one exists:
+ *
+ * "!" -> REDISMODULE_ARGV_REPLICATE
+ *
+ * On error (format specifier error) NULL is returned and nothing is
+ * allocated. On success the argument vector is returned. */
+
+#define REDISMODULE_ARGV_REPLICATE (1<<0)
+
+robj **moduleCreateArgvFromUserFormat(const char *cmdname, const char *fmt, int *argcp, int *flags, va_list ap) {
+ int argc = 0, argv_size, j;
+ robj **argv = NULL;
+
+ /* As a first guess to avoid useless reallocations, size argv to
+ * hold one argument for each char specifier in 'fmt'. */
+ argv_size = strlen(fmt)+1; /* +1 because of the command name. */
+ argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+ /* Build the arguments vector based on the format specifier. */
+ argv[0] = createStringObject(cmdname,strlen(cmdname));
+ argc++;
+
+ /* Parse the format specifier and build the rest of the argument vector. */
+ const char *p = fmt;
+ while(*p) {
+ if (*p == 'c') {
+ char *cstr = va_arg(ap,char*);
+ argv[argc++] = createStringObject(cstr,strlen(cstr));
+ } else if (*p == 's') {
+ robj *obj = va_arg(ap,void*);
+ argv[argc++] = obj;
+ incrRefCount(obj);
+ } else if (*p == 'b') {
+ char *buf = va_arg(ap,char*);
+ size_t len = va_arg(ap,size_t);
+ argv[argc++] = createStringObject(buf,len);
+ } else if (*p == 'l') {
+ long long ll = va_arg(ap,long long);
+ argv[argc++] = createObject(OBJ_STRING,sdsfromlonglong(ll));
+ } else if (*p == 'v') {
+ /* A vector of strings */
+ robj **v = va_arg(ap, void*);
+ size_t vlen = va_arg(ap, size_t);
+
+ /* We need to grow argv to hold the vector's elements.
+ * We resize by vector_len-1 elements, because we held
+ * one element in argv for the vector already */
+ argv_size += vlen-1;
+ argv = zrealloc(argv,sizeof(robj*)*argv_size);
+
+ size_t i = 0;
+ for (i = 0; i < vlen; i++) {
+ incrRefCount(v[i]);
+ argv[argc++] = v[i];
+ }
+ } else if (*p == '!') {
+ if (flags) (*flags) |= REDISMODULE_ARGV_REPLICATE;
+ } else {
+ goto fmterr;
+ }
+ p++;
+ }
+ *argcp = argc;
+ return argv;
+
+fmterr:
+ for (j = 0; j < argc; j++)
+ decrRefCount(argv[j]);
+ zfree(argv);
+ return NULL;
+}
+
+/* Exported API to call any Redis command from modules.
+ * On success a RedisModuleCallReply object is returned, otherwise
+ * NULL is returned and errno is set to the following values:
+ *
+ * EINVAL: non existing command, wrong arity, wrong format specifier.
+ * EPERM: operation in Cluster instance with key in non local slot.
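+ *
+ * The format specifiers accepted by 'fmt' (implemented by
+ * moduleCreateArgvFromUserFormat() above) are: 'c' (null terminated C
+ * string), 's' (RedisModuleString), 'b' (buffer pointer + size_t length),
+ * 'l' (long long), 'v' (vector of RedisModuleString + size_t count), and
+ * '!' (replicate the command).
+ *
+ * Example (an illustrative sketch; 'ctx' and 'argv' belong to the calling
+ * command implementation):
+ *
+ *     RedisModuleCallReply *reply;
+ *     reply = RedisModule_Call(ctx,"INCRBY","sc",argv[1],"10");
+ *     if (reply != NULL &&
+ *         RedisModule_CallReplyType(reply) == REDISMODULE_REPLY_INTEGER)
+ *     {
+ *         long long val = RedisModule_CallReplyInteger(reply);
+ *         RedisModule_ReplyWithLongLong(ctx,val);
+ *     }
+ *     if (reply) RedisModule_FreeCallReply(reply);
+ */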
+RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
+ struct redisCommand *cmd;
+ client *c = NULL;
+ robj **argv = NULL;
+ int argc = 0, flags = 0;
+ va_list ap;
+ RedisModuleCallReply *reply = NULL;
+ int replicate = 0; /* Replicate this command? */
+
+ cmd = lookupCommandByCString((char*)cmdname);
+ if (!cmd) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* Create the client and dispatch the command. */
+ va_start(ap, fmt);
+ c = createClient(-1);
+ argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+ replicate = flags & REDISMODULE_ARGV_REPLICATE;
+ va_end(ap);
+
+ /* Setup our fake client for command execution. */
+ c->flags |= CLIENT_MODULE;
+ c->db = ctx->client->db;
+ c->argv = argv;
+ c->argc = argc;
+ c->cmd = c->lastcmd = cmd;
+ /* We handle the above format error only when the client is set up so that
+ * we can free it normally. */
+ if (argv == NULL) goto cleanup;
+
+ /* Basic arity checks. */
+ if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) {
+ errno = EINVAL;
+ goto cleanup;
+ }
+
+ /* If this is a Redis Cluster node, we need to make sure the module is not
+ * trying to access non-local keys, with the exception of commands
+ * received from our master. */
+ if (server.cluster_enabled && !(ctx->client->flags & CLIENT_MASTER)) {
+ /* Duplicate relevant flags in the module client. */
+ c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
+ c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING);
+ if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+ server.cluster->myself)
+ {
+ errno = EPERM;
+ goto cleanup;
+ }
+ }
+
+ /* If we are using single commands replication, we need to wrap what
+ * we propagate into a MULTI/EXEC block, so that it will be atomic like
+ * a Lua script in the context of AOF and slaves. */
+ if (replicate) moduleReplicateMultiIfNeeded(ctx);
+
+ /* Run the command */
+ int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
+ if (replicate) {
+ call_flags |= CMD_CALL_PROPAGATE_AOF;
+ call_flags |= CMD_CALL_PROPAGATE_REPL;
+ }
+ call(c,call_flags);
+
+ /* Convert the result of the Redis command into a module call reply.
+ * The first thing we need is to create a single string from the client
+ * output buffers. */
+ sds proto = sdsnewlen(c->buf,c->bufpos);
+ c->bufpos = 0;
+ while(listLength(c->reply)) {
+ clientReplyBlock *o = listNodeValue(listFirst(c->reply));
+
+ proto = sdscatlen(proto,o->buf,o->used);
+ listDelNode(c->reply,listFirst(c->reply));
+ }
+ reply = moduleCreateCallReplyFromProto(ctx,proto);
+ autoMemoryAdd(ctx,REDISMODULE_AM_REPLY,reply);
+
+cleanup:
+ freeClient(c);
+ return reply;
+}
+
+/* Return a pointer, and a length, to the protocol returned by the command
+ * that returned the reply object. */
+const char *RM_CallReplyProto(RedisModuleCallReply *reply, size_t *len) {
+ if (reply->proto) *len = sdslen(reply->proto);
+ return reply->proto;
+}
+
+/* --------------------------------------------------------------------------
+ * Modules data types
+ *
+ * When String DMA or using existing data structures is not enough, it is
+ * possible to create new data types from scratch and export them to
+ * Redis. The module must provide a set of callbacks for handling the
+ * new values exported (for example in order to provide RDB saving/loading,
+ * AOF rewrite, and so forth). In this section we define this API.
+ * -------------------------------------------------------------------------- */
+
+/* Turn a 9 character name in the specified charset and a 10 bit encver into
+ * a single 64 bit unsigned integer that represents this exact module name
+ * and version. This final number is called a "type ID" and is used when
+ * writing module exported values to RDB files, in order to re-associate the
+ * value to the right module to load them during RDB loading.
+ *
+ * If the string is not of the right length or the charset is wrong, or
+ * if encver is outside the unsigned 10 bit integer range, 0 is returned,
+ * otherwise the function returns the right type ID.
+ *
+ * The resulting 64 bit integer is composed as follows:
+ *
+ * (high order bits) 6|6|6|6|6|6|6|6|6|10 (low order bits)
+ *
+ * The first 6 bits value is the first character, name[0], while the last
+ * 6 bits value, immediately before the 10 bits integer, is name[8].
+ * The last 10 bits are the encoding version.
+ *
+ * Note that a name and encver combo of "AAAAAAAAA" and 0 will produce
+ * zero as return value, which is the same value we use to signal errors;
+ * thus this combination is invalid, and also a poor choice, since type
+ * names should be varied to avoid collisions.
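+ *
+ * As a sketch of the layout (derived from the code below, not part of the
+ * original comment), the components can be peeled back off a type ID:
+ *
+ *     int encver = id & 1023;        // Low 10 bits.
+ *     uint64_t namebits = id >> 10;  // 9 x 6 bits, name[8] in the low 6.
+ */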
+
+const char *ModuleTypeNameCharSet =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-_";
+
+uint64_t moduleTypeEncodeId(const char *name, int encver) {
+ /* We use 64 symbols so that we can map each character into 6 bits
+ * of the final output. */
+ const char *cset = ModuleTypeNameCharSet;
+ if (strlen(name) != 9) return 0;
+ if (encver < 0 || encver > 1023) return 0;
+
+ uint64_t id = 0;
+ for (int j = 0; j < 9; j++) {
+ char *p = strchr(cset,name[j]);
+ if (!p) return 0;
+ unsigned long pos = p-cset;
+ id = (id << 6) | pos;
+ }
+ id = (id << 10) | encver;
+ return id;
+}
+
+/* Search, in the list of exported data types of all the modules registered,
+ * a type with the same name as the one given. Returns the moduleType
+ * structure pointer if such a module is found, or NULL otherwise. */
+moduleType *moduleTypeLookupModuleByName(const char *name) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ listIter li;
+ listNode *ln;
+
+ listRewind(module->types,&li);
+ while((ln = listNext(&li))) {
+ moduleType *mt = ln->value;
+ if (memcmp(name,mt->name,sizeof(mt->name)) == 0) {
+ dictReleaseIterator(di);
+ return mt;
+ }
+ }
+ }
+ dictReleaseIterator(di);
+ return NULL;
+}
+
+/* Lookup a module by ID, with caching. This function is used during RDB
+ * loading. Modules exporting data types can never be unloaded, so our
+ * cache does not need to expire. */
+#define MODULE_LOOKUP_CACHE_SIZE 3
+
+moduleType *moduleTypeLookupModuleByID(uint64_t id) {
+ static struct {
+ uint64_t id;
+ moduleType *mt;
+ } cache[MODULE_LOOKUP_CACHE_SIZE];
+
+ /* Search in cache to start. */
+ int j;
+ for (j = 0; j < MODULE_LOOKUP_CACHE_SIZE && cache[j].mt != NULL; j++)
+ if (cache[j].id == id) return cache[j].mt;
+
+ /* Slow module by module lookup. */
+ moduleType *mt = NULL;
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ while ((de = dictNext(di)) != NULL && mt == NULL) {
+ struct RedisModule *module = dictGetVal(de);
+ listIter li;
+ listNode *ln;
+
+ listRewind(module->types,&li);
+ while((ln = listNext(&li))) {
+ moduleType *this_mt = ln->value;
+ /* Compare only the 54 bit module identifier and not the
+ * encoding version. */
+ if (this_mt->id >> 10 == id >> 10) {
+ mt = this_mt;
+ break;
+ }
+ }
+ }
+ dictReleaseIterator(di);
+
+ /* Add to cache if possible. */
+ if (mt && j < MODULE_LOOKUP_CACHE_SIZE) {
+ cache[j].id = id;
+ cache[j].mt = mt;
+ }
+ return mt;
+}
+
+/* Turn an (unresolved) module ID into a type name, to show the user an
+ * error when RDB files contain module data we can't load.
+ * The buffer pointed to by 'name' must be at least 10 bytes. The function will
+ * fill it with a null terminated module name. */
+void moduleTypeNameByID(char *name, uint64_t moduleid) {
+ const char *cset = ModuleTypeNameCharSet;
+
+ name[9] = '\0';
+ char *p = name+8;
+ moduleid >>= 10;
+ for (int j = 0; j < 9; j++) {
+ *p-- = cset[moduleid & 63];
+ moduleid >>= 6;
+ }
+}
+
+/* Register a new data type exported by the module. The parameters are the
+ * following. For in-depth documentation please check the modules API
+ * documentation, especially the TYPES.md file.
+ *
+ * * **name**: A 9 characters data type name that MUST be unique in the Redis
+ * Modules ecosystem. Be creative... and there will be no collisions. Use
+ * the charset A-Z a-z 0-9, plus the two "-_" characters. A good
+ * idea is to use, for example `<typename>-<vendor>`. For example
+ * "tree-AntZ" may mean "Tree data structure by @antirez". To use both
+ * lower case and upper case letters helps in order to prevent collisions.
+ * * **encver**: Encoding version, that is, the version of the serialization
+ * that a module used in order to persist data. As long as the "name"
+ * matches, the RDB loading will be dispatched to the type callbacks
+ * whatever 'encver' is used, however the module can understand whether
+ * the encoding it must load is from an older version of the module.
+ * For example the module "tree-AntZ" initially used encver=0. Later
+ * after an upgrade, it started to serialize data in a different format
+ * and to register the type with encver=1. However this module may
+ * still load old data produced by an older version if the rdb_load
+ * callback is able to check the encver value and act accordingly.
+ * The encver must be a positive value between 0 and 1023.
+ * * **typemethods_ptr** is a pointer to a RedisModuleTypeMethods structure
+ * that should be populated with the methods callbacks and structure
+ * version, like in the following example:
+ *
+ * RedisModuleTypeMethods tm = {
+ * .version = REDISMODULE_TYPE_METHOD_VERSION,
+ * .rdb_load = myType_RDBLoadCallBack,
+ * .rdb_save = myType_RDBSaveCallBack,
+ * .aof_rewrite = myType_AOFRewriteCallBack,
+ * .free = myType_FreeCallBack,
+ *
+ * // Optional fields
+ * .digest = myType_DigestCallBack,
+ * .mem_usage = myType_MemUsageCallBack,
+ * }
+ *
+ * * **rdb_load**: A callback function pointer that loads data from RDB files.
+ * * **rdb_save**: A callback function pointer that saves data to RDB files.
+ * * **aof_rewrite**: A callback function pointer that rewrites data as commands.
+ * * **digest**: A callback function pointer that is used for `DEBUG DIGEST`.
+ * * **free**: A callback function pointer that can free a type value.
+ *
+ * The **digest** and **mem_usage** methods should currently be omitted since
+ * they are not yet implemented inside the Redis modules core.
+ *
+ * Note: the module name "AAAAAAAAA" is reserved and produces an error, it
+ * happens to be pretty lame as well.
+ *
+ * If there is already a module registering a type with the same name,
+ * or if the module name or encver is invalid, NULL is returned.
+ * Otherwise the new type is registered into Redis, and a reference of
+ * type RedisModuleType is returned: the caller of the function should store
+ * this reference into a global variable to make future use of it in the
+ * modules type API, since a single module may register multiple types.
+ * Example code fragment:
+ *
+ * static RedisModuleType *BalancedTreeType;
+ *
+ * int RedisModule_OnLoad(RedisModuleCtx *ctx) {
+ * // some code here ...
+ * BalancedTreeType = RM_CreateDataType(...);
+ * }
+ */
+moduleType *RM_CreateDataType(RedisModuleCtx *ctx, const char *name, int encver, void *typemethods_ptr) {
+ uint64_t id = moduleTypeEncodeId(name,encver);
+ if (id == 0) return NULL;
+ if (moduleTypeLookupModuleByName(name) != NULL) return NULL;
+
+ long typemethods_version = ((long*)typemethods_ptr)[0];
+ if (typemethods_version == 0) return NULL;
+
+ struct typemethods {
+ uint64_t version;
+ moduleTypeLoadFunc rdb_load;
+ moduleTypeSaveFunc rdb_save;
+ moduleTypeRewriteFunc aof_rewrite;
+ moduleTypeMemUsageFunc mem_usage;
+ moduleTypeDigestFunc digest;
+ moduleTypeFreeFunc free;
+ } *tms = (struct typemethods*) typemethods_ptr;
+
+ moduleType *mt = zcalloc(sizeof(*mt));
+ mt->id = id;
+ mt->module = ctx->module;
+ mt->rdb_load = tms->rdb_load;
+ mt->rdb_save = tms->rdb_save;
+ mt->aof_rewrite = tms->aof_rewrite;
+ mt->mem_usage = tms->mem_usage;
+ mt->digest = tms->digest;
+ mt->free = tms->free;
+ memcpy(mt->name,name,sizeof(mt->name));
+ listAddNodeTail(ctx->module->types,mt);
+ return mt;
+}
+
+/* If the key is open for writing, set the specified module type object
+ * as the value of the key, deleting the old value if any.
+ * On success REDISMODULE_OK is returned. If the key is not open for
+ * writing or there is an active iterator, REDISMODULE_ERR is returned. */
+int RM_ModuleTypeSetValue(RedisModuleKey *key, moduleType *mt, void *value) {
+ if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR;
+ RM_DeleteKey(key);
+ robj *o = createModuleObject(mt,value);
+ setKey(key->db,key->key,o);
+ decrRefCount(o);
+ key->value = o;
+ return REDISMODULE_OK;
+}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type pointer of the value stored at key.
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+moduleType *RM_ModuleTypeGetType(RedisModuleKey *key) {
+ if (key == NULL ||
+ key->value == NULL ||
+ RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+ moduleValue *mv = key->value->ptr;
+ return mv->type;
+}
+
+/* Assuming RedisModule_KeyType() returned REDISMODULE_KEYTYPE_MODULE on
+ * the key, returns the module type low-level value stored at key, as
+ * it was set by the user via RedisModule_ModuleTypeSetValue().
+ *
+ * If the key is NULL, is not associated with a module type, or is empty,
+ * then NULL is returned instead. */
+void *RM_ModuleTypeGetValue(RedisModuleKey *key) {
+ if (key == NULL ||
+ key->value == NULL ||
+ RM_KeyType(key) != REDISMODULE_KEYTYPE_MODULE) return NULL;
+ moduleValue *mv = key->value->ptr;
+ return mv->value;
+}
+
+/* --------------------------------------------------------------------------
+ * RDB loading and saving functions
+ * -------------------------------------------------------------------------- */
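+
+/* As an illustrative sketch (the 'MyType' structure and the callback names
+ * are hypothetical, not part of this file), a typical rdb_save/rdb_load
+ * pair of a module data type uses the functions below like this:
+ *
+ *     void MyType_RDBSave(RedisModuleIO *io, void *value) {
+ *         struct MyType *t = value;
+ *         RedisModule_SaveUnsigned(io,t->count);
+ *         RedisModule_SaveStringBuffer(io,t->buf,t->buflen);
+ *     }
+ *
+ *     void *MyType_RDBLoad(RedisModuleIO *io, int encver) {
+ *         struct MyType *t = RedisModule_Alloc(sizeof(*t));
+ *         t->count = RedisModule_LoadUnsigned(io);
+ *         t->buf = RedisModule_LoadStringBuffer(io,&t->buflen);
+ *         return t;
+ *     }
+ */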
+
+/* Called when there is a load error in the context of a module. Unlike
+ * for the built-in types, this cannot be recovered. */
+void moduleRDBLoadError(RedisModuleIO *io) {
+ serverLog(LL_WARNING,
+ "Error loading data from RDB (short read or EOF). "
+ "Read performed by module '%s' about type '%s' "
+ "after reading '%llu' bytes of a value.",
+ io->type->module->name,
+ io->type->name,
+ (unsigned long long)io->bytes);
+ exit(1);
+}
+
+/* Save an unsigned 64 bit value into the RDB file. This function should only
+ * be called in the context of the rdb_save method of modules implementing new
+ * data types. */
+void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) {
+ if (io->error) return;
+ /* Save opcode. */
+ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveLen(io->rio, value);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* Load an unsigned 64 bit value from the RDB file. This function should only
+ * be called in the context of the rdb_load method of modules implementing
+ * new data types. */
+uint64_t RM_LoadUnsigned(RedisModuleIO *io) {
+ if (io->ver == 2) {
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_UINT) goto loaderr;
+ }
+ uint64_t value;
+ int retval = rdbLoadLenByRef(io->rio, NULL, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0; /* Never reached. */
+}
+
+/* Like RedisModule_SaveUnsigned() but for signed 64 bit values. */
+void RM_SaveSigned(RedisModuleIO *io, int64_t value) {
+ union {uint64_t u; int64_t i;} conv;
+ conv.i = value;
+ RM_SaveUnsigned(io,conv.u);
+}
+
+/* Like RedisModule_LoadUnsigned() but for signed 64 bit values. */
+int64_t RM_LoadSigned(RedisModuleIO *io) {
+ union {uint64_t u; int64_t i;} conv;
+ conv.u = RM_LoadUnsigned(io);
+ return conv.i;
+}
+
+/* In the context of the rdb_save method of a module type, saves a
+ * string into the RDB file taking as input a RedisModuleString.
+ *
+ * The string can be later loaded with RedisModule_LoadString() or
+ * other Load family functions expecting a serialized string inside
+ * the RDB file. */
+void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) {
+ if (io->error) return;
+ /* Save opcode. */
+ ssize_t retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveStringObject(io->rio, s);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* Like RedisModule_SaveString() but takes a raw C pointer and length
+ * as input. */
+void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) {
+ if (io->error) return;
+ /* Save opcode. */
+ ssize_t retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveRawString(io->rio, (unsigned char*)str,len);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* Implements RM_LoadString() and RM_LoadStringBuffer() */
+void *moduleLoadString(RedisModuleIO *io, int plain, size_t *lenptr) {
+ if (io->ver == 2) {
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_STRING) goto loaderr;
+ }
+ void *s = rdbGenericLoadStringObject(io->rio,
+ plain ? RDB_LOAD_PLAIN : RDB_LOAD_NONE, lenptr);
+ if (s == NULL) goto loaderr;
+ return s;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return NULL; /* Never reached. */
+}
+
+/* In the context of the rdb_load method of a module data type, loads a string
+ * from the RDB file, that was previously saved with the
+ * RedisModule_SaveString() family of functions.
+ *
+ * The returned string is a newly allocated RedisModuleString object, and
+ * the user should at some point free it with a call to RedisModule_FreeString().
+ *
+ * If the data structure does not store strings as RedisModuleString objects,
+ * the similar function RedisModule_LoadStringBuffer() could be used instead. */
+RedisModuleString *RM_LoadString(RedisModuleIO *io) {
+ return moduleLoadString(io,0,NULL);
+}
+
+/* Like RedisModule_LoadString() but returns a heap allocated string that
+ * was allocated with RedisModule_Alloc(), and can be resized or freed with
+ * RedisModule_Realloc() or RedisModule_Free().
+ *
+ * The size of the string is stored at '*lenptr' if not NULL.
+ * The returned string is not automatically NULL terminated, it is loaded
+ * exactly as it was stored inside the RDB file. */
+char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) {
+ return moduleLoadString(io,1,lenptr);
+}
+
+/* In the context of the rdb_save method of a module data type, saves a double
+ * value to the RDB file. The double can be a valid number, a NaN or infinity.
+ * It is possible to load back the value with RedisModule_LoadDouble(). */
+void RM_SaveDouble(RedisModuleIO *io, double value) {
+ if (io->error) return;
+ /* Save opcode. */
+ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveBinaryDoubleValue(io->rio, value);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* In the context of the rdb_load method of a module data type, loads back the
+ * double value saved by RedisModule_SaveDouble(). */
+double RM_LoadDouble(RedisModuleIO *io) {
+ if (io->ver == 2) {
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_DOUBLE) goto loaderr;
+ }
+ double value;
+ int retval = rdbLoadBinaryDoubleValue(io->rio, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0; /* Never reached. */
+}
+
+/* In the context of the rdb_save method of a module data type, saves a float
+ * value to the RDB file. The float can be a valid number, a NaN or infinity.
+ * It is possible to load back the value with RedisModule_LoadFloat(). */
+void RM_SaveFloat(RedisModuleIO *io, float value) {
+ if (io->error) return;
+ /* Save opcode. */
+ int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ /* Save value. */
+ retval = rdbSaveBinaryFloatValue(io->rio, value);
+ if (retval == -1) goto saveerr;
+ io->bytes += retval;
+ return;
+
+saveerr:
+ io->error = 1;
+}
+
+/* In the context of the rdb_load method of a module data type, loads back the
+ * float value saved by RedisModule_SaveFloat(). */
+float RM_LoadFloat(RedisModuleIO *io) {
+ if (io->ver == 2) {
+ uint64_t opcode = rdbLoadLen(io->rio,NULL);
+ if (opcode != RDB_MODULE_OPCODE_FLOAT) goto loaderr;
+ }
+ float value;
+ int retval = rdbLoadBinaryFloatValue(io->rio, &value);
+ if (retval == -1) goto loaderr;
+ return value;
+
+loaderr:
+ moduleRDBLoadError(io);
+ return 0; /* Never reached. */
+}
+
+/* --------------------------------------------------------------------------
+ * Key digest API (DEBUG DIGEST interface for modules types)
+ * -------------------------------------------------------------------------- */
+
+/* Add a new element to the digest. This function can be called multiple times
+ * one element after the other, for all the elements that constitute a given
+ * data structure. The function call must eventually be followed by a call to
+ * `RedisModule_DigestEndSequence`, once all the elements that are always
+ * in a given order have been added. See the Redis Modules data types
+ * documentation for more info. The following are quick examples that use
+ * Redis data types for illustration.
+ *
+ * To add a sequence of unordered elements (for example in the case of a Redis
+ * Set), the pattern to use is:
+ *
+ * foreach element {
+ * AddElement(element);
+ * EndSequence();
+ * }
+ *
+ * Because Sets are not ordered, every element added has a position that
+ * does not depend on the others. However if instead our elements are
+ * ordered in pairs, like the field-value pairs of a Hash, then one should
+ * use:
+ *
+ * foreach key,value {
+ * AddElement(key);
+ * AddElement(value);
+ * EndSequence();
+ * }
+ *
+ * Because the key and value will always be in the above order, while instead
+ * the single key-value pairs can appear in any position within a Redis hash.
+ *
+ * A list of ordered elements would be implemented with:
+ *
+ * foreach element {
+ * AddElement(element);
+ * }
+ * EndSequence();
+ *
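+ * In terms of the actual API (an illustrative sketch; 'node' is a
+ * hypothetical element of a module data type):
+ *
+ *     RedisModule_DigestAddStringBuffer(md,(unsigned char*)node->data,
+ *                                       node->len);
+ *     RedisModule_DigestAddLongLong(md,node->score);
+ *     RedisModule_DigestEndSequence(md);
+ *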
+ */
+void RM_DigestAddStringBuffer(RedisModuleDigest *md, unsigned char *ele, size_t len) {
+ mixDigest(md->o,ele,len);
+}
+
+/* Like `RedisModule_DigestAddStringBuffer()` but takes a long long as input
+ * that gets converted into a string before adding it to the digest. */
+void RM_DigestAddLongLong(RedisModuleDigest *md, long long ll) {
+ char buf[LONG_STR_SIZE];
+ size_t len = ll2string(buf,sizeof(buf),ll);
+ mixDigest(md->o,buf,len);
+}
+
+/* See the documentation for `RedisModule_DigestAddStringBuffer()`. */
+void RM_DigestEndSequence(RedisModuleDigest *md) {
+ xorDigest(md->x,md->o,sizeof(md->o));
+ memset(md->o,0,sizeof(md->o));
+}
+
+/* --------------------------------------------------------------------------
+ * AOF API for modules data types
+ * -------------------------------------------------------------------------- */
+
+/* Emits a command into the AOF during the AOF rewriting process. This function
+ * is only called in the context of the aof_rewrite method of data types exported
+ * by a module. The command works exactly like RedisModule_Call() in the way
+ * the parameters are passed, but it does not return anything, as the error
+ * handling is performed by Redis itself.
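+ *
+ * For example (an illustrative sketch; 'MyType' and its fields are
+ * hypothetical), an aof_rewrite callback may re-emit a value like this:
+ *
+ *     void MyType_AOFRewrite(RedisModuleIO *aof, RedisModuleString *key,
+ *                            void *value)
+ *     {
+ *         struct MyType *t = value;
+ *         RedisModule_EmitAOF(aof,"MYTYPE.SET","sl",key,(long long)t->count);
+ *     }
+ */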
+void RM_EmitAOF(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) {
+ if (io->error) return;
+ struct redisCommand *cmd;
+ robj **argv = NULL;
+ int argc = 0, flags = 0, j;
+ va_list ap;
+
+ cmd = lookupCommandByCString((char*)cmdname);
+ if (!cmd) {
+ serverLog(LL_WARNING,
+ "Fatal: AOF method for module data type '%s' tried to "
+ "emit unknown command '%s'",
+ io->type->name, cmdname);
+ io->error = 1;
+ errno = EINVAL;
+ return;
+ }
+
+ /* Emit the arguments into the AOF in Redis protocol format. */
+ va_start(ap, fmt);
+ argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
+ va_end(ap);
+ if (argv == NULL) {
+ serverLog(LL_WARNING,
+ "Fatal: AOF method for module data type '%s' tried to "
+ "call RedisModule_EmitAOF() with wrong format specifiers '%s'",
+ io->type->name, fmt);
+ io->error = 1;
+ errno = EINVAL;
+ return;
+ }
+
+ /* Bulk count. */
+ if (!io->error && rioWriteBulkCount(io->rio,'*',argc) == 0)
+ io->error = 1;
+
+ /* Arguments. */
+ for (j = 0; j < argc; j++) {
+ if (!io->error && rioWriteBulkObject(io->rio,argv[j]) == 0)
+ io->error = 1;
+ decrRefCount(argv[j]);
+ }
+ zfree(argv);
+ return;
+}
+
+/* --------------------------------------------------------------------------
+ * IO context handling
+ * -------------------------------------------------------------------------- */
+
+RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) {
+ if (io->ctx) return io->ctx; /* Can't have more than one... */
+ RedisModuleCtx ctxtemplate = REDISMODULE_CTX_INIT;
+ io->ctx = zmalloc(sizeof(RedisModuleCtx));
+ *(io->ctx) = ctxtemplate;
+ io->ctx->module = io->type->module;
+ io->ctx->client = NULL;
+ return io->ctx;
+}
+
+/* --------------------------------------------------------------------------
+ * Logging
+ * -------------------------------------------------------------------------- */
+
+/* This is the low level function implementing both:
+ *
+ * RM_Log()
+ * RM_LogIOError()
+ *
+ */
+void RM_LogRaw(RedisModule *module, const char *levelstr, const char *fmt, va_list ap) {
+ char msg[LOG_MAX_LEN];
+ size_t name_len;
+ int level;
+
+ if (!strcasecmp(levelstr,"debug")) level = LL_DEBUG;
+ else if (!strcasecmp(levelstr,"verbose")) level = LL_VERBOSE;
+ else if (!strcasecmp(levelstr,"notice")) level = LL_NOTICE;
+ else if (!strcasecmp(levelstr,"warning")) level = LL_WARNING;
+ else level = LL_VERBOSE; /* Default. */
+
+ name_len = snprintf(msg, sizeof(msg),"<%s> ", module->name);
+ vsnprintf(msg + name_len, sizeof(msg) - name_len, fmt, ap);
+ serverLogRaw(level,msg);
+}
+
+/* Produces a log message to the standard Redis log, the format accepts
+ * printf-alike specifiers, while level is a string describing the log
+ * level to use when emitting the log, and must be one of the following:
+ *
+ * * "debug"
+ * * "verbose"
+ * * "notice"
+ * * "warning"
+ *
+ * If the specified log level is invalid, verbose is used by default.
+ * There is a fixed limit to the length of the log line this function is able
+ * to emit; this limit is not specified but is guaranteed to be more than
+ * a few lines of text.
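+ *
+ * Example (illustrative only; 'elapsed' is a hypothetical variable):
+ *
+ *     RedisModule_Log(ctx,"warning","Slow operation: %lld ms",elapsed);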
+ */
+void RM_Log(RedisModuleCtx *ctx, const char *levelstr, const char *fmt, ...) {
+ if (!ctx->module) return; /* Can only log if module is initialized */
+
+ va_list ap;
+ va_start(ap, fmt);
+ RM_LogRaw(ctx->module,levelstr,fmt,ap);
+ va_end(ap);
+}
+
+/* Log errors from RDB / AOF serialization callbacks.
+ *
+ * This function should be used when a callback is returning a critical
+ * error to the caller, since it cannot load or save the data for some
+ * critical reason. */
+void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ RM_LogRaw(io->type->module,levelstr,fmt,ap);
+ va_end(ap);
+}
+
+/* --------------------------------------------------------------------------
+ * Blocking clients from modules
+ * -------------------------------------------------------------------------- */
+
+/* Readable handler for the awake pipe. We do nothing here, the awake bytes
+ * will actually be read in a more appropriate place in the
+ * moduleHandleBlockedClients() function, which is where clients are actually
+ * served. */
+void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
+ UNUSED(privdata);
+}
+
+/* This is called from blocked.c in order to unblock a client: may be called
+ * for multiple reasons while the client is in the middle of being blocked
+ * because the client is terminated, but is also called for cleanup when a
+ * client is unblocked in a clean way after replying.
+ *
+ * What we do here is just to set the client to NULL in the redis module
+ * blocked client handle. This way if the client is terminated while there
+ * is a pending threaded operation involving the blocked client, we'll know
+ * that the client no longer exists and no reply callback should be called.
+ *
+ * The structure RedisModuleBlockedClient will always be deallocated when
+ * running the list of clients blocked by a module that need to be unblocked. */
+void unblockClientFromModule(client *c) {
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+
+ /* Call the disconnection callback if any. */
+ if (bc->disconnect_callback) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.blocked_privdata = bc->privdata;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ bc->disconnect_callback(&ctx,bc);
+ moduleFreeContext(&ctx);
+ }
+
+ bc->client = NULL;
+ /* Reset the client for a new query since, for blocking commands implemented
+ * into modules, we do not do it immediately after the command returns (and
+ * the client blocks) in order to still be able to access the argument
+ * vector from callbacks. */
+ resetClient(c);
+}
+
+/* Block a client in the context of a blocking command, returning a handle
+ * which will be used, later, in order to unblock the client with a call to
+ * RedisModule_UnblockClient(). The arguments specify callback functions
+ * and a timeout after which the client is unblocked.
+ *
+ * The callbacks are called in the following contexts:
+ *
+ * reply_callback: called after a successful RedisModule_UnblockClient()
+ * call in order to reply to the client and unblock it.
+ *
+ * timeout_callback: called when the timeout is reached in order to send an
+ * error to the client.
+ *
+ * free_privdata: called in order to free the private data that is passed
+ * by the RedisModule_UnblockClient() call.
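+ *
+ * A typical pattern (a minimal sketch; 'tid', 'worker_thread' and the
+ * callback names are hypothetical) is to block the client and hand the
+ * handle to a thread that will later unblock it:
+ *
+ *     RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,
+ *         MyReply_Callback,MyTimeout_Callback,MyFreePrivdata,10000);
+ *     pthread_create(&tid,NULL,worker_thread,bc);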
+ */
+RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms) {
+ client *c = ctx->client;
+ int islua = c->flags & CLIENT_LUA;
+ int ismulti = c->flags & CLIENT_MULTI;
+
+ c->bpop.module_blocked_handle = zmalloc(sizeof(RedisModuleBlockedClient));
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+
+ /* We need to handle the invalid operation of calling modules blocking
+ * commands from Lua or MULTI. We actually create an already aborted
+ * (client set to NULL) blocked client handle, and reply to the client
+ * with an error. */
+ bc->client = (islua || ismulti) ? NULL : c;
+ bc->module = ctx->module;
+ bc->reply_callback = reply_callback;
+ bc->timeout_callback = timeout_callback;
+ bc->disconnect_callback = NULL; /* Set by RM_SetDisconnectCallback() */
+ bc->free_privdata = free_privdata;
+ bc->privdata = NULL;
+ bc->reply_client = createClient(-1);
+ bc->reply_client->flags |= CLIENT_MODULE;
+ bc->dbid = c->db->id;
+ c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0;
+
+ if (islua || ismulti) {
+ c->bpop.module_blocked_handle = NULL;
+ addReplyError(c, islua ?
+ "Blocking module command called from Lua script" :
+ "Blocking module command called from transaction");
+ } else {
+ blockClient(c,BLOCKED_MODULE);
+ }
+ return bc;
+}
+
+/* Unblock a client blocked by `RedisModule_BlockClient()`. This will trigger
+ * the reply callbacks to be called in order to reply to the client.
+ * The 'privdata' argument will be accessible by the reply callback, so
+ * the caller of this function can pass any value that is needed in order to
+ * actually reply to the client.
+ *
+ * A common usage for 'privdata' is a thread that computes something that
+ * needs to be passed to the client, including but not limited to some
+ * slow-to-compute reply or some reply obtained via networking.
+ *
+ * Note: this function can be called from threads spawned by the module. */
+int RM_UnblockClient(RedisModuleBlockedClient *bc, void *privdata) {
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ bc->privdata = privdata;
+ listAddNodeTail(moduleUnblockedClients,bc);
+ if (write(server.module_blocked_pipe[1],"A",1) != 1) {
+ /* Ignore the error, this is best-effort. */
+ }
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+ return REDISMODULE_OK;
+}
+
+/* Abort a blocked client blocking operation: the client will be unblocked
+ * without firing any callback. */
+int RM_AbortBlock(RedisModuleBlockedClient *bc) {
+ bc->reply_callback = NULL;
+ bc->disconnect_callback = NULL;
+ return RM_UnblockClient(bc,NULL);
+}
+
+/* Set a callback that will be called if a blocked client disconnects
+ * before the module has a chance to call RedisModule_UnblockClient()
+ *
+ * Usually what you want to do there is to cleanup your module state
+ * so that you can call RedisModule_UnblockClient() safely; otherwise
+ * the client will remain blocked forever if the timeout is large.
+ *
+ * Notes:
+ *
+ * 1. It is not safe to call Reply* family functions here; it is also
+ * useless since the client is gone.
+ *
+ * 2. This callback is not called if the client disconnects because of
+ * a timeout. In such a case, the client is unblocked automatically
+ * and the timeout callback is called.
+ */
+void RM_SetDisconnectCallback(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback) {
+ bc->disconnect_callback = callback;
+}
+
+/* This function will check the moduleUnblockedClients queue in order to
+ * call the reply callback and really unblock the client.
+ *
+ * Clients end up in this list because of calls to RM_UnblockClient(),
+ * however it is possible that while the module was doing work for the
+ * blocked client, it was terminated by Redis (for timeout or other reasons).
+ * When this happens the RedisModuleBlockedClient structure in the queue
+ * will have the 'client' field set to NULL. */
+void moduleHandleBlockedClients(void) {
+ listNode *ln;
+ RedisModuleBlockedClient *bc;
+
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ /* Here we unblock all the pending clients blocked in modules operations
+ * so we can read every pending "awake byte" in the pipe. */
+ char buf[1];
+ while (read(server.module_blocked_pipe[0],buf,1) == 1);
+ while (listLength(moduleUnblockedClients)) {
+ ln = listFirst(moduleUnblockedClients);
+ bc = ln->value;
+ client *c = bc->client;
+ listDelNode(moduleUnblockedClients,ln);
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+
+ /* Release the lock during the loop, as long as we don't
+ * touch the shared list. */
+
+ /* Call the reply callback if the client is valid and we have
+ * any callback. */
+ if (c && bc->reply_callback) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.flags |= REDISMODULE_CTX_BLOCKED_REPLY;
+ ctx.blocked_privdata = bc->privdata;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ ctx.blocked_client = bc;
+ bc->reply_callback(&ctx,(void**)c->argv,c->argc);
+ moduleHandlePropagationAfterCommandCallback(&ctx);
+ moduleFreeContext(&ctx);
+ }
+
+ /* Free privdata if any. */
+ if (bc->privdata && bc->free_privdata) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ if (c == NULL)
+ ctx.flags |= REDISMODULE_CTX_BLOCKED_DISCONNECTED;
+ ctx.blocked_privdata = bc->privdata;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ bc->free_privdata(&ctx,bc->privdata);
+ moduleFreeContext(&ctx);
+ }
+
+ /* It is possible that this blocked client object accumulated
+ * replies to send to the client in a thread safe context.
+ * We need to glue such replies to the client output buffer and
+ * free the temporary client we just used for the replies. */
+ if (c) {
+ if (bc->reply_client->bufpos)
+ addReplyString(c,bc->reply_client->buf,
+ bc->reply_client->bufpos);
+ if (listLength(bc->reply_client->reply))
+ listJoin(c->reply,bc->reply_client->reply);
+ c->reply_bytes += bc->reply_client->reply_bytes;
+ }
+ freeClient(bc->reply_client);
+
+ if (c != NULL) {
+ /* Before unblocking the client, set the disconnect callback
+ * to NULL, because if we reached this point, the client was
+ * properly unblocked by the module. */
+ bc->disconnect_callback = NULL;
+ unblockClient(c);
+ /* Put the client in the list of clients that need to write
+ * if there are pending replies here. This is needed since
+ * during a non blocking command the client may receive output. */
+ if (clientHasPendingReplies(c) &&
+ !(c->flags & CLIENT_PENDING_WRITE))
+ {
+ c->flags |= CLIENT_PENDING_WRITE;
+ listAddNodeHead(server.clients_pending_write,c);
+ }
+ }
+
+ /* Free 'bc' only after unblocking the client, since it is
+ * referenced in the client blocking context, and must be valid
+ * when calling unblockClient(). */
+ zfree(bc);
+
+ /* Lock again before the next iteration of the loop. */
+ pthread_mutex_lock(&moduleUnblockedClientsMutex);
+ }
+ pthread_mutex_unlock(&moduleUnblockedClientsMutex);
+}
+
+/* Called when our client timed out. After this function unblockClient()
+ * is called, and it will invalidate the blocked client. So this function
+ * does not need to do any cleanup. Eventually the module will call the
+ * API to unblock the client and the memory will be released. */
+void moduleBlockedClientTimedOut(client *c) {
+ RedisModuleBlockedClient *bc = c->bpop.module_blocked_handle;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.flags |= REDISMODULE_CTX_BLOCKED_TIMEOUT;
+ ctx.module = bc->module;
+ ctx.client = bc->client;
+ ctx.blocked_client = bc;
+ bc->timeout_callback(&ctx,(void**)c->argv,c->argc);
+ moduleFreeContext(&ctx);
+ /* For timeout events, we do not want to call the disconnect callback,
+ * because the blocked client will be automatically disconnected in
+ * this case, and the user can still hook using the timeout callback. */
+ bc->disconnect_callback = NULL;
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client. */
+int RM_IsBlockedReplyRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_BLOCKED_REPLY) != 0;
+}
+
+/* Return non-zero if a module command was called in order to fill the
+ * reply for a blocked client that timed out. */
+int RM_IsBlockedTimeoutRequest(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_BLOCKED_TIMEOUT) != 0;
+}
+
+/* Get the private data set by RedisModule_UnblockClient() */
+void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) {
+ return ctx->blocked_privdata;
+}
+
+/* Get the blocked client associated with a given context.
+ * This is useful in the reply and timeout callbacks of blocked clients,
+ * because sometimes the module keeps references to the blocked client
+ * handle around and wants to clean them up. */
+RedisModuleBlockedClient *RM_GetBlockedClientHandle(RedisModuleCtx *ctx) {
+ return ctx->blocked_client;
+}
+
+/* Return true if, when the free callback of a blocked client is called,
+ * the reason for the client to be unblocked is that it disconnected
+ * while it was blocked. */
+int RM_BlockedClientDisconnected(RedisModuleCtx *ctx) {
+ return (ctx->flags & REDISMODULE_CTX_BLOCKED_DISCONNECTED) != 0;
+}
+
+/* --------------------------------------------------------------------------
+ * Thread Safe Contexts
+ * -------------------------------------------------------------------------- */
+
+/* Return a context which can be used inside threads to make Redis context
+ * calls with certain modules APIs. If 'bc' is not NULL then the module will
+ * be bound to a blocked client, and it will be possible to use the
+ * `RedisModule_Reply*` family of functions to accumulate a reply for when the
+ * client will be unblocked. Otherwise the thread safe context will be
+ * detached from any specific client.
+ *
+ * To call non-reply APIs, the thread safe context must be prepared with:
+ *
+ * RedisModule_ThreadSafeContextLock(ctx);
+ * ... make your call here ...
+ * RedisModule_ThreadSafeContextUnlock(ctx);
+ *
+ * This is not needed when using `RedisModule_Reply*` functions, assuming
+ * that a blocked client was used when the context was created, otherwise
+ * no RedisModule_Reply* call should be made at all.
+ *
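+ * A full lifecycle looks like this (an illustrative sketch, run from a
+ * module-spawned thread holding a RedisModuleBlockedClient 'bc'):
+ *
+ *     RedisModuleCtx *tsctx = RedisModule_GetThreadSafeContext(bc);
+ *     RedisModule_ThreadSafeContextLock(tsctx);
+ *     RedisModule_Call(tsctx,"INCR","c","counter");
+ *     RedisModule_ThreadSafeContextUnlock(tsctx);
+ *     RedisModule_FreeThreadSafeContext(tsctx);
+ *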
+ * TODO: thread safe contexts do not inherit the blocked client
+ * selected database. */
+RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) {
+ RedisModuleCtx *ctx = zmalloc(sizeof(*ctx));
+ RedisModuleCtx empty = REDISMODULE_CTX_INIT;
+ memcpy(ctx,&empty,sizeof(empty));
+ if (bc) {
+ ctx->blocked_client = bc;
+ ctx->module = bc->module;
+ }
+ ctx->flags |= REDISMODULE_CTX_THREAD_SAFE;
+ /* Even when the context is associated with a blocked client, we can't
+ * access it safely from another thread, so we create a fake client here
+ * in order to keep things like the currently selected database. */
+ ctx->client = createClient(-1);
+ if (bc) selectDb(ctx->client,bc->dbid);
+ return ctx;
+}
+
+/* Release a thread safe context. */
+void RM_FreeThreadSafeContext(RedisModuleCtx *ctx) {
+ moduleFreeContext(ctx);
+ zfree(ctx);
+}
+
+/* Acquire the server lock before executing a thread safe API call.
+ * This is not needed for `RedisModule_Reply*` calls when there is
+ * a blocked client connected to the thread safe context. */
+void RM_ThreadSafeContextLock(RedisModuleCtx *ctx) {
+ UNUSED(ctx);
+ moduleAcquireGIL();
+}
+
+/* Release the server lock after a thread safe API call was executed. */
+void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) {
+ UNUSED(ctx);
+ moduleReleaseGIL();
+}
+
+void moduleAcquireGIL(void) {
+ pthread_mutex_lock(&moduleGIL);
+}
+
+void moduleReleaseGIL(void) {
+ pthread_mutex_unlock(&moduleGIL);
+}
+
+
+/* --------------------------------------------------------------------------
+ * Module Keyspace Notifications API
+ * -------------------------------------------------------------------------- */
+
+/* Subscribe to keyspace notifications. This is a low-level version of the
+ * keyspace-notifications API. A module can register callbacks to be notified
+ * when keyspace events occur.
+ *
+ * Notification events are filtered by their type (string events, set events,
+ * etc), and the subscriber callback receives only events that match a specific
+ * mask of event types.
+ *
+ * When subscribing to notifications with RedisModule_SubscribeToKeyspaceEvents
+ * the module must provide an event type-mask, denoting the events the subscriber
+ * is interested in. This can be an ORed mask of any of the following flags:
+ *
+ * - REDISMODULE_NOTIFY_GENERIC: Generic commands like DEL, EXPIRE, RENAME
+ * - REDISMODULE_NOTIFY_STRING: String events
+ * - REDISMODULE_NOTIFY_LIST: List events
+ * - REDISMODULE_NOTIFY_SET: Set events
+ * - REDISMODULE_NOTIFY_HASH: Hash events
+ * - REDISMODULE_NOTIFY_ZSET: Sorted Set events
+ * - REDISMODULE_NOTIFY_EXPIRED: Expiration events
+ * - REDISMODULE_NOTIFY_EVICTED: Eviction events
+ * - REDISMODULE_NOTIFY_STREAM: Stream events
+ * - REDISMODULE_NOTIFY_ALL: All events
+ *
+ * We do not distinguish between key events and keyspace events, and it is up
+ * to the module to filter the actions taken based on the key.
+ *
+ * The subscriber signature is:
+ *
+ * int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type,
+ * const char *event,
+ * RedisModuleString *key);
+ *
+ * `type` is the event type bit, which must match the mask given at
+ * registration time. The event string is the actual command being executed,
+ * and key is the relevant Redis key.
+ *
+ * The notification callback is executed with a Redis context that cannot be
+ * used to send anything to the client, and has the db number where the event
+ * occurred as its selected db number.
+ *
+ * Notice that it is not necessary to enable notifications in redis.conf for
+ * module notifications to work.
+ *
+ * Warning: the notification callbacks are performed in a synchronous manner,
+ * so notification callbacks must be fast, or they will slow Redis down.
+ * If you need to take long actions, use threads to offload them.
+ *
+ * See https://redis.io/topics/notifications for more information.
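+ *
+ * For example (a sketch; the callback name and event mask are illustrative),
+ * a module may subscribe to expiration and generic events like this:
+ *
+ *     int NotifyCallback(RedisModuleCtx *ctx, int type, const char *event,
+ *                        RedisModuleString *key)
+ *     {
+ *         RedisModule_Log(ctx,"notice","Got event '%s'",event);
+ *         return REDISMODULE_OK;
+ *     }
+ *
+ * And later, inside RedisModule_OnLoad():
+ *
+ *     RedisModule_SubscribeToKeyspaceEvents(ctx,
+ *         REDISMODULE_NOTIFY_EXPIRED|REDISMODULE_NOTIFY_GENERIC,
+ *         NotifyCallback);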
+ */
+int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc callback) {
+ RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub));
+ sub->module = ctx->module;
+ sub->event_mask = types;
+ sub->notify_callback = callback;
+ sub->active = 0;
+
+ listAddNodeTail(moduleKeyspaceSubscribers, sub);
+ return REDISMODULE_OK;
+}
+
+/* Dispatcher for keyspace notifications to module subscriber functions.
+ * This gets called only if at least one module subscribed to keyspace
+ * notifications. */
+void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) {
+ /* Don't do anything if there aren't any subscribers */
+ if (listLength(moduleKeyspaceSubscribers) == 0) return;
+
+ listIter li;
+ listNode *ln;
+ listRewind(moduleKeyspaceSubscribers,&li);
+
+ /* Remove irrelevant flags from the type mask */
+ type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE);
+
+ while((ln = listNext(&li))) {
+ RedisModuleKeyspaceSubscriber *sub = ln->value;
+ /* Only notify subscribers on events matching their registration,
+ * and avoid subscribers triggering themselves. */
+ if ((sub->event_mask & type) && sub->active == 0) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.module = sub->module;
+ ctx.client = moduleKeyspaceSubscribersClient;
+ selectDb(ctx.client, dbid);
+
+ /* Mark the handler as active to avoid reentrant loops: if the
+ * subscriber performs an action triggering itself, it will not
+ * be notified about it. */
+ sub->active = 1;
+ sub->notify_callback(&ctx, type, event, key);
+ sub->active = 0;
+ moduleFreeContext(&ctx);
+ }
+ }
+}
+
+/* Unsubscribe any notification subscribers this module has upon unloading */
+void moduleUnsubscribeNotifications(RedisModule *module) {
+ listIter li;
+ listNode *ln;
+ listRewind(moduleKeyspaceSubscribers,&li);
+ while((ln = listNext(&li))) {
+ RedisModuleKeyspaceSubscriber *sub = ln->value;
+ if (sub->module == module) {
+ listDelNode(moduleKeyspaceSubscribers, ln);
+ zfree(sub);
+ }
+ }
+}
+
+/* --------------------------------------------------------------------------
+ * Modules Cluster API
+ * -------------------------------------------------------------------------- */
+
+/* The Cluster message callback function pointer type. */
+typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len);
+
+/* This structure identifies a registered receiver: it must match a given
+ * module ID, for a given message type. The callback function is just the
+ * function that was registered as receiver. */
+typedef struct moduleClusterReceiver {
+ uint64_t module_id;
+ RedisModuleClusterMessageReceiver callback;
+ struct RedisModule *module;
+ struct moduleClusterReceiver *next;
+} moduleClusterReceiver;
+
+typedef struct moduleClusterNodeInfo {
+ int flags;
+ char ip[NET_IP_STR_LEN];
+ int port;
+ char master_id[40]; /* Only if flags & REDISMODULE_NODE_MASTER is true. */
+} moduleClusterNodeInfo;
+
+/* We have an array of message types: each bucket is a linked list of
+ * configured receivers. The array is sized to cover every possible value
+ * of the uint8_t message type. */
+static moduleClusterReceiver *clusterReceivers[UINT8_MAX+1];
+
+/* Dispatch the message to the right module receiver. */
+void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ moduleClusterReceiver *r = clusterReceivers[type];
+ while(r) {
+ if (r->module_id == module_id) {
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+ ctx.module = r->module;
+ r->callback(&ctx,sender_id,type,payload,len);
+ moduleFreeContext(&ctx);
+ return;
+ }
+ r = r->next;
+ }
+}
+
+/* Register a callback receiver for cluster messages of type 'type'. If there
+ * was already a registered callback, this will replace the callback function
+ * with the one provided. Otherwise if the callback is set to NULL and there
+ * is already a callback for this message type, the callback is unregistered
+ * (so this API call is also used in order to delete the receiver). */
+void RM_RegisterClusterMessageReceiver(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) {
+ if (!server.cluster_enabled) return;
+
+ uint64_t module_id = moduleTypeEncodeId(ctx->module->name,0);
+ moduleClusterReceiver *r = clusterReceivers[type], *prev = NULL;
+ while(r) {
+ if (r->module_id == module_id) {
+ /* Found! Set or delete. */
+ if (callback) {
+ r->callback = callback;
+ } else {
+ /* Delete the receiver entry if the user is setting
+ * it to NULL. Just unlink the receiver node from the
+ * linked list, fixing the list head if needed. */
+ if (prev)
+ prev->next = r->next;
+ else
+ clusterReceivers[type] = r->next;
+ zfree(r);
+ }
+ return;
+ }
+ prev = r;
+ r = r->next;
+ }
+
+ /* Not found, let's add it. */
+ if (callback) {
+ r = zmalloc(sizeof(*r));
+ r->module_id = module_id;
+ r->module = ctx->module;
+ r->callback = callback;
+ r->next = clusterReceivers[type];
+ clusterReceivers[type] = r;
+ }
+}
+
+/* Send a message to all the nodes in the cluster if `target` is NULL, otherwise
+ * to the specified target, which is a REDISMODULE_NODE_ID_LEN bytes node ID, as
+ * returned by the receiver callback or by the nodes iteration functions.
+ *
+ * The function returns REDISMODULE_OK if the message was successfully sent,
+ * otherwise if the node is not connected or such node ID does not map to any
+ * known cluster node, REDISMODULE_ERR is returned. */
+int RM_SendClusterMessage(RedisModuleCtx *ctx, char *target_id, uint8_t type, unsigned char *msg, uint32_t len) {
+ if (!server.cluster_enabled) return REDISMODULE_ERR;
+ uint64_t module_id = moduleTypeEncodeId(ctx->module->name,0);
+ if (clusterSendModuleMessageToTarget(target_id,module_id,type,msg,len) == C_OK)
+ return REDISMODULE_OK;
+ else
+ return REDISMODULE_ERR;
+}
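+
+/* For example (a sketch mirroring the hellocluster.c example later in this
+ * patch; the message type value and receiver name are illustrative):
+ *
+ *     #define MSGTYPE_PING 1
+ *
+ *     RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PING,
+ *                                                PingReceiver);
+ *     RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PING,
+ *                                    (unsigned char*)"Hey",3);
+ */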
+
+/* Return an array of string pointers, each string pointer points to a cluster
+ * node ID of exactly REDISMODULE_NODE_ID_LEN bytes (without any null
+ * terminator). The number of returned node IDs is stored into `*numnodes`.
+ * However if this function is called by a module not running on a Redis
+ * instance with Redis Cluster enabled, NULL is returned instead.
+ *
+ * The IDs returned can be used with RedisModule_GetClusterNodeInfo() in order
+ * to get more information about single nodes.
+ *
+ * The array returned by this function must be freed using the function
+ * RedisModule_FreeClusterNodesList().
+ *
+ * Example:
+ *
+ * size_t count, j;
+ * char **ids = RedisModule_GetClusterNodesList(ctx,&count);
+ * for (j = 0; j < count; j++) {
+ * RedisModule_Log(ctx,"notice","Node %.*s",
+ * REDISMODULE_NODE_ID_LEN,ids[j]);
+ * }
+ * RedisModule_FreeClusterNodesList(ids);
+ */
+char **RM_GetClusterNodesList(RedisModuleCtx *ctx, size_t *numnodes) {
+ UNUSED(ctx);
+
+ if (!server.cluster_enabled) return NULL;
+ size_t count = dictSize(server.cluster->nodes);
+ char **ids = zmalloc((count+1)*sizeof(char*));
+ dictIterator *di = dictGetIterator(server.cluster->nodes);
+ dictEntry *de;
+ int j = 0;
+ while((de = dictNext(di)) != NULL) {
+ clusterNode *node = dictGetVal(de);
+ if (node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE)) continue;
+ ids[j] = zmalloc(REDISMODULE_NODE_ID_LEN);
+ memcpy(ids[j],node->name,REDISMODULE_NODE_ID_LEN);
+ j++;
+ }
+ *numnodes = j;
+ ids[j] = NULL; /* Null term so that FreeClusterNodesList does not need
+ * to also get the count argument. */
+ dictReleaseIterator(di);
+ return ids;
+}
+
+/* Free the node list obtained with RedisModule_GetClusterNodesList. */
+void RM_FreeClusterNodesList(char **ids) {
+ if (ids == NULL) return;
+ for (int j = 0; ids[j]; j++) zfree(ids[j]);
+ zfree(ids);
+}
+
+/* Return this node ID (REDISMODULE_NODE_ID_LEN bytes) or NULL if the cluster
+ * is disabled. */
+const char *RM_GetMyClusterID(void) {
+ if (!server.cluster_enabled) return NULL;
+ return server.cluster->myself->name;
+}
+
+/* Return the number of nodes in the cluster, regardless of their state
+ * (handshake, noaddress, ...), so the number of active nodes may be
+ * smaller than, but never greater than, this number. If the instance is not
+ * in cluster mode, zero is returned. */
+size_t RM_GetClusterSize(void) {
+ if (!server.cluster_enabled) return 0;
+ return dictSize(server.cluster->nodes);
+}
+
+/* Populate the specified info for the node having as ID the specified 'id',
+ * then returns REDISMODULE_OK. Otherwise if the node ID does not exist from
+ * the POV of this local node, REDISMODULE_ERR is returned.
+ *
+ * The arguments ip, master_id, port and flags can be NULL in case we don't
+ * need to populate back certain info. If an ip and master_id (only populated
+ * if the instance is a slave) are specified, they point to buffers holding
+ * at least REDISMODULE_NODE_ID_LEN bytes. The strings written back as ip
+ * and master_id are not null terminated.
+ *
+ * The list of flags reported is the following:
+ *
+ * * REDISMODULE_NODE_MYSELF This node
+ * * REDISMODULE_NODE_MASTER The node is a master
+ * * REDISMODULE_NODE_SLAVE The node is a slave
+ * * REDISMODULE_NODE_PFAIL We see the node as failing
+ * * REDISMODULE_NODE_FAIL The cluster agrees the node is failing
+ * * REDISMODULE_NODE_NOFAILOVER The slave is configured to never failover
+ */
+
+clusterNode *clusterLookupNode(const char *name); /* We need access to internals */
+
+int RM_GetClusterNodeInfo(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) {
+ UNUSED(ctx);
+
+ clusterNode *node = clusterLookupNode(id);
+ if (node == NULL ||
+ node->flags & (CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
+ {
+ return REDISMODULE_ERR;
+ }
+
+ if (ip) memcpy(ip,node->ip,REDISMODULE_NODE_ID_LEN);
+
+ if (master_id) {
+ /* If the information is not available, the function will set the
+ * field to zero bytes, so that when the field can't be populated the
+ * function remains predictable. */
+ if (node->flags & CLUSTER_NODE_SLAVE && node->slaveof)
+ memcpy(master_id,node->slaveof->name,REDISMODULE_NODE_ID_LEN);
+ else
+ memset(master_id,0,REDISMODULE_NODE_ID_LEN);
+ }
+ }
+ if (port) *port = node->port;
+
+ /* As usual we have to remap flags for modules, in order to ensure
+ * binary compatibility. */
+ if (flags) {
+ *flags = 0;
+ if (node->flags & CLUSTER_NODE_MYSELF) *flags |= REDISMODULE_NODE_MYSELF;
+ if (node->flags & CLUSTER_NODE_MASTER) *flags |= REDISMODULE_NODE_MASTER;
+ if (node->flags & CLUSTER_NODE_SLAVE) *flags |= REDISMODULE_NODE_SLAVE;
+ if (node->flags & CLUSTER_NODE_PFAIL) *flags |= REDISMODULE_NODE_PFAIL;
+ if (node->flags & CLUSTER_NODE_FAIL) *flags |= REDISMODULE_NODE_FAIL;
+ if (node->flags & CLUSTER_NODE_NOFAILOVER) *flags |= REDISMODULE_NODE_NOFAILOVER;
+ }
+ return REDISMODULE_OK;
+}
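+
+/* For example (a sketch, as in the hellocluster.c example later in this
+ * patch), to fetch just the port of each node returned by
+ * RedisModule_GetClusterNodesList():
+ *
+ *     int port;
+ *     RedisModule_GetClusterNodeInfo(ctx,ids[j],NULL,NULL,&port,NULL);
+ */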
+
+/* --------------------------------------------------------------------------
+ * Modules Timers API
+ *
+ * Module timers are a high precision "green timers" abstraction where
+ * every module can register even millions of timers without problems, even if
+ * the actual event loop will just have a single timer that is used to awake the
+ * module timers subsystem in order to process the next event.
+ *
+ * All the timers are stored into a radix tree, ordered by expire time. When
+ * the main Redis event loop timer callback is called, we try to process all
+ * the timers already expired one after the other. Then we re-enter the event
+ * loop registering a timer that will expire when the next pending module
+ * timer expires.
+ *
+ * Every time the number of active timers drops to zero, we unregister the
+ * main event loop timer, so that there is no overhead when the feature is
+ * not used.
+ * -------------------------------------------------------------------------- */
+
+static rax *Timers; /* The radix tree of all the timers sorted by expire. */
+long long aeTimer = -1; /* Main event loop (ae.c) timer identifier. */
+
+typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data);
+
+/* The timer descriptor, stored as value in the radix tree. */
+typedef struct RedisModuleTimer {
+ RedisModule *module; /* Module reference. */
+ RedisModuleTimerProc callback; /* The callback to invoke on expire. */
+ void *data; /* Private data for the callback. */
+} RedisModuleTimer;
+
+/* This is the timer handler that is called by the main event loop. We schedule
+ * this timer to be called when the nearest of our module timers will expire. */
+int moduleTimerHandler(struct aeEventLoop *eventLoop, long long id, void *clientData) {
+ UNUSED(eventLoop);
+ UNUSED(id);
+ UNUSED(clientData);
+
+ /* To start let's try to fire all the timers already expired. */
+ raxIterator ri;
+ raxStart(&ri,Timers);
+ uint64_t now = ustime();
+ long long next_period = 0;
+ while(1) {
+ raxSeek(&ri,"^",NULL,0);
+ if (!raxNext(&ri)) break;
+ uint64_t expiretime;
+ memcpy(&expiretime,ri.key,sizeof(expiretime));
+ expiretime = ntohu64(expiretime);
+ if (now >= expiretime) {
+ RedisModuleTimer *timer = ri.data;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ ctx.module = timer->module;
+ timer->callback(&ctx,timer->data);
+ moduleFreeContext(&ctx);
+ raxRemove(Timers,(unsigned char*)ri.key,ri.key_len,NULL);
+ zfree(timer);
+ } else {
+ next_period = (expiretime-now)/1000; /* Scale to milliseconds. */
+ break;
+ }
+ }
+ raxStop(&ri);
+
+ /* Reschedule the next timer or cancel it. */
+ if (next_period <= 0) next_period = 1;
+ return (raxSize(Timers) > 0) ? next_period : AE_NOMORE;
+}
+
+/* Create a new timer that will fire after `period` milliseconds, and will call
+ * the specified function using `data` as argument. The returned timer ID can be
+ * used to get information from the timer or to stop it before it fires. */
+RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisModuleTimerProc callback, void *data) {
+ RedisModuleTimer *timer = zmalloc(sizeof(*timer));
+ timer->module = ctx->module;
+ timer->callback = callback;
+ timer->data = data;
+ uint64_t expiretime = ustime()+period*1000;
+ uint64_t key;
+
+ while(1) {
+ key = htonu64(expiretime);
+ if (raxFind(Timers, (unsigned char*)&key,sizeof(key)) == raxNotFound) {
+ raxInsert(Timers,(unsigned char*)&key,sizeof(key),timer,NULL);
+ break;
+ } else {
+ expiretime++;
+ }
+ }
+
+ /* We need to install the main event loop timer if it's not already
+ * installed, or we may need to refresh its period if we just installed
+ * a timer that will expire sooner than any other. */
+ if (aeTimer != -1) {
+ raxIterator ri;
+ raxStart(&ri,Timers);
+ raxSeek(&ri,"^",NULL,0);
+ raxNext(&ri);
+ if (memcmp(ri.key,&key,sizeof(key)) == 0) {
+ /* This is the first key, we need to re-install the timer according
+ * to the just added event. */
+ aeDeleteTimeEvent(server.el,aeTimer);
+ aeTimer = -1;
+ }
+ raxStop(&ri);
+ }
+
+ /* If we have no main timer (the old one was invalidated, or this is the
+ * first module timer we have), install one. */
+ if (aeTimer == -1)
+ aeTimer = aeCreateTimeEvent(server.el,period,moduleTimerHandler,NULL,NULL);
+
+ return key;
+}
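+
+/* For example (a sketch, as in the hellotimer.c example later in this
+ * patch), to arm a one-shot timer that fires after one second:
+ *
+ *     void timerHandler(RedisModuleCtx *ctx, void *data) {
+ *         REDISMODULE_NOT_USED(ctx);
+ *         printf("Fired %s!\n", (char*)data);
+ *         RedisModule_Free(data);
+ *     }
+ *
+ *     char *payload = RedisModule_Strdup("hello");
+ *     RedisModuleTimerID tid =
+ *         RedisModule_CreateTimer(ctx,1000,timerHandler,payload);
+ */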
+
+/* Stop a timer, returns REDISMODULE_OK if the timer was found, belonged to the
+ * calling module, and was stopped, otherwise REDISMODULE_ERR is returned.
+ * If not NULL, the data pointer is set to the value of the data argument when
+ * the timer was created. */
+int RM_StopTimer(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) {
+ RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id));
+ if (timer == raxNotFound || timer->module != ctx->module)
+ return REDISMODULE_ERR;
+ if (data) *data = timer->data;
+ raxRemove(Timers,(unsigned char*)&id,sizeof(id),NULL);
+ zfree(timer);
+ return REDISMODULE_OK;
+}
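+
+/* For example (a sketch), to cancel the timer created above and release
+ * its private data:
+ *
+ *     void *data;
+ *     if (RedisModule_StopTimer(ctx,tid,&data) == REDISMODULE_OK)
+ *         RedisModule_Free(data);
+ */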
+
+/* Obtain information about a timer: its remaining time before firing
+ * (in milliseconds), and the private data pointer associated with the timer.
+ * If the timer specified does not exist or belongs to a different module
+ * no information is returned and the function returns REDISMODULE_ERR,
+ * otherwise REDISMODULE_OK is returned. The arguments `remaining` or `data`
+ * can be NULL if the caller does not need certain information. */
+int RM_GetTimerInfo(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data) {
+ RedisModuleTimer *timer = raxFind(Timers,(unsigned char*)&id,sizeof(id));
+ if (timer == raxNotFound || timer->module != ctx->module)
+ return REDISMODULE_ERR;
+ if (remaining) {
+ int64_t rem = ntohu64(id)-ustime();
+ if (rem < 0) rem = 0;
+ *remaining = rem/1000; /* Scale to milliseconds. */
+ }
+ if (data) *data = timer->data;
+ return REDISMODULE_OK;
+}
+
+/* --------------------------------------------------------------------------
+ * Modules utility APIs
+ * -------------------------------------------------------------------------- */
+
+/* Return random bytes using SHA1 in counter mode with a /dev/urandom
+ * initialized seed. This function is fast, so it can be used to generate
+ * many bytes without any effect on the operating system entropy pool.
+ * Currently this function is not thread safe. */
+void RM_GetRandomBytes(unsigned char *dst, size_t len) {
+ getRandomBytes(dst,len);
+}
+
+/* Like RedisModule_GetRandomBytes() but instead of setting the string to
+ * random bytes the string is set to random characters in the
+ * hex charset [0-9a-f]. */
+void RM_GetRandomHexChars(char *dst, size_t len) {
+ getRandomHexChars(dst,len);
+}
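+
+/* For example (a sketch), to generate a random 40 character run ID:
+ *
+ *     char runid[40];
+ *     RedisModule_GetRandomHexChars(runid,sizeof(runid));
+ */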
+
+/* --------------------------------------------------------------------------
+ * Modules API internals
+ * -------------------------------------------------------------------------- */
+
+/* server.moduleapi dictionary type. Only uses plain C strings since
+ * it gets queried from modules. */
+
+uint64_t dictCStringKeyHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
+}
+
+int dictCStringKeyCompare(void *privdata, const void *key1, const void *key2) {
+ UNUSED(privdata);
+ return strcmp(key1,key2) == 0;
+}
+
+dictType moduleAPIDictType = {
+ dictCStringKeyHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictCStringKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+int moduleRegisterApi(const char *funcname, void *funcptr) {
+ return dictAdd(server.moduleapi, (char*)funcname, funcptr);
+}
+
+#define REGISTER_API(name) \
+ moduleRegisterApi("RedisModule_" #name, (void *)(unsigned long)RM_ ## name)
+
+/* Global initialization at Redis startup. */
+void moduleRegisterCoreAPI(void);
+
+void moduleInitModulesSystem(void) {
+ moduleUnblockedClients = listCreate();
+ server.loadmodule_queue = listCreate();
+ modules = dictCreate(&modulesDictType,NULL);
+
+ /* Set up the keyspace notification subscriber list and static client. */
+ moduleKeyspaceSubscribers = listCreate();
+ moduleKeyspaceSubscribersClient = createClient(-1);
+ moduleKeyspaceSubscribersClient->flags |= CLIENT_MODULE;
+
+ moduleRegisterCoreAPI();
+ if (pipe(server.module_blocked_pipe) == -1) {
+ serverLog(LL_WARNING,
+ "Can't create the pipe for module blocking commands: %s",
+ strerror(errno));
+ exit(1);
+ }
+ /* Make the pipe non blocking. This is just a best-effort mechanism to
+ * awake the event loop, and we do not want to block in either the read
+ * or the write half. */
+ anetNonBlock(NULL,server.module_blocked_pipe[0]);
+ anetNonBlock(NULL,server.module_blocked_pipe[1]);
+
+ /* Create the timers radix tree. */
+ Timers = raxNew();
+
+ /* Our thread-safe contexts GIL must start already locked: it is
+ * unlocked only when it's safe. */
+ pthread_mutex_lock(&moduleGIL);
+}
+
+/* Load all the modules in the server.loadmodule_queue list, which is
+ * populated by `loadmodule` directives in the configuration file.
+ * We can't load modules directly when processing the configuration file
+ * because the server must be fully initialized before loading modules.
+ *
+ * The function aborts the server on errors, since starting with missing
+ * modules is not considered sane: clients may rely on the existence of
+ * given commands, loading the AOF may also need some modules to exist, and
+ * if this instance is a slave, it must understand commands from the master. */
+void moduleLoadFromQueue(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.loadmodule_queue,&li);
+ while((ln = listNext(&li))) {
+ struct moduleLoadQueueEntry *loadmod = ln->value;
+ if (moduleLoad(loadmod->path,(void **)loadmod->argv,loadmod->argc)
+ == C_ERR)
+ {
+ serverLog(LL_WARNING,
+ "Can't load module from %s: server aborting",
+ loadmod->path);
+ exit(1);
+ }
+ }
+}
+
+void moduleFreeModuleStructure(struct RedisModule *module) {
+ listRelease(module->types);
+ sdsfree(module->name);
+ zfree(module);
+}
+
+void moduleUnregisterCommands(struct RedisModule *module) {
+ /* Unregister all the commands registered by this module. */
+ dictIterator *di = dictGetSafeIterator(server.commands);
+ dictEntry *de;
+ while ((de = dictNext(di)) != NULL) {
+ struct redisCommand *cmd = dictGetVal(de);
+ if (cmd->proc == RedisModuleCommandDispatcher) {
+ RedisModuleCommandProxy *cp =
+ (void*)(unsigned long)cmd->getkeys_proc;
+ sds cmdname = cp->rediscmd->name;
+ if (cp->module == module) {
+ dictDelete(server.commands,cmdname);
+ dictDelete(server.orig_commands,cmdname);
+ sdsfree(cmdname);
+ zfree(cp->rediscmd);
+ zfree(cp);
+ }
+ }
+ }
+ dictReleaseIterator(di);
+}
+
+/* Load a module and initialize it. On success C_OK is returned, otherwise
+ * C_ERR is returned. */
+int moduleLoad(const char *path, void **module_argv, int module_argc) {
+ int (*onload)(void *, void **, int);
+ void *handle;
+ RedisModuleCtx ctx = REDISMODULE_CTX_INIT;
+
+ handle = dlopen(path,RTLD_NOW|RTLD_LOCAL);
+ if (handle == NULL) {
+ serverLog(LL_WARNING, "Module %s failed to load: %s", path, dlerror());
+ return C_ERR;
+ }
+ onload = (int (*)(void *, void **, int))(unsigned long) dlsym(handle,"RedisModule_OnLoad");
+ if (onload == NULL) {
+ dlclose(handle);
+ serverLog(LL_WARNING,
+ "Module %s does not export RedisModule_OnLoad() "
+ "symbol. Module not loaded.",path);
+ return C_ERR;
+ }
+ if (onload((void*)&ctx,module_argv,module_argc) == REDISMODULE_ERR) {
+ if (ctx.module) {
+ moduleUnregisterCommands(ctx.module);
+ moduleFreeModuleStructure(ctx.module);
+ }
+ dlclose(handle);
+ serverLog(LL_WARNING,
+ "Module %s initialization failed. Module not loaded",path);
+ return C_ERR;
+ }
+
+ /* Redis module loaded! Register it. */
+ dictAdd(modules,ctx.module->name,ctx.module);
+ ctx.module->handle = handle;
+ serverLog(LL_NOTICE,"Module '%s' loaded from %s",ctx.module->name,path);
+ moduleFreeContext(&ctx);
+ return C_OK;
+}
+
+
+/* Unload the module registered with the specified name. On success
+ * REDISMODULE_OK is returned, otherwise REDISMODULE_ERR is returned and
+ * errno is set to the following values depending on the type of error:
+ *
+ * * ENOENT: No such module having the specified name.
+ * * EBUSY: The module exports a new data type and can only be reloaded. */
+int moduleUnload(sds name) {
+ struct RedisModule *module = dictFetchValue(modules,name);
+
+ if (module == NULL) {
+ errno = ENOENT;
+ return REDISMODULE_ERR;
+ }
+
+ if (listLength(module->types)) {
+ errno = EBUSY;
+ return REDISMODULE_ERR;
+ }
+
+ moduleUnregisterCommands(module);
+
+ /* Remove any notification subscribers this module might have. */
+ moduleUnsubscribeNotifications(module);
+
+ /* Unregister all the hooks. TODO: hooks are not supported here yet. */
+
+ /* Unload the dynamic library. */
+ if (dlclose(module->handle) == -1) {
+ char *error = dlerror();
+ if (error == NULL) error = "Unknown error";
+ serverLog(LL_WARNING,"Error when trying to close the %s module: %s",
+ module->name, error);
+ }
+
+ /* Remove from list of modules. */
+ serverLog(LL_NOTICE,"Module %s unloaded",module->name);
+ dictDelete(modules,module->name);
+ module->name = NULL; /* The name was already freed by dictDelete(). */
+ moduleFreeModuleStructure(module);
+
+ return REDISMODULE_OK;
+}
+
+/* Redis MODULE command.
+ *
+ * MODULE LIST
+ * MODULE LOAD <path> [args...]
+ * MODULE UNLOAD <name> */
+void moduleCommand(client *c) {
+ char *subcmd = c->argv[1]->ptr;
+ if (c->argc == 2 && !strcasecmp(subcmd,"help")) {
+ const char *help[] = {
+"LIST -- Return a list of loaded modules.",
+"LOAD <path> [arg ...] -- Load a module library from <path>.",
+"UNLOAD <name> -- Unload a module.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else
+ if (!strcasecmp(subcmd,"load") && c->argc >= 3) {
+ robj **argv = NULL;
+ int argc = 0;
+
+ if (c->argc > 3) {
+ argc = c->argc - 3;
+ argv = &c->argv[3];
+ }
+
+ if (moduleLoad(c->argv[2]->ptr,(void **)argv,argc) == C_OK)
+ addReply(c,shared.ok);
+ else
+ addReplyError(c,
+ "Error loading the extension. Please check the server logs.");
+ } else if (!strcasecmp(subcmd,"unload") && c->argc == 3) {
+ if (moduleUnload(c->argv[2]->ptr) == C_OK)
+ addReply(c,shared.ok);
+ else {
+ char *errmsg;
+ switch(errno) {
+ case ENOENT:
+ errmsg = "no such module with that name";
+ break;
+ case EBUSY:
+ errmsg = "the module exports one or more module-side data types, can't unload";
+ break;
+ default:
+ errmsg = "operation not possible.";
+ break;
+ }
+ addReplyErrorFormat(c,"Error unloading module: %s",errmsg);
+ }
+ } else if (!strcasecmp(subcmd,"list") && c->argc == 2) {
+ dictIterator *di = dictGetIterator(modules);
+ dictEntry *de;
+
+ addReplyMultiBulkLen(c,dictSize(modules));
+ while ((de = dictNext(di)) != NULL) {
+ sds name = dictGetKey(de);
+ struct RedisModule *module = dictGetVal(de);
+ addReplyMultiBulkLen(c,4);
+ addReplyBulkCString(c,"name");
+ addReplyBulkCBuffer(c,name,sdslen(name));
+ addReplyBulkCString(c,"ver");
+ addReplyLongLong(c,module->ver);
+ }
+ dictReleaseIterator(di);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ return;
+ }
+}
+
+/* Return the number of registered modules. */
+size_t moduleCount(void) {
+ return dictSize(modules);
+}
+
+/* Register all the APIs we export. Keep this function at the end of the
+ * file so that it's easy to find when adding new entries. */
+void moduleRegisterCoreAPI(void) {
+ server.moduleapi = dictCreate(&moduleAPIDictType,NULL);
+ REGISTER_API(Alloc);
+ REGISTER_API(Calloc);
+ REGISTER_API(Realloc);
+ REGISTER_API(Free);
+ REGISTER_API(Strdup);
+ REGISTER_API(CreateCommand);
+ REGISTER_API(SetModuleAttribs);
+ REGISTER_API(IsModuleNameBusy);
+ REGISTER_API(WrongArity);
+ REGISTER_API(ReplyWithLongLong);
+ REGISTER_API(ReplyWithError);
+ REGISTER_API(ReplyWithSimpleString);
+ REGISTER_API(ReplyWithArray);
+ REGISTER_API(ReplySetArrayLength);
+ REGISTER_API(ReplyWithString);
+ REGISTER_API(ReplyWithStringBuffer);
+ REGISTER_API(ReplyWithNull);
+ REGISTER_API(ReplyWithCallReply);
+ REGISTER_API(ReplyWithDouble);
+ REGISTER_API(GetSelectedDb);
+ REGISTER_API(SelectDb);
+ REGISTER_API(OpenKey);
+ REGISTER_API(CloseKey);
+ REGISTER_API(KeyType);
+ REGISTER_API(ValueLength);
+ REGISTER_API(ListPush);
+ REGISTER_API(ListPop);
+ REGISTER_API(StringToLongLong);
+ REGISTER_API(StringToDouble);
+ REGISTER_API(Call);
+ REGISTER_API(CallReplyProto);
+ REGISTER_API(FreeCallReply);
+ REGISTER_API(CallReplyInteger);
+ REGISTER_API(CallReplyType);
+ REGISTER_API(CallReplyLength);
+ REGISTER_API(CallReplyArrayElement);
+ REGISTER_API(CallReplyStringPtr);
+ REGISTER_API(CreateStringFromCallReply);
+ REGISTER_API(CreateString);
+ REGISTER_API(CreateStringFromLongLong);
+ REGISTER_API(CreateStringFromString);
+ REGISTER_API(CreateStringPrintf);
+ REGISTER_API(FreeString);
+ REGISTER_API(StringPtrLen);
+ REGISTER_API(AutoMemory);
+ REGISTER_API(Replicate);
+ REGISTER_API(ReplicateVerbatim);
+ REGISTER_API(DeleteKey);
+ REGISTER_API(UnlinkKey);
+ REGISTER_API(StringSet);
+ REGISTER_API(StringDMA);
+ REGISTER_API(StringTruncate);
+ REGISTER_API(SetExpire);
+ REGISTER_API(GetExpire);
+ REGISTER_API(ZsetAdd);
+ REGISTER_API(ZsetIncrby);
+ REGISTER_API(ZsetScore);
+ REGISTER_API(ZsetRem);
+ REGISTER_API(ZsetRangeStop);
+ REGISTER_API(ZsetFirstInScoreRange);
+ REGISTER_API(ZsetLastInScoreRange);
+ REGISTER_API(ZsetFirstInLexRange);
+ REGISTER_API(ZsetLastInLexRange);
+ REGISTER_API(ZsetRangeCurrentElement);
+ REGISTER_API(ZsetRangeNext);
+ REGISTER_API(ZsetRangePrev);
+ REGISTER_API(ZsetRangeEndReached);
+ REGISTER_API(HashSet);
+ REGISTER_API(HashGet);
+ REGISTER_API(IsKeysPositionRequest);
+ REGISTER_API(KeyAtPos);
+ REGISTER_API(GetClientId);
+ REGISTER_API(GetContextFlags);
+ REGISTER_API(PoolAlloc);
+ REGISTER_API(CreateDataType);
+ REGISTER_API(ModuleTypeSetValue);
+ REGISTER_API(ModuleTypeGetType);
+ REGISTER_API(ModuleTypeGetValue);
+ REGISTER_API(SaveUnsigned);
+ REGISTER_API(LoadUnsigned);
+ REGISTER_API(SaveSigned);
+ REGISTER_API(LoadSigned);
+ REGISTER_API(SaveString);
+ REGISTER_API(SaveStringBuffer);
+ REGISTER_API(LoadString);
+ REGISTER_API(LoadStringBuffer);
+ REGISTER_API(SaveDouble);
+ REGISTER_API(LoadDouble);
+ REGISTER_API(SaveFloat);
+ REGISTER_API(LoadFloat);
+ REGISTER_API(EmitAOF);
+ REGISTER_API(Log);
+ REGISTER_API(LogIOError);
+ REGISTER_API(StringAppendBuffer);
+ REGISTER_API(RetainString);
+ REGISTER_API(StringCompare);
+ REGISTER_API(GetContextFromIO);
+ REGISTER_API(BlockClient);
+ REGISTER_API(UnblockClient);
+ REGISTER_API(IsBlockedReplyRequest);
+ REGISTER_API(IsBlockedTimeoutRequest);
+ REGISTER_API(GetBlockedClientPrivateData);
+ REGISTER_API(AbortBlock);
+ REGISTER_API(Milliseconds);
+ REGISTER_API(GetThreadSafeContext);
+ REGISTER_API(FreeThreadSafeContext);
+ REGISTER_API(ThreadSafeContextLock);
+ REGISTER_API(ThreadSafeContextUnlock);
+ REGISTER_API(DigestAddStringBuffer);
+ REGISTER_API(DigestAddLongLong);
+ REGISTER_API(DigestEndSequence);
+ REGISTER_API(SubscribeToKeyspaceEvents);
+ REGISTER_API(RegisterClusterMessageReceiver);
+ REGISTER_API(SendClusterMessage);
+ REGISTER_API(GetClusterNodeInfo);
+ REGISTER_API(GetClusterNodesList);
+ REGISTER_API(FreeClusterNodesList);
+ REGISTER_API(CreateTimer);
+ REGISTER_API(StopTimer);
+ REGISTER_API(GetTimerInfo);
+ REGISTER_API(GetMyClusterID);
+ REGISTER_API(GetClusterSize);
+ REGISTER_API(GetRandomBytes);
+ REGISTER_API(GetRandomHexChars);
+ REGISTER_API(BlockedClientDisconnected);
+ REGISTER_API(SetDisconnectCallback);
+ REGISTER_API(GetBlockedClientHandle);
+}
diff --git a/src/modules/.gitignore b/src/modules/.gitignore
new file mode 100644
index 000000000..4de1735ec
--- /dev/null
+++ b/src/modules/.gitignore
@@ -0,0 +1,2 @@
+*.so
+*.xo
diff --git a/src/modules/Makefile b/src/modules/Makefile
new file mode 100644
index 000000000..cffe68994
--- /dev/null
+++ b/src/modules/Makefile
@@ -0,0 +1,52 @@
+
+# find the OS
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+
+# Compile flags for linux / osx
+ifeq ($(uname_S),Linux)
+ SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2
+ SHOBJ_LDFLAGS ?= -shared
+else
+ SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2
+ SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup
+endif
+
+.SUFFIXES: .c .so .xo .o
+
+all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so
+
+.c.xo:
+ $(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@
+
+helloworld.xo: ../redismodule.h
+
+helloworld.so: helloworld.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellotype.xo: ../redismodule.h
+
+hellotype.so: hellotype.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+helloblock.xo: ../redismodule.h
+
+helloblock.so: helloblock.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lpthread -lc
+
+hellocluster.xo: ../redismodule.h
+
+hellocluster.so: hellocluster.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+hellotimer.xo: ../redismodule.h
+
+hellotimer.so: hellotimer.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+testmodule.xo: ../redismodule.h
+
+testmodule.so: testmodule.xo
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+
+clean:
+ rm -rf *.xo *.so
diff --git a/src/modules/gendoc.rb b/src/modules/gendoc.rb
new file mode 100644
index 000000000..ee6572884
--- /dev/null
+++ b/src/modules/gendoc.rb
@@ -0,0 +1,51 @@
+# gendoc.rb -- Converts the top-comments inside module.c to modules API
+# reference documentation in markdown format.
+
+# Convert the C comment to markdown
+def markdown(s)
+ s = s.gsub(/\*\/$/,"")
+ s = s.gsub(/^ \* {0,1}/,"")
+ s = s.gsub(/^\/\* /,"")
+ s.chop! while s[-1] == "\n" || s[-1] == " "
+ lines = s.split("\n")
+ newlines = []
+ lines.each{|l|
+ if l[0] != ' '
+ l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"}
+ l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"}
+ l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"}
+ end
+ newlines << l
+ }
+ return newlines.join("\n")
+end
+
+# Given the source code array and the index at which an exported symbol was
+# detected, extracts and outputs the documentation.
+def docufy(src,i)
+ m = /RM_[A-z0-9]+/.match(src[i])
+ name = m[0]
+ name = name.sub("RM_","RedisModule_")
+ proto = src[i].sub("{","").strip+";\n"
+ proto = proto.sub("RM_","RedisModule_")
+ puts "## `#{name}`\n\n"
+ puts " #{proto}\n"
+ comment = ""
+ while true
+ i = i-1
+ comment = src[i]+comment
+ break if src[i] =~ /\/\*/
+ end
+ comment = markdown(comment)
+ puts comment+"\n\n"
+end
+
+puts "# Modules API reference\n\n"
+src = File.open("../module.c").to_a
+src.each_with_index{|line,i|
+ if line =~ /RM_/ && line[0] != ' ' && line[0] != '#' && line[0] != '/'
+ if src[i-1] =~ /\*\//
+ docufy(src,i)
+ end
+ end
+}
diff --git a/src/modules/helloblock.c b/src/modules/helloblock.c
new file mode 100644
index 000000000..6bba17d33
--- /dev/null
+++ b/src/modules/helloblock.c
@@ -0,0 +1,219 @@
+/* Helloblock module -- An example of blocking command implementation
+ * with threads.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_EXPERIMENTAL_API
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+
+/* Reply callback for blocking command HELLO.BLOCK */
+int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ int *myint = RedisModule_GetBlockedClientPrivateData(ctx);
+ return RedisModule_ReplyWithLongLong(ctx,*myint);
+}
+
+/* Timeout callback for blocking command HELLO.BLOCK */
+int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ return RedisModule_ReplyWithSimpleString(ctx,"Request timed out");
+}
+
+/* Private data freeing callback for HELLO.BLOCK command. */
+void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) {
+ REDISMODULE_NOT_USED(ctx);
+ RedisModule_Free(privdata);
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.BLOCK. */
+void *HelloBlock_ThreadMain(void *arg) {
+ void **targ = arg;
+ RedisModuleBlockedClient *bc = targ[0];
+ long long delay = (unsigned long)targ[1];
+ RedisModule_Free(targ);
+
+ sleep(delay);
+ int *r = RedisModule_Alloc(sizeof(int));
+ *r = rand();
+ RedisModule_UnblockClient(bc,r);
+ return NULL;
+}
+
+/* An example blocked client disconnection callback.
+ *
+ * Note that in the case of the HELLO.BLOCK command, the blocked client is now
+ * owned by the thread calling sleep(). In this specific case, there is not
+ * much we can do, however normally we could instead implement a way to
+ * signal the thread that the client disconnected, and sleep the specified
+ * amount of seconds with a while loop calling sleep(1), so that once we
+ * detect the client disconnection, we can terminate the thread ASAP. */
+void HelloBlock_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) {
+ RedisModule_Log(ctx,"warning","Blocked client %p disconnected!",
+ (void*)bc);
+
+ /* Here you should cleanup your state / threads, and if possible
+ * call RedisModule_UnblockClient(), or notify the thread that will
+ * call the function ASAP. */
+}
+
+/* HELLO.BLOCK <delay> <timeout> -- Block for <delay> seconds, then reply with
+ * a random number. Timeout is the command timeout, so that you can test
+ * what happens when the delay is greater than the timeout. */
+int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ long long delay;
+ long long timeout;
+
+ if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid delay");
+ }
+
+ if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid timeout");
+ }
+
+ pthread_t tid;
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);
+
+ /* Here we set a disconnection handler, however since this module will
+ * block in sleep() in a thread, there is not much we can do in the
+ * callback, so this is just to show you the API. */
+ RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);
+
+ /* Now that we set up a blocking client, we need to pass control to
+ * the thread. However we need to pass arguments to the thread:
+ * the delay and a reference to the blocked client handle. */
+ void **targ = RedisModule_Alloc(sizeof(void*)*2);
+ targ[0] = bc;
+ targ[1] = (void*)(unsigned long) delay;
+
+ if (pthread_create(&tid,NULL,HelloBlock_ThreadMain,targ) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* The thread entry point that actually executes the blocking part
+ * of the command HELLO.KEYS.
+ *
+ * Note: this implementation is very simple on purpose, so duplicated
+ * keys (returned by SCAN) are not filtered. However adding such functionality
+ * would be trivial, just using any data structure implementing a dictionary
+ * in order to filter the duplicated items. */
+void *HelloKeys_ThreadMain(void *arg) {
+ RedisModuleBlockedClient *bc = arg;
+ RedisModuleCtx *ctx = RedisModule_GetThreadSafeContext(bc);
+ long long cursor = 0;
+ size_t replylen = 0;
+
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ do {
+ RedisModule_ThreadSafeContextLock(ctx);
+ RedisModuleCallReply *reply = RedisModule_Call(ctx,
+ "SCAN","l",(long long)cursor);
+ RedisModule_ThreadSafeContextUnlock(ctx);
+
+ RedisModuleCallReply *cr_cursor =
+ RedisModule_CallReplyArrayElement(reply,0);
+ RedisModuleCallReply *cr_keys =
+ RedisModule_CallReplyArrayElement(reply,1);
+
+ RedisModuleString *s = RedisModule_CreateStringFromCallReply(cr_cursor);
+ RedisModule_StringToLongLong(s,&cursor);
+ RedisModule_FreeString(ctx,s);
+
+ size_t items = RedisModule_CallReplyLength(cr_keys);
+ for (size_t j = 0; j < items; j++) {
+ RedisModuleCallReply *ele =
+ RedisModule_CallReplyArrayElement(cr_keys,j);
+ RedisModule_ReplyWithCallReply(ctx,ele);
+ replylen++;
+ }
+ RedisModule_FreeCallReply(reply);
+ } while (cursor != 0);
+ RedisModule_ReplySetArrayLength(ctx,replylen);
+
+ RedisModule_FreeThreadSafeContext(ctx);
+ RedisModule_UnblockClient(bc,NULL);
+ return NULL;
+}
+
+/* HELLO.KEYS -- Return all the keys in the current database without blocking
+ * the server. The keys do not represent a point-in-time state, so only the
+ * keys that were in the database from the start to the end of the iteration
+ * are guaranteed to be returned. */
+int HelloKeys_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ if (argc != 1) return RedisModule_WrongArity(ctx);
+
+ pthread_t tid;
+
+ /* Note that when blocking the client we do not set any callback: no
+ * timeout is possible since we passed '0', nor do we need a reply callback
+ * because we'll use the thread safe context to accumulate a reply. */
+ RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,NULL,NULL,NULL,0);
+
+ /* Now that we set up the blocking client, we need to pass control to
+ * the thread, passing as argument the reference to the blocked client
+ * handle. */
+ if (pthread_create(&tid,NULL,HelloKeys_ThreadMain,bc) != 0) {
+ RedisModule_AbortBlock(bc);
+ return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
+ }
+ return REDISMODULE_OK;
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"helloblock",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.block",
+ HelloBlock_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+ if (RedisModule_CreateCommand(ctx,"hello.keys",
+ HelloKeys_RedisCommand,"",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellocluster.c b/src/modules/hellocluster.c
new file mode 100644
index 000000000..75d18f3e2
--- /dev/null
+++ b/src/modules/hellocluster.c
@@ -0,0 +1,108 @@
+/* Helloworld cluster -- A ping/pong cluster API example.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_EXPERIMENTAL_API
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+#define MSGTYPE_PING 1
+#define MSGTYPE_PONG 2
+
+/* HELLOCLUSTER.PINGALL */
+int PingallCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PING,(unsigned char*)"Hey",3);
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* HELLOCLUSTER.LIST */
+int ListCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ size_t numnodes;
+ char **ids = RedisModule_GetClusterNodesList(ctx,&numnodes);
+ if (ids == NULL) {
+ return RedisModule_ReplyWithError(ctx,"Cluster not enabled");
+ }
+
+ RedisModule_ReplyWithArray(ctx,numnodes);
+ for (size_t j = 0; j < numnodes; j++) {
+ int port;
+ RedisModule_GetClusterNodeInfo(ctx,ids[j],NULL,NULL,&port,NULL);
+ RedisModule_ReplyWithArray(ctx,2);
+ RedisModule_ReplyWithStringBuffer(ctx,ids[j],REDISMODULE_NODE_ID_LEN);
+ RedisModule_ReplyWithLongLong(ctx,port);
+ }
+ RedisModule_FreeClusterNodesList(ids);
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* Callback for message MSGTYPE_PING */
+void PingReceiver(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ RedisModule_Log(ctx,"notice","PING (type %d) RECEIVED from %.*s: '%.*s'",
+ type,REDISMODULE_NODE_ID_LEN,sender_id,(int)len, payload);
+ RedisModule_SendClusterMessage(ctx,NULL,MSGTYPE_PONG,(unsigned char*)"Ohi!",4);
+}
+
+/* Callback for message MSGTYPE_PONG. */
+void PongReceiver(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len) {
+ RedisModule_Log(ctx,"notice","PONG (type %d) RECEIVED from %.*s: '%.*s'",
+ type,REDISMODULE_NODE_ID_LEN,sender_id,(int)len, payload);
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellocluster",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellocluster.pingall",
+ PingallCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellocluster.list",
+ ListCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PING,PingReceiver);
+ RedisModule_RegisterClusterMessageReceiver(ctx,MSGTYPE_PONG,PongReceiver);
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellotimer.c b/src/modules/hellotimer.c
new file mode 100644
index 000000000..6c3e1d7f4
--- /dev/null
+++ b/src/modules/hellotimer.c
@@ -0,0 +1,79 @@
+/* Hellotimer module -- An example of the timers API.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2018, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_EXPERIMENTAL_API
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* Timer callback. */
+void timerHandler(RedisModuleCtx *ctx, void *data) {
+ REDISMODULE_NOT_USED(ctx);
+ printf("Fired %s!\n", data);
+ RedisModule_Free(data);
+}
+
+/* HELLOTIMER.TIMER */
+int TimerCommand_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ for (int j = 0; j < 10; j++) {
+ int delay = rand() % 5000;
+ char *buf = RedisModule_Alloc(256);
+ snprintf(buf,256,"After %d", delay);
+ RedisModuleTimerID tid = RedisModule_CreateTimer(ctx,delay,timerHandler,buf);
+ REDISMODULE_NOT_USED(tid);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* This function must be present on each Redis module. It is used in order to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellotimer",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotimer.timer",
+ TimerCommand_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/hellotype.c b/src/modules/hellotype.c
new file mode 100644
index 000000000..ba634c4a1
--- /dev/null
+++ b/src/modules/hellotype.c
@@ -0,0 +1,286 @@
+/* This file implements a new module native data type called "HELLOTYPE".
+ * The data structure implemented is a very simple ordered linked list of
+ * 64 bit integers, in order to have something that is real world enough, but
+ * at the same time, extremely simple to understand, to show how the API
+ * works, how a new data type is created, and how to write basic methods
+ * for RDB loading, saving and AOF rewriting.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdint.h>
+
+static RedisModuleType *HelloType;
+
+/* ========================== Internal data structure =======================
+ * This is just a linked list of 64 bit integers where elements are inserted
+ * in-place, so it's ordered. There is no pop/push operation but just insert
+ * because it is enough to show the implementation of new data types without
+ * making things complex. */
+
+struct HelloTypeNode {
+ int64_t value;
+ struct HelloTypeNode *next;
+};
+
+struct HelloTypeObject {
+ struct HelloTypeNode *head;
+ size_t len; /* Number of elements added. */
+};
+
+struct HelloTypeObject *createHelloTypeObject(void) {
+ struct HelloTypeObject *o;
+ o = RedisModule_Alloc(sizeof(*o));
+ o->head = NULL;
+ o->len = 0;
+ return o;
+}
+
+void HelloTypeInsert(struct HelloTypeObject *o, int64_t ele) {
+ struct HelloTypeNode *next = o->head, *newnode, *prev = NULL;
+
+ while(next && next->value < ele) {
+ prev = next;
+ next = next->next;
+ }
+ newnode = RedisModule_Alloc(sizeof(*newnode));
+ newnode->value = ele;
+ newnode->next = next;
+ if (prev) {
+ prev->next = newnode;
+ } else {
+ o->head = newnode;
+ }
+ o->len++;
+}
+
+void HelloTypeReleaseObject(struct HelloTypeObject *o) {
+ struct HelloTypeNode *cur, *next;
+ cur = o->head;
+ while(cur) {
+ next = cur->next;
+ RedisModule_Free(cur);
+ cur = next;
+ }
+ RedisModule_Free(o);
+}
+
+/* ========================= "hellotype" type commands ======================= */
+
+/* HELLOTYPE.INSERT key value */
+int HelloTypeInsert_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long value;
+ if ((RedisModule_StringToLongLong(argv[2],&value) != REDISMODULE_OK)) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid value: must be a signed 64 bit integer");
+ }
+
+ /* Create an empty value object if the key is currently empty. */
+ struct HelloTypeObject *hto;
+ if (type == REDISMODULE_KEYTYPE_EMPTY) {
+ hto = createHelloTypeObject();
+ RedisModule_ModuleTypeSetValue(key,HelloType,hto);
+ } else {
+ hto = RedisModule_ModuleTypeGetValue(key);
+ }
+
+ /* Insert the new element. */
+ HelloTypeInsert(hto,value);
+
+ RedisModule_ReplyWithLongLong(ctx,hto->len);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
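+
+/* A hypothetical redis-cli session (the key name is arbitrary); each call
+ * replies with the new number of elements, as coded above:
+ *
+ *   127.0.0.1:6379> HELLOTYPE.INSERT mykey 5
+ *   (integer) 1
+ *   127.0.0.1:6379> HELLOTYPE.INSERT mykey 2
+ *   (integer) 2
+ */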
+
+/* HELLOTYPE.RANGE key first count */
+int HelloTypeRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long first, count;
+ if (RedisModule_StringToLongLong(argv[2],&first) != REDISMODULE_OK ||
+ RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK ||
+ first < 0 || count < 0)
+ {
+ return RedisModule_ReplyWithError(ctx,
+ "ERR invalid first or count parameters");
+ }
+
+ struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+ struct HelloTypeNode *node = hto ? hto->head : NULL;
+ /* Skip the first 'first' elements, then emit up to 'count' elements. */
+ while(node && first--) node = node->next;
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ long long arraylen = 0;
+ while(node && count--) {
+ RedisModule_ReplyWithLongLong(ctx,node->value);
+ arraylen++;
+ node = node->next;
+ }
+ RedisModule_ReplySetArrayLength(ctx,arraylen);
+ return REDISMODULE_OK;
+}
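+
+/* A hypothetical session, continuing the one above after also inserting 9,
+ * so that the stored list is 2 -> 5 -> 9: the first element is skipped and
+ * two are returned:
+ *
+ *   127.0.0.1:6379> HELLOTYPE.RANGE mykey 1 2
+ *   1) (integer) 5
+ *   2) (integer) 9
+ */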
+
+/* HELLOTYPE.LEN key */
+int HelloTypeLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_EMPTY &&
+ RedisModule_ModuleTypeGetType(key) != HelloType)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ struct HelloTypeObject *hto = RedisModule_ModuleTypeGetValue(key);
+ RedisModule_ReplyWithLongLong(ctx,hto ? hto->len : 0);
+ return REDISMODULE_OK;
+}
+
+/* ========================== "hellotype" type methods ======================= */
+
+void *HelloTypeRdbLoad(RedisModuleIO *rdb, int encver) {
+ if (encver != 0) {
+ /* RedisModule_Log("warning","Can't load data with version %d", encver);*/
+ return NULL;
+ }
+ uint64_t elements = RedisModule_LoadUnsigned(rdb);
+ struct HelloTypeObject *hto = createHelloTypeObject();
+ while(elements--) {
+ int64_t ele = RedisModule_LoadSigned(rdb);
+ HelloTypeInsert(hto,ele);
+ }
+ return hto;
+}
+
+void HelloTypeRdbSave(RedisModuleIO *rdb, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ RedisModule_SaveUnsigned(rdb,hto->len);
+ while(node) {
+ RedisModule_SaveSigned(rdb,node->value);
+ node = node->next;
+ }
+}
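+
+/* The serialized value is therefore the element count (as an unsigned
+ * integer) followed by one signed integer per element; HelloTypeRdbLoad
+ * above reads the fields back in exactly the same order. */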
+
+void HelloTypeAofRewrite(RedisModuleIO *aof, RedisModuleString *key, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_EmitAOF(aof,"HELLOTYPE.INSERT","sl",key,node->value);
+ node = node->next;
+ }
+}
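+
+/* For example, a value holding the elements 2 and 5 is rewritten into the
+ * AOF as the two commands "HELLOTYPE.INSERT <key> 2" and
+ * "HELLOTYPE.INSERT <key> 5": replaying them rebuilds the same list. */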
+
+/* The goal of this function is to return the amount of memory used by
+ * the HelloType value. */
+size_t HelloTypeMemUsage(const void *value) {
+ const struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ return sizeof(*hto) + sizeof(*node)*hto->len;
+}
+
+void HelloTypeFree(void *value) {
+ HelloTypeReleaseObject(value);
+}
+
+void HelloTypeDigest(RedisModuleDigest *md, void *value) {
+ struct HelloTypeObject *hto = value;
+ struct HelloTypeNode *node = hto->head;
+ while(node) {
+ RedisModule_DigestAddLongLong(md,node->value);
+ node = node->next;
+ }
+ RedisModule_DigestEndSequence(md);
+}
+
+/* This function must be present in each Redis module. It is used to
+ * register the commands and the data type into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"hellotype",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ RedisModuleTypeMethods tm = {
+ .version = REDISMODULE_TYPE_METHOD_VERSION,
+ .rdb_load = HelloTypeRdbLoad,
+ .rdb_save = HelloTypeRdbSave,
+ .aof_rewrite = HelloTypeAofRewrite,
+ .mem_usage = HelloTypeMemUsage,
+ .free = HelloTypeFree,
+ .digest = HelloTypeDigest
+ };
+
+ HelloType = RedisModule_CreateDataType(ctx,"hellotype",0,&tm);
+ if (HelloType == NULL) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.insert",
+ HelloTypeInsert_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.range",
+ HelloTypeRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hellotype.len",
+ HelloTypeLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
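+
+/* To try the module (the path below is hypothetical), load it at server
+ * startup:
+ *
+ *   redis-server --loadmodule /path/to/hellotype.so
+ *
+ * or add an equivalent "loadmodule" directive to redis.conf. */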
diff --git a/src/modules/helloworld.c b/src/modules/helloworld.c
new file mode 100644
index 000000000..3b00dea77
--- /dev/null
+++ b/src/modules/helloworld.c
@@ -0,0 +1,621 @@
+/* Helloworld module -- A few examples of the Redis Modules API in the form
+ * of commands showing how to accomplish common tasks.
+ *
+ * This module does not do anything genuinely useful: its few commands
+ * exist purely to demonstrate the API.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "../redismodule.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+
+/* HELLO.SIMPLE is among the simplest commands you can implement.
+ * It just returns the currently selected DB id, functionality that is
+ * missing in Redis. The command uses two important API calls: one to
+ * fetch the currently selected DB, the other to send the client
+ * an integer reply as response. */
+int HelloSimple_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_ReplyWithLongLong(ctx,RedisModule_GetSelectedDb(ctx));
+ return REDISMODULE_OK;
+}
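+
+/* A hypothetical session showing the integer reply:
+ *
+ *   127.0.0.1:6379> SELECT 5
+ *   OK
+ *   127.0.0.1:6379[5]> HELLO.SIMPLE
+ *   (integer) 5
+ */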
+
+/* HELLO.PUSH.NATIVE re-implements RPUSH, and shows the low level modules API
+ * where you can "open" keys, perform low level operations, create new keys
+ * by pushing elements into non-existing keys, and so forth.
+ *
+ * You'll find this command to be roughly as fast as the actual RPUSH
+ * command. */
+int HelloPushNative_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ RedisModule_ListPush(key,REDISMODULE_LIST_TAIL,argv[2]);
+ size_t newlen = RedisModule_ValueLength(key);
+ RedisModule_CloseKey(key);
+ RedisModule_ReplyWithLongLong(ctx,newlen);
+ return REDISMODULE_OK;
+}
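+
+/* A hypothetical session (arbitrary key and values); the reply is the new
+ * list length, as returned by RedisModule_ValueLength() above:
+ *
+ *   127.0.0.1:6379> HELLO.PUSH.NATIVE mylist a
+ *   (integer) 1
+ *   127.0.0.1:6379> HELLO.PUSH.NATIVE mylist b
+ *   (integer) 2
+ */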
+
+/* HELLO.PUSH.CALL implements RPUSH using a higher level approach, calling
+ * a Redis command instead of working with the key in a low level way. This
+ * approach is useful when you need to call Redis commands that are not
+ * available as low level APIs, or when you don't need the maximum speed
+ * possible but instead prefer implementation simplicity. */
+int HelloPushCall_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ long long len = RedisModule_CallReplyInteger(reply);
+ RedisModule_FreeCallReply(reply);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.PUSH.CALL2
+ * This is exactly like HELLO.PUSH.CALL, but shows how we can reply to the
+ * client directly using the reply object that Call() returned. */
+int HelloPushCall2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"RPUSH","ss",argv[1],argv[2]);
+ RedisModule_ReplyWithCallReply(ctx,reply);
+ RedisModule_FreeCallReply(reply);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.LIST.SUM.LEN returns the total length of all the items inside
+ * a Redis list, by using the high level Call() API.
+ * This command is an example of accessing array replies. */
+int HelloListSumLen_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModuleCallReply *reply;
+
+ reply = RedisModule_Call(ctx,"LRANGE","sll",argv[1],(long long)0,(long long)-1);
+ size_t strlen = 0;
+ size_t items = RedisModule_CallReplyLength(reply);
+ size_t j;
+ for (j = 0; j < items; j++) {
+ RedisModuleCallReply *ele = RedisModule_CallReplyArrayElement(reply,j);
+ strlen += RedisModule_CallReplyLength(ele);
+ }
+ RedisModule_FreeCallReply(reply);
+ RedisModule_ReplyWithLongLong(ctx,strlen);
+ return REDISMODULE_OK;
+}
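+
+/* A hypothetical session: "foo" is 3 bytes and "barbar" is 6, so the sum
+ * of the lengths is 9:
+ *
+ *   127.0.0.1:6379> RPUSH mylist foo barbar
+ *   (integer) 2
+ *   127.0.0.1:6379> HELLO.LIST.SUM.LEN mylist
+ *   (integer) 9
+ */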
+
+/* HELLO.LIST.SPLICE srclist dstlist count
+ * Moves 'count' elements from the tail of 'srclist' to the head of
+ * 'dstlist'. If fewer than 'count' elements are available, it moves as
+ * many elements as possible. */
+int HelloListSplice_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0)) {
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ RedisModule_FreeString(ctx,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_CloseKey(srckey);
+ RedisModule_CloseKey(dstkey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
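+
+/* A hypothetical session (arbitrary key names); two elements are moved from
+ * the tail of src to the head of dst, and the reply is the remaining length
+ * of src:
+ *
+ *   127.0.0.1:6379> RPUSH src a b c
+ *   (integer) 3
+ *   127.0.0.1:6379> HELLO.LIST.SPLICE src dst 2
+ *   (integer) 1
+ *   127.0.0.1:6379> LRANGE dst 0 -1
+ *   1) "b"
+ *   2) "c"
+ */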
+
+/* Like HELLO.LIST.SPLICE above, but uses automatic memory management
+ * to avoid having to free resources manually. */
+int HelloListSpliceAuto_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx);
+
+ RedisModuleKey *srckey = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ RedisModuleKey *dstkey = RedisModule_OpenKey(ctx,argv[2],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ /* Src and dst key must be empty or lists. */
+ if ((RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(srckey) != REDISMODULE_KEYTYPE_EMPTY) ||
+ (RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_LIST &&
+ RedisModule_KeyType(dstkey) != REDISMODULE_KEYTYPE_EMPTY))
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ long long count;
+ if ((RedisModule_StringToLongLong(argv[3],&count) != REDISMODULE_OK) ||
+ (count < 0))
+ {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+ }
+
+ while(count-- > 0) {
+ RedisModuleString *ele;
+
+ ele = RedisModule_ListPop(srckey,REDISMODULE_LIST_TAIL);
+ if (ele == NULL) break;
+ RedisModule_ListPush(dstkey,REDISMODULE_LIST_HEAD,ele);
+ }
+
+ size_t len = RedisModule_ValueLength(srckey);
+ RedisModule_ReplyWithLongLong(ctx,len);
+ return REDISMODULE_OK;
+}
+
+/* HELLO.RAND.ARRAY <count>
+ * Shows how to generate arrays as command replies.
+ * It just outputs <count> random numbers. */
+int HelloRandArray_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+ long long count;
+ if (RedisModule_StringToLongLong(argv[1],&count) != REDISMODULE_OK ||
+ count < 0)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid count");
+
+ /* To reply with an array, we call RedisModule_ReplyWithArray() followed
+ * by "count" calls to other reply functions that generate the
+ * elements of the array. */
+ RedisModule_ReplyWithArray(ctx,count);
+ while(count--) RedisModule_ReplyWithLongLong(ctx,rand());
+ return REDISMODULE_OK;
+}
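+
+/* A hypothetical session (the actual numbers will vary, since they come
+ * from rand()):
+ *
+ *   127.0.0.1:6379> HELLO.RAND.ARRAY 3
+ *   1) (integer) 1804289383
+ *   2) (integer) 846930886
+ *   3) (integer) 1681692777
+ */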
+
+/* This is a simple command to test replication. Because of the "!" modifier
+ * in the RedisModule_Call() call, the two INCRs get replicated.
+ * Also note how the ECHO is replicated in an unexpected position (see the
+ * comments in the function implementation). */
+int HelloRepl1_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
+{
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+ RedisModule_AutoMemory(ctx);
+
+ /* This will be replicated *after* the two INCR statements, since
+ * the Call() replication has precedence, so the actual replication
+ * stream will be:
+ *
+ * MULTI
+ * INCR foo
+ * INCR bar
+ * ECHO c foo
+ * EXEC
+ */
+ RedisModule_Replicate(ctx,"ECHO","c","foo");
+
+ /* Using the "!" modifier we replicate the command if it
+ * modified the dataset in some way. */
+ RedisModule_Call(ctx,"INCR","c!","foo");
+ RedisModule_Call(ctx,"INCR","c!","bar");
+
+ RedisModule_ReplyWithLongLong(ctx,0);
+
+ return REDISMODULE_OK;
+}
+
+/* Another command to show replication. In this case, we call
+ * RedisModule_ReplicateVerbatim() to indicate that we want the command to be
+ * propagated to slaves / AOF exactly as the user called it.
+ *
+ * This command also shows how to work with string objects.
+ * It takes a list, increments all the elements (which must have
+ * a numerical value) by 1, and returns the sum of all the elements
+ * as reply.
+ *
+ * Usage: HELLO.REPL2 <list-key> */
+int HelloRepl2_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST)
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+
+ size_t listlen = RedisModule_ValueLength(key);
+ long long sum = 0;
+
+ /* Rotate and increment. */
+ while(listlen--) {
+ RedisModuleString *ele = RedisModule_ListPop(key,REDISMODULE_LIST_TAIL);
+ long long val;
+ if (RedisModule_StringToLongLong(ele,&val) != REDISMODULE_OK) val = 0;
+ val++;
+ sum += val;
+ RedisModuleString *newele = RedisModule_CreateStringFromLongLong(ctx,val);
+ RedisModule_ListPush(key,REDISMODULE_LIST_HEAD,newele);
+ }
+ RedisModule_ReplyWithLongLong(ctx,sum);
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
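+
+/* A hypothetical session: each element of the list is incremented, so the
+ * reply is (1+1)+(2+1)+(3+1) = 9:
+ *
+ *   127.0.0.1:6379> RPUSH mylist 1 2 3
+ *   (integer) 3
+ *   127.0.0.1:6379> HELLO.REPL2 mylist
+ *   (integer) 9
+ */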
+
+/* This is an example of string DMA access. Given a key containing a string,
+ * it toggles the case of each character, from lower to upper case or the
+ * other way around.
+ *
+ * No automatic memory management is used in this example (for the sake
+ * of variety).
+ *
+ * HELLO.TOGGLE.CASE key */
+int HelloToggleCase_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (argc != 2) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+
+ int keytype = RedisModule_KeyType(key);
+ if (keytype != REDISMODULE_KEYTYPE_STRING &&
+ keytype != REDISMODULE_KEYTYPE_EMPTY)
+ {
+ RedisModule_CloseKey(key);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ if (keytype == REDISMODULE_KEYTYPE_STRING) {
+ size_t len, j;
+ char *s = RedisModule_StringDMA(key,&len,REDISMODULE_WRITE);
+ for (j = 0; j < len; j++) {
+ if (isupper(s[j])) {
+ s[j] = tolower(s[j]);
+ } else {
+ s[j] = toupper(s[j]);
+ }
+ }
+ }
+
+ RedisModule_CloseKey(key);
+ RedisModule_ReplyWithSimpleString(ctx,"OK");
+ RedisModule_ReplicateVerbatim(ctx);
+ return REDISMODULE_OK;
+}
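+
+/* A hypothetical session (arbitrary key and value):
+ *
+ *   127.0.0.1:6379> SET mykey Hello
+ *   OK
+ *   127.0.0.1:6379> HELLO.TOGGLE.CASE mykey
+ *   OK
+ *   127.0.0.1:6379> GET mykey
+ *   "hELLO"
+ */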
+
+/* HELLO.MORE.EXPIRE key milliseconds.
+ *
+ * If the key already has an associated TTL, extends it by "milliseconds"
+ * milliseconds. Otherwise no operation is performed. */
+int HelloMoreExpire_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ if (argc != 3) return RedisModule_WrongArity(ctx);
+
+ mstime_t addms, expire;
+
+ if (RedisModule_StringToLongLong(argv[2],&addms) != REDISMODULE_OK)
+ return RedisModule_ReplyWithError(ctx,"ERR invalid expire time");
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ expire = RedisModule_GetExpire(key);
+ if (expire != REDISMODULE_NO_EXPIRE) {
+ expire += addms;
+ RedisModule_SetExpire(key,expire);
+ }
+ return RedisModule_ReplyWithSimpleString(ctx,"OK");
+}
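+
+/* A hypothetical session: the key starts with a 10 second TTL that the
+ * command extends by 5 more seconds (the PTTL output is approximate):
+ *
+ *   127.0.0.1:6379> SET mykey somevalue PX 10000
+ *   OK
+ *   127.0.0.1:6379> HELLO.MORE.EXPIRE mykey 5000
+ *   OK
+ *   127.0.0.1:6379> PTTL mykey
+ *   (integer) 14987
+ */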
+
+/* HELLO.ZSUMRANGE key startscore endscore
+ * Return the sum of the scores of all the elements with score between
+ * startscore and endscore.
+ *
+ * The computation is performed twice, once iterating from start to end and
+ * once backward. The two sums, returned as a two element array,
+ * should match. */
+int HelloZsumRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ double score_start, score_end;
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ if (RedisModule_StringToDouble(argv[2],&score_start) != REDISMODULE_OK ||
+ RedisModule_StringToDouble(argv[3],&score_end) != REDISMODULE_OK)
+ {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid range");
+ }
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+ RedisModule_CloseKey(key);
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ double scoresum_a = 0;
+ double scoresum_b = 0;
+
+ RedisModule_ZsetFirstInScoreRange(key,score_start,score_end,0,0);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_FreeString(ctx,ele);
+ scoresum_a += score;
+ RedisModule_ZsetRangeNext(key);
+ }
+ RedisModule_ZsetRangeStop(key);
+
+ RedisModule_ZsetLastInScoreRange(key,score_start,score_end,0,0);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_FreeString(ctx,ele);
+ scoresum_b += score;
+ RedisModule_ZsetRangePrev(key);
+ }
+
+ RedisModule_ZsetRangeStop(key);
+
+ RedisModule_CloseKey(key);
+
+ RedisModule_ReplyWithArray(ctx,2);
+ RedisModule_ReplyWithDouble(ctx,scoresum_a);
+ RedisModule_ReplyWithDouble(ctx,scoresum_b);
+ return REDISMODULE_OK;
+}
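+
+/* A hypothetical session: only "a" (score 1) and "b" (score 2) fall in the
+ * requested range, so both sums are 3 (doubles are replied as strings):
+ *
+ *   127.0.0.1:6379> ZADD myzset 1 a 2 b 3 c
+ *   (integer) 3
+ *   127.0.0.1:6379> HELLO.ZSUMRANGE myzset 1 2
+ *   1) "3"
+ *   2) "3"
+ */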
+
+/* HELLO.LEXRANGE key min_lex max_lex min_age max_age
+ * This command expects a sorted set stored at key in the following form:
+ * - All the elements have score 0.
+ * - Elements are pairs of "<name>:<age>", for example "Anna:52".
+ * The command returns all the sorted set items that are lexicographically
+ * between the specified range (using the same format as ZRANGEBYLEX).
+ * The min_age and max_age arguments are accepted, but to keep this example
+ * simple the implementation below does not actually filter by age. */
+int HelloLexRange_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 6) return RedisModule_WrongArity(ctx);
+
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_ZSET) {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ if (RedisModule_ZsetFirstInLexRange(key,argv[2],argv[3]) != REDISMODULE_OK) {
+ return RedisModule_ReplyWithError(ctx,"invalid range");
+ }
+
+ int arraylen = 0;
+ RedisModule_ReplyWithArray(ctx,REDISMODULE_POSTPONED_ARRAY_LEN);
+ while(!RedisModule_ZsetRangeEndReached(key)) {
+ double score;
+ RedisModuleString *ele = RedisModule_ZsetRangeCurrentElement(key,&score);
+ RedisModule_ReplyWithString(ctx,ele);
+ RedisModule_FreeString(ctx,ele);
+ RedisModule_ZsetRangeNext(key);
+ arraylen++;
+ }
+ RedisModule_ZsetRangeStop(key);
+ RedisModule_ReplySetArrayLength(ctx,arraylen);
+ RedisModule_CloseKey(key);
+ return REDISMODULE_OK;
+}
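+
+/* A hypothetical session, using "-" and "+" to select the whole
+ * lexicographical range (the age arguments are ignored, as noted above):
+ *
+ *   127.0.0.1:6379> ZADD myzset 0 Anna:52 0 Bob:41
+ *   (integer) 2
+ *   127.0.0.1:6379> HELLO.LEXRANGE myzset - + 0 100
+ *   1) "Anna:52"
+ *   2) "Bob:41"
+ */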
+
+/* HELLO.HCOPY key srcfield dstfield
+ * This is just an example command that sets the hash field dstfield to the
+ * same value as srcfield. If srcfield does not exist, no operation is
+ * performed.
+ *
+ * The command returns 1 if the copy is performed (srcfield exists),
+ * otherwise 0. */
+int HelloHCopy_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+ RedisModuleKey *key = RedisModule_OpenKey(ctx,argv[1],
+ REDISMODULE_READ|REDISMODULE_WRITE);
+ int type = RedisModule_KeyType(key);
+ if (type != REDISMODULE_KEYTYPE_HASH &&
+ type != REDISMODULE_KEYTYPE_EMPTY)
+ {
+ return RedisModule_ReplyWithError(ctx,REDISMODULE_ERRORMSG_WRONGTYPE);
+ }
+
+ /* Get the old field value. */
+ RedisModuleString *oldval;
+ RedisModule_HashGet(key,REDISMODULE_HASH_NONE,argv[2],&oldval,NULL);
+ if (oldval) {
+ RedisModule_HashSet(key,REDISMODULE_HASH_NONE,argv[3],oldval,NULL);
+ }
+ RedisModule_ReplyWithLongLong(ctx,oldval != NULL);
+ return REDISMODULE_OK;
+}
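+
+/* A hypothetical session (arbitrary key and field names):
+ *
+ *   127.0.0.1:6379> HSET myhash src somevalue
+ *   (integer) 1
+ *   127.0.0.1:6379> HELLO.HCOPY myhash src dst
+ *   (integer) 1
+ *   127.0.0.1:6379> HGET myhash dst
+ *   "somevalue"
+ */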
+
+/* HELLO.LEFTPAD str len ch
+ * This is an implementation of the infamous LEFTPAD function, that
+ * was at the center of an issue with the npm modules system in March 2016.
+ *
+ * LEFTPAD is a good example of using a Redis Modules API called the
+ * "pool allocator", a famous way to allocate memory in yet another
+ * open source project, the Apache web server.
+ *
+ * The concept is very simple: some memory is useful only in the context
+ * of serving a request, and must be freed anyway when the callback
+ * implementing the command returns. In that case the module does not
+ * need to retain a reference to these allocations; it is just required
+ * to free the memory before returning. Instead the module can call
+ * RedisModule_PoolAlloc(), which works like malloc() but automatically
+ * frees the memory when the module callback returns.
+ *
+ * Note that PoolAlloc() does not necessarily require AutoMemory to be
+ * active. */
+int HelloLeftPad_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx); /* Use automatic memory management. */
+ long long padlen;
+
+ if (argc != 4) return RedisModule_WrongArity(ctx);
+
+ if ((RedisModule_StringToLongLong(argv[2],&padlen) != REDISMODULE_OK) ||
+ (padlen< 0)) {
+ return RedisModule_ReplyWithError(ctx,"ERR invalid padding length");
+ }
+ size_t strlen, chlen;
+ const char *str = RedisModule_StringPtrLen(argv[1], &strlen);
+ const char *ch = RedisModule_StringPtrLen(argv[3], &chlen);
+
+ /* If the string is already larger than the target len, just return
+ * the string itself. */
+ if (strlen >= (size_t)padlen)
+ return RedisModule_ReplyWithString(ctx,argv[1]);
+
+ /* Padding must be a single character in this simple implementation. */
+ if (chlen != 1)
+ return RedisModule_ReplyWithError(ctx,
+ "ERR padding must be a single char");
+
+ /* Here we use our pool allocator, for our throw-away allocation. */
+ padlen -= strlen;
+ char *buf = RedisModule_PoolAlloc(ctx,padlen+strlen);
+ for (long long j = 0; j < padlen; j++) buf[j] = *ch;
+ memcpy(buf+padlen,str,strlen);
+
+ RedisModule_ReplyWithStringBuffer(ctx,buf,padlen+strlen);
+ return REDISMODULE_OK;
+}
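+
+/* A hypothetical session: "foo" is padded with '0' up to 8 characters:
+ *
+ *   127.0.0.1:6379> HELLO.LEFTPAD foo 8 0
+ *   "00000foo"
+ */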
+
+/* This function must be present in each Redis module. It is used to
+ * register the commands into the Redis server. */
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ if (RedisModule_Init(ctx,"helloworld",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ /* Log the list of parameters passed when loading the module. */
+ for (int j = 0; j < argc; j++) {
+ const char *s = RedisModule_StringPtrLen(argv[j],NULL);
+ printf("Module loaded with ARGV[%d] = %s\n", j, s);
+ }
+
+ if (RedisModule_CreateCommand(ctx,"hello.simple",
+ HelloSimple_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.native",
+ HelloPushNative_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.call",
+ HelloPushCall_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.push.call2",
+ HelloPushCall2_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.sum.len",
+ HelloListSumLen_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.splice",
+ HelloListSplice_RedisCommand,"write deny-oom",1,2,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.list.splice.auto",
+ HelloListSpliceAuto_RedisCommand,
+ "write deny-oom",1,2,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.rand.array",
+ HelloRandArray_RedisCommand,"readonly",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.repl1",
+ HelloRepl1_RedisCommand,"write",0,0,0) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.repl2",
+ HelloRepl2_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.toggle.case",
+ HelloToggleCase_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.more.expire",
+ HelloMoreExpire_RedisCommand,"write",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.zsumrange",
+ HelloZsumRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.lexrange",
+ HelloLexRange_RedisCommand,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.hcopy",
+ HelloHCopy_RedisCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"hello.leftpad",
+ HelloLeftPad_RedisCommand,"",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c
new file mode 100644
index 000000000..67a861704
--- /dev/null
+++ b/src/modules/testmodule.c
@@ -0,0 +1,445 @@
+/* Module designed to test the Redis modules subsystem.
+ *
+ * -----------------------------------------------------------------------------
+ *
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define REDISMODULE_EXPERIMENTAL_API
+#include "../redismodule.h"
+#include <string.h>
+
+/* --------------------------------- Helpers -------------------------------- */
+
+/* Return true if the reply matches the given C null-terminated string. */
+int TestMatchReply(RedisModuleCallReply *reply, char *str) {
+ RedisModuleString *mystr;
+ mystr = RedisModule_CreateStringFromCallReply(reply);
+ if (!mystr) return 0;
+ const char *ptr = RedisModule_StringPtrLen(mystr,NULL);
+ return strcmp(ptr,str) == 0;
+}
+
+/* ------------------------------- Test units ------------------------------- */
+
+/* TEST.CALL -- Test Call() API. */
+int TestCall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleCallReply *reply;
+
+ RedisModule_Call(ctx,"DEL","c","mylist");
+ RedisModuleString *mystr = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_Call(ctx,"RPUSH","csl","mylist",mystr,(long long)1234);
+ reply = RedisModule_Call(ctx,"LRANGE","ccc","mylist","0","-1");
+ long long items = RedisModule_CallReplyLength(reply);
+ if (items != 2) goto fail;
+
+ RedisModuleCallReply *item0, *item1;
+
+ item0 = RedisModule_CallReplyArrayElement(reply,0);
+ item1 = RedisModule_CallReplyArrayElement(reply,1);
+ if (!TestMatchReply(item0,"foo")) goto fail;
+ if (!TestMatchReply(item1,"1234")) goto fail;
+
+ RedisModule_ReplyWithSimpleString(ctx,"OK");
+ return REDISMODULE_OK;
+
+fail:
+ RedisModule_ReplyWithSimpleString(ctx,"ERR");
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.APPEND -- Test appending to an existing string object. */
+int TestStringAppend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_StringAppendBuffer(ctx,s,"bar",3);
+ RedisModule_ReplyWithString(ctx,s);
+ RedisModule_FreeString(ctx,s);
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.APPEND.AM -- Test append with retain when auto memory is on. */
+int TestStringAppendAM(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleString *s = RedisModule_CreateString(ctx,"foo",3);
+ RedisModule_RetainString(ctx,s);
+ RedisModule_StringAppendBuffer(ctx,s,"bar",3);
+ RedisModule_ReplyWithString(ctx,s);
+ RedisModule_FreeString(ctx,s);
+ return REDISMODULE_OK;
+}
+
+/* TEST.STRING.PRINTF -- Test string formatting. */
+int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx);
+ if (argc < 3) {
+ return RedisModule_WrongArity(ctx);
+ }
+ RedisModuleString *s = RedisModule_CreateStringPrintf(ctx,
+ "Got %d args. argv[1]: %s, argv[2]: %s",
+ argc,
+ RedisModule_StringPtrLen(argv[1], NULL),
+ RedisModule_StringPtrLen(argv[2], NULL)
+ );
+
+ RedisModule_ReplyWithString(ctx,s);
+
+ return REDISMODULE_OK;
+}
+
+int failTest(RedisModuleCtx *ctx, const char *msg) {
+ RedisModule_ReplyWithError(ctx, msg);
+ return REDISMODULE_ERR;
+}
+
+int TestUnlink(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ RedisModule_AutoMemory(ctx);
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModuleKey *k = RedisModule_OpenKey(ctx, RedisModule_CreateStringPrintf(ctx, "unlinked"), REDISMODULE_WRITE | REDISMODULE_READ);
+ if (!k) return failTest(ctx, "Could not create key");
+
+ if (REDISMODULE_ERR == RedisModule_StringSet(k, RedisModule_CreateStringPrintf(ctx, "Foobar"))) {
+ return failTest(ctx, "Could not set string value");
+ }
+
+ RedisModuleCallReply *rep = RedisModule_Call(ctx, "EXISTS", "c", "unlinked");
+ if (!rep || RedisModule_CallReplyInteger(rep) != 1) {
+ return failTest(ctx, "Key does not exist before unlink");
+ }
+
+ if (REDISMODULE_ERR == RedisModule_UnlinkKey(k)) {
+ return failTest(ctx, "Could not unlink key");
+ }
+
+ rep = RedisModule_Call(ctx, "EXISTS", "c", "unlinked");
+ if (!rep || RedisModule_CallReplyInteger(rep) != 0) {
+ return failTest(ctx, "Could not verify key to be unlinked");
+ }
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+int NotifyCallback(RedisModuleCtx *ctx, int type, const char *event,
+ RedisModuleString *key) {
+ /* For each key notified, increment a per-key counter inside the
+ * "notifications" hash. */
+ RedisModule_Log(ctx, "notice", "Got event type %d, event %s, key %s", type,
+ event, RedisModule_StringPtrLen(key, NULL));
+
+ RedisModule_Call(ctx, "HINCRBY", "csc", "notifications", key, "1");
+ return REDISMODULE_OK;
+}
+
+/* TEST.NOTIFICATIONS -- Test Keyspace Notifications. */
+int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+#define FAIL(msg, ...) \
+ { \
+ RedisModule_Log(ctx, "warning", "Failed NOTIFY Test. Reason: " #msg, ##__VA_ARGS__); \
+ goto err; \
+ }
+ RedisModule_Call(ctx, "FLUSHDB", "");
+
+ RedisModule_Call(ctx, "SET", "cc", "foo", "bar");
+ RedisModule_Call(ctx, "SET", "cc", "foo", "baz");
+ RedisModule_Call(ctx, "SADD", "cc", "bar", "x");
+ RedisModule_Call(ctx, "SADD", "cc", "bar", "y");
+
+ RedisModule_Call(ctx, "HSET", "ccc", "baz", "x", "y");
+ /* LPUSH should be ignored and not increment any counters */
+ RedisModule_Call(ctx, "LPUSH", "cc", "l", "y");
+ RedisModule_Call(ctx, "LPUSH", "cc", "l", "y");
+
+ size_t sz;
+ const char *rep;
+ RedisModuleCallReply *r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "foo");
+ if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_STRING) {
+ FAIL("Wrong or no reply for foo");
+ } else {
+ rep = RedisModule_CallReplyStringPtr(r, &sz);
+ if (sz != 1 || *rep != '2') {
+ FAIL("Got reply '%s'. expected '2'", RedisModule_CallReplyStringPtr(r, NULL));
+ }
+ }
+
+ r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "bar");
+ if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_STRING) {
+ FAIL("Wrong or no reply for bar");
+ } else {
+ rep = RedisModule_CallReplyStringPtr(r, &sz);
+ if (sz != 1 || *rep != '2') {
+ FAIL("Got reply '%s'. expected '2'", rep);
+ }
+ }
+
+ r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "baz");
+ if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_STRING) {
+ FAIL("Wrong or no reply for baz");
+ } else {
+ rep = RedisModule_CallReplyStringPtr(r, &sz);
+ if (sz != 1 || *rep != '1') {
+ FAIL("Got reply '%.*s'. expected '1'", sz, rep);
+ }
+ }
+ /* For l we expect nothing since we didn't subscribe to list events */
+ r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "l");
+ if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_NULL) {
+ FAIL("Wrong reply for l");
+ }
+
+ RedisModule_Call(ctx, "FLUSHDB", "");
+
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+err:
+ RedisModule_Call(ctx, "FLUSHDB", "");
+
+ return RedisModule_ReplyWithSimpleString(ctx, "ERR");
+}
+
+/* TEST.CTXFLAGS -- Test GetContextFlags. */
+int TestCtxFlags(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argc);
+ REDISMODULE_NOT_USED(argv);
+
+ RedisModule_AutoMemory(ctx);
+
+ int ok = 1;
+ const char *errString = NULL;
+#undef FAIL
+#define FAIL(msg) \
+ { \
+ ok = 0; \
+ errString = msg; \
+ goto end; \
+ }
+
+ int flags = RedisModule_GetContextFlags(ctx);
+ if (flags == 0) {
+ FAIL("Got no flags");
+ }
+
+ if (flags & REDISMODULE_CTX_FLAGS_LUA) FAIL("Lua flag was set");
+ if (flags & REDISMODULE_CTX_FLAGS_MULTI) FAIL("Multi flag was set");
+
+ if (flags & REDISMODULE_CTX_FLAGS_AOF) FAIL("AOF Flag was set")
+ /* Enable AOF to test AOF flags */
+ RedisModule_Call(ctx, "config", "ccc", "set", "appendonly", "yes");
+ flags = RedisModule_GetContextFlags(ctx);
+ if (!(flags & REDISMODULE_CTX_FLAGS_AOF)) FAIL("AOF Flag not set after config set");
+
+ if (flags & REDISMODULE_CTX_FLAGS_RDB) FAIL("RDB Flag was set");
+ /* Enable RDB to test RDB flags */
+ RedisModule_Call(ctx, "config", "ccc", "set", "save", "900 1");
+ flags = RedisModule_GetContextFlags(ctx);
+ if (!(flags & REDISMODULE_CTX_FLAGS_RDB)) FAIL("RDB Flag was not set after config set");
+
+ if (!(flags & REDISMODULE_CTX_FLAGS_MASTER)) FAIL("Master flag was not set");
+ if (flags & REDISMODULE_CTX_FLAGS_SLAVE) FAIL("Slave flag was set");
+ if (flags & REDISMODULE_CTX_FLAGS_READONLY) FAIL("Read-only flag was set");
+ if (flags & REDISMODULE_CTX_FLAGS_CLUSTER) FAIL("Cluster flag was set");
+
+ if (flags & REDISMODULE_CTX_FLAGS_MAXMEMORY) FAIL("Maxmemory flag was set");
+
+ RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory", "100000000");
+ flags = RedisModule_GetContextFlags(ctx);
+ if (!(flags & REDISMODULE_CTX_FLAGS_MAXMEMORY))
+ FAIL("Maxmemory flag was not set after config set");
+
+ if (flags & REDISMODULE_CTX_FLAGS_EVICT) FAIL("Eviction flag was set");
+ RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory-policy", "allkeys-lru");
+ flags = RedisModule_GetContextFlags(ctx);
+ if (!(flags & REDISMODULE_CTX_FLAGS_EVICT)) FAIL("Eviction flag was not set after config set");
+
+end:
+ /* Revert config changes */
+ RedisModule_Call(ctx, "config", "ccc", "set", "appendonly", "no");
+ RedisModule_Call(ctx, "config", "ccc", "set", "save", "");
+ RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory", "0");
+ RedisModule_Call(ctx, "config", "ccc", "set", "maxmemory-policy", "noeviction");
+
+ if (!ok) {
+ RedisModule_Log(ctx, "warning", "Failed CTXFLAGS Test. Reason: %s", errString);
+ return RedisModule_ReplyWithSimpleString(ctx, "ERR");
+ }
+
+ return RedisModule_ReplyWithSimpleString(ctx, "OK");
+}
+
+/* ----------------------------- Test framework ----------------------------- */
+
+/* Return 1 if the reply matches the specified string, otherwise log errors
+ * in the server log and return 0. */
+int TestAssertStringReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, char *str, size_t len) {
+ RedisModuleString *mystr, *expected;
+
+ if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_STRING) {
+ RedisModule_Log(ctx,"warning","Unexpected reply type %d",
+ RedisModule_CallReplyType(reply));
+ return 0;
+ }
+ mystr = RedisModule_CreateStringFromCallReply(reply);
+ expected = RedisModule_CreateString(ctx,str,len);
+ if (RedisModule_StringCompare(mystr,expected) != 0) {
+ const char *mystr_ptr = RedisModule_StringPtrLen(mystr,NULL);
+ const char *expected_ptr = RedisModule_StringPtrLen(expected,NULL);
+ RedisModule_Log(ctx,"warning",
+ "Unexpected string reply '%s' (instead of '%s')",
+ mystr_ptr, expected_ptr);
+ return 0;
+ }
+ return 1;
+}
+
+/* Return 1 if the reply matches the specified integer, otherwise log errors
+ * in the server log and return 0. */
+int TestAssertIntegerReply(RedisModuleCtx *ctx, RedisModuleCallReply *reply, long long expected) {
+ if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_INTEGER) {
+ RedisModule_Log(ctx,"warning","Unexpected reply type %d",
+ RedisModule_CallReplyType(reply));
+ return 0;
+ }
+ long long val = RedisModule_CallReplyInteger(reply);
+ if (val != expected) {
+ RedisModule_Log(ctx,"warning",
+ "Unexpected integer reply '%lld' (instead of '%lld')",
+ val, expected);
+ return 0;
+ }
+ return 1;
+}
+
+#define T(name,...) \
+ do { \
+ RedisModule_Log(ctx,"warning","Testing %s", name); \
+ reply = RedisModule_Call(ctx,name,__VA_ARGS__); \
+ } while (0)
+
+/* TEST.IT -- Run all the tests. */
+int TestIt(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ RedisModule_AutoMemory(ctx);
+ RedisModuleCallReply *reply;
+
+ /* Make sure the DB is empty before proceeding. */
+ T("dbsize","");
+ if (!TestAssertIntegerReply(ctx,reply,0)) goto fail;
+
+ T("ping","");
+ if (!TestAssertStringReply(ctx,reply,"PONG",4)) goto fail;
+
+ T("test.call","");
+ if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail;
+
+ T("test.ctxflags","");
+ if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail;
+
+ T("test.string.append","");
+ if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail;
+
+ T("test.unlink","");
+ if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail;
+
+ T("test.string.append.am","");
+ if (!TestAssertStringReply(ctx,reply,"foobar",6)) goto fail;
+
+ T("test.string.printf", "cc", "foo", "bar");
+ if (!TestAssertStringReply(ctx,reply,"Got 3 args. argv[1]: foo, argv[2]: bar",38)) goto fail;
+
+ T("test.notify", "");
+ if (!TestAssertStringReply(ctx,reply,"OK",2)) goto fail;
+
+ RedisModule_ReplyWithSimpleString(ctx,"ALL TESTS PASSED");
+ return REDISMODULE_OK;
+
+fail:
+ RedisModule_ReplyWithSimpleString(ctx,
+ "SOME TEST NOT PASSED! Check server logs");
+ return REDISMODULE_OK;
+}
+
+int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+ REDISMODULE_NOT_USED(argv);
+ REDISMODULE_NOT_USED(argc);
+
+ if (RedisModule_Init(ctx,"test",1,REDISMODULE_APIVER_1)
+ == REDISMODULE_ERR) return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.call",
+ TestCall,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.append",
+ TestStringAppend,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.append.am",
+ TestStringAppendAM,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.string.printf",
+ TestStringPrintf,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.ctxflags",
+ TestCtxFlags,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.unlink",
+ TestUnlink,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ if (RedisModule_CreateCommand(ctx,"test.it",
+ TestIt,"readonly",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ RedisModule_SubscribeToKeyspaceEvents(ctx,
+ REDISMODULE_NOTIFY_HASH |
+ REDISMODULE_NOTIFY_SET |
+ REDISMODULE_NOTIFY_STRING,
+ NotifyCallback);
+ if (RedisModule_CreateCommand(ctx,"test.notify",
+ TestNotifications,"write deny-oom",1,1,1) == REDISMODULE_ERR)
+ return REDISMODULE_ERR;
+
+ return REDISMODULE_OK;
+}
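+
+/* A hypothetical way to run the suite (the module path is arbitrary): load
+ * the module and invoke TEST.IT, which runs every unit and replies
+ * "ALL TESTS PASSED" on success:
+ *
+ *   redis-server --loadmodule /path/to/testmodule.so
+ *   redis-cli TEST.IT
+ */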
diff --git a/src/multi.c b/src/multi.c
index c82876456..112ce0605 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -27,18 +27,18 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* ================================ MULTI/EXEC ============================== */
/* Client state initialization for MULTI/EXEC */
-void initClientMultiState(redisClient *c) {
+void initClientMultiState(client *c) {
c->mstate.commands = NULL;
c->mstate.count = 0;
}
/* Release all the resources associated with MULTI/EXEC state */
-void freeClientMultiState(redisClient *c) {
+void freeClientMultiState(client *c) {
int j;
for (j = 0; j < c->mstate.count; j++) {
@@ -53,7 +53,7 @@ void freeClientMultiState(redisClient *c) {
}
/* Add a new command into the MULTI commands queue */
-void queueMultiCommand(redisClient *c) {
+void queueMultiCommand(client *c) {
multiCmd *mc;
int j;
@@ -69,31 +69,31 @@ void queueMultiCommand(redisClient *c) {
c->mstate.count++;
}
-void discardTransaction(redisClient *c) {
+void discardTransaction(client *c) {
freeClientMultiState(c);
initClientMultiState(c);
- c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS|REDIS_DIRTY_EXEC);
+ c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
unwatchAllKeys(c);
}
 /* Flag the transaction as DIRTY_EXEC so that EXEC will fail.
* Should be called every time there is an error while queueing a command. */
-void flagTransaction(redisClient *c) {
- if (c->flags & REDIS_MULTI)
- c->flags |= REDIS_DIRTY_EXEC;
+void flagTransaction(client *c) {
+ if (c->flags & CLIENT_MULTI)
+ c->flags |= CLIENT_DIRTY_EXEC;
}
-void multiCommand(redisClient *c) {
- if (c->flags & REDIS_MULTI) {
+void multiCommand(client *c) {
+ if (c->flags & CLIENT_MULTI) {
addReplyError(c,"MULTI calls can not be nested");
return;
}
- c->flags |= REDIS_MULTI;
+ c->flags |= CLIENT_MULTI;
addReply(c,shared.ok);
}
-void discardCommand(redisClient *c) {
- if (!(c->flags & REDIS_MULTI)) {
+void discardCommand(client *c) {
+ if (!(c->flags & CLIENT_MULTI)) {
addReplyError(c,"DISCARD without MULTI");
return;
}
@@ -103,22 +103,23 @@ void discardCommand(redisClient *c) {
/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
* implementation for more information. */
-void execCommandPropagateMulti(redisClient *c) {
+void execCommandPropagateMulti(client *c) {
robj *multistring = createStringObject("MULTI",5);
propagate(server.multiCommand,c->db->id,&multistring,1,
- REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|PROPAGATE_REPL);
decrRefCount(multistring);
}
-void execCommand(redisClient *c) {
+void execCommand(client *c) {
int j;
robj **orig_argv;
int orig_argc;
struct redisCommand *orig_cmd;
int must_propagate = 0; /* Need to propagate MULTI/EXEC to AOF / slaves? */
+ int was_master = server.masterhost == NULL;
- if (!(c->flags & REDIS_MULTI)) {
+ if (!(c->flags & CLIENT_MULTI)) {
addReplyError(c,"EXEC without MULTI");
return;
}
@@ -129,8 +130,8 @@ void execCommand(redisClient *c) {
* A failed EXEC in the first case returns a multi bulk nil object
* (technically it is not an error but a special behavior), while
* in the second an EXECABORT error is returned. */
- if (c->flags & (REDIS_DIRTY_CAS|REDIS_DIRTY_EXEC)) {
- addReply(c, c->flags & REDIS_DIRTY_EXEC ? shared.execaborterr :
+ if (c->flags & (CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC)) {
+ addReply(c, c->flags & CLIENT_DIRTY_EXEC ? shared.execaborterr :
shared.nullmultibulk);
discardTransaction(c);
goto handle_monitor;
@@ -147,16 +148,17 @@ void execCommand(redisClient *c) {
c->argv = c->mstate.commands[j].argv;
c->cmd = c->mstate.commands[j].cmd;
- /* Propagate a MULTI request once we encounter the first write op.
+ /* Propagate a MULTI request once we encounter the first command that
+ * is neither readonly nor administrative.
* This way we'll deliver the MULTI/..../EXEC block as a whole and
* both the AOF and the replication link will have the same consistency
* and atomicity guarantees. */
- if (!must_propagate && !(c->cmd->flags & REDIS_CMD_READONLY)) {
+ if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
- call(c,REDIS_CALL_FULL);
+ call(c,CMD_CALL_FULL);
/* Commands may alter argc/argv, restore mstate. */
c->mstate.commands[j].argc = c->argc;
@@ -167,15 +169,28 @@ void execCommand(redisClient *c) {
c->argc = orig_argc;
c->cmd = orig_cmd;
discardTransaction(c);
+
/* Make sure the EXEC command will be propagated as well if MULTI
* was already propagated. */
- if (must_propagate) server.dirty++;
+ if (must_propagate) {
+ int is_master = server.masterhost == NULL;
+ server.dirty++;
+ /* If inside the MULTI/EXEC block this instance was suddenly
+ * switched from master to slave (using the SLAVEOF command), the
+ * initial MULTI was propagated into the replication backlog, but the
+ * rest was not. We need to make sure to at least terminate the
+ * backlog with the final EXEC. */
+ if (server.repl_backlog && was_master && !is_master) {
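+ /* "*1\r\n$4\r\nEXEC\r\n" is the RESP encoding of the EXEC
+ * command: a one element array ("*1") whose single item is
+ * the four byte bulk string "EXEC" ("$4"). */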
+ char *execcmd = "*1\r\n$4\r\nEXEC\r\n";
+ feedReplicationBacklog(execcmd,strlen(execcmd));
+ }
+ }
handle_monitor:
 /* Send EXEC to clients waiting for data from MONITOR. We do it here
 * since the natural order of command execution is actually:
 * MULTI, EXEC, ... commands inside transaction ...
- * Instead EXEC is flagged as REDIS_CMD_SKIP_MONITOR in the command
+ * Instead EXEC is flagged as CMD_SKIP_MONITOR in the command
* table, and we do it here with correct ordering. */
if (listLength(server.monitors) && !server.loading)
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
@@ -199,7 +214,7 @@ typedef struct watchedKey {
} watchedKey;
/* Watch for the specified key */
-void watchForKey(redisClient *c, robj *key) {
+void watchForKey(client *c, robj *key) {
list *clients = NULL;
listIter li;
listNode *ln;
@@ -230,7 +245,7 @@ void watchForKey(redisClient *c, robj *key) {
 /* Unwatch all the keys watched by this client. Cleaning the EXEC dirty
 * flag is up to the caller. */
-void unwatchAllKeys(redisClient *c) {
+void unwatchAllKeys(client *c) {
listIter li;
listNode *ln;
@@ -244,7 +259,7 @@ void unwatchAllKeys(redisClient *c) {
* from the list */
wk = listNodeValue(ln);
clients = dictFetchValue(wk->db->watched_keys, wk->key);
- redisAssertWithInfo(c,NULL,clients != NULL);
+ serverAssertWithInfo(c,NULL,clients != NULL);
listDelNode(clients,listSearchKey(clients,c));
 /* Remove the entry entirely if this was the only client */
if (listLength(clients) == 0)
@@ -267,13 +282,13 @@ void touchWatchedKey(redisDb *db, robj *key) {
clients = dictFetchValue(db->watched_keys, key);
if (!clients) return;
- /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
+ /* Mark all the clients watching this key as CLIENT_DIRTY_CAS */
 /* Iterate all the clients currently watching this key */
listRewind(clients,&li);
while((ln = listNext(&li))) {
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
- c->flags |= REDIS_DIRTY_CAS;
+ c->flags |= CLIENT_DIRTY_CAS;
}
}
@@ -288,7 +303,7 @@ void touchWatchedKeysOnFlush(int dbid) {
 /* For every client, check all the watched keys */
listRewind(server.clients,&li1);
while((ln = listNext(&li1))) {
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
listRewind(c->watched_keys,&li2);
while((ln = listNext(&li2))) {
watchedKey *wk = listNodeValue(ln);
@@ -298,16 +313,16 @@ void touchWatchedKeysOnFlush(int dbid) {
* removed. */
if (dbid == -1 || wk->db->id == dbid) {
if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
- c->flags |= REDIS_DIRTY_CAS;
+ c->flags |= CLIENT_DIRTY_CAS;
}
}
}
}
-void watchCommand(redisClient *c) {
+void watchCommand(client *c) {
int j;
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
addReplyError(c,"WATCH inside MULTI is not allowed");
return;
}
@@ -316,8 +331,8 @@ void watchCommand(redisClient *c) {
addReply(c,shared.ok);
}
-void unwatchCommand(redisClient *c) {
+void unwatchCommand(client *c) {
unwatchAllKeys(c);
- c->flags &= (~REDIS_DIRTY_CAS);
+ c->flags &= (~CLIENT_DIRTY_CAS);
addReply(c,shared.ok);
}
diff --git a/src/networking.c b/src/networking.c
index 607d225fd..af7422178 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -27,45 +27,66 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
+#include "atomicvar.h"
#include <sys/uio.h>
#include <math.h>
+#include <ctype.h>
-static void setProtocolError(redisClient *c, int pos);
+static void setProtocolError(const char *errstr, client *c, long pos);
-/* To evaluate the output buffer size of a client we need to get size of
- * allocated objects, however we can't used zmalloc_size() directly on sds
- * strings because of the trick they use to work (the header is before the
- * returned pointer), so we use this helper function. */
-size_t zmalloc_size_sds(sds s) {
- return zmalloc_size(s-sizeof(struct sdshdr));
+/* Return the size consumed from the allocator, for the specified SDS string,
+ * including internal fragmentation. This function is used in order to compute
+ * the client output buffer size. */
+size_t sdsZmallocSize(sds s) {
+ void *sh = sdsAllocPtr(s);
+ return zmalloc_size(sh);
}
/* Return the amount of memory used by the sds string at object->ptr
* for a string object. */
size_t getStringObjectSdsUsedMemory(robj *o) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
switch(o->encoding) {
- case REDIS_ENCODING_RAW: return zmalloc_size_sds(o->ptr);
- case REDIS_ENCODING_EMBSTR: return sdslen(o->ptr);
+ case OBJ_ENCODING_RAW: return sdsZmallocSize(o->ptr);
+ case OBJ_ENCODING_EMBSTR: return zmalloc_size(o)-sizeof(robj);
default: return 0; /* Just integer encoding for now. */
}
}
+/* Client.reply list dup and free methods. */
void *dupClientReplyValue(void *o) {
- incrRefCount((robj*)o);
- return o;
+ clientReplyBlock *old = o;
+ clientReplyBlock *buf = zmalloc(sizeof(clientReplyBlock) + old->size);
+ memcpy(buf, o, sizeof(clientReplyBlock) + old->size);
+ return buf;
+}
+
+void freeClientReplyValue(void *o) {
+ zfree(o);
}
int listMatchObjects(void *a, void *b) {
return equalStringObjects(a,b);
}
-redisClient *createClient(int fd) {
- redisClient *c = zmalloc(sizeof(redisClient));
+/* This function links the client to the global linked list of clients.
+ * unlinkClient() does the opposite, among other things. */
+void linkClient(client *c) {
+ listAddNodeTail(server.clients,c);
+ /* Note that we remember the linked list node where the client is stored,
+ * this way removing the client in unlinkClient() will not require
+ * a linear scan, but just a constant time operation. */
+ c->client_list_node = listLast(server.clients);
+ uint64_t id = htonu64(c->id);
+ raxInsert(server.clients_index,(unsigned char*)&id,sizeof(id),c,NULL);
+}
+
+client *createClient(int fd) {
+ client *c = zmalloc(sizeof(client));
 /* Passing -1 as fd, it is possible to create a non connected client.
- * This is useful since all the Redis commands needs to be executed
+ * This is useful since all the commands need to be executed
* in the context of a client. When commands are executed in other
* contexts (for instance a Lua script) we need a non connected client. */
if (fd != -1) {
@@ -83,11 +104,14 @@ redisClient *createClient(int fd) {
}
selectDb(c,0);
- c->id = server.next_client_id++;
+ uint64_t client_id;
+ atomicGetIncr(server.next_client_id,client_id,1);
+ c->id = client_id;
c->fd = fd;
c->name = NULL;
c->bufpos = 0;
c->querybuf = sdsempty();
+ c->pending_querybuf = sdsempty();
c->querybuf_peak = 0;
c->reqtype = 0;
c->argc = 0;
@@ -99,31 +123,38 @@ redisClient *createClient(int fd) {
c->flags = 0;
c->ctime = c->lastinteraction = server.unixtime;
c->authenticated = 0;
- c->replstate = REDIS_REPL_NONE;
+ c->replstate = REPL_STATE_NONE;
c->repl_put_online_on_ack = 0;
c->reploff = 0;
+ c->read_reploff = 0;
c->repl_ack_off = 0;
c->repl_ack_time = 0;
c->slave_listening_port = 0;
+ c->slave_ip[0] = '\0';
+ c->slave_capa = SLAVE_CAPA_NONE;
c->reply = listCreate();
c->reply_bytes = 0;
c->obuf_soft_limit_reached_time = 0;
- listSetFreeMethod(c->reply,decrRefCountVoid);
+ listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
- c->btype = REDIS_BLOCKED_NONE;
+ c->btype = BLOCKED_NONE;
c->bpop.timeout = 0;
- c->bpop.keys = dictCreate(&setDictType,NULL);
+ c->bpop.keys = dictCreate(&objectKeyHeapPointerValueDictType,NULL);
c->bpop.target = NULL;
+ c->bpop.xread_group = NULL;
+ c->bpop.xread_consumer = NULL;
+ c->bpop.xread_group_noack = 0;
c->bpop.numreplicas = 0;
c->bpop.reploffset = 0;
c->woff = 0;
c->watched_keys = listCreate();
- c->pubsub_channels = dictCreate(&setDictType,NULL);
+ c->pubsub_channels = dictCreate(&objectKeyPointerValueDictType,NULL);
c->pubsub_patterns = listCreate();
c->peerid = NULL;
+ c->client_list_node = NULL;
listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid);
listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
- if (fd != -1) listAddNodeTail(server.clients,c);
+ if (fd != -1) linkClient(c);
initClientMultiState(c);
return c;
}
@@ -132,156 +163,115 @@ redisClient *createClient(int fd) {
* to the client. The behavior is the following:
*
* If the client should receive new data (normal clients will) the function
- * returns REDIS_OK, and make sure to install the write handler in our event
+ * returns C_OK, and make sure to install the write handler in our event
* loop so that when the socket is writable new data gets written.
*
- * If the client should not receive new data, because it is a fake client,
- * a master, a slave not yet online, or because the setup of the write handler
- * failed, the function returns REDIS_ERR.
+ * If the client should not receive new data, because it is a fake client
+ * (used to load AOF in memory), a master or because the setup of the write
+ * handler failed, the function returns C_ERR.
+ *
+ * The function may return C_OK without actually installing the write
+ * event handler in the following cases:
+ *
+ * 1) The event handler should already be installed since the output buffer
+ * already contains something.
+ * 2) The client is a slave but not yet online, so we want to just accumulate
+ * writes in the buffer but not actually send them yet.
*
* Typically gets called every time a reply is built, before adding more
- * data to the clients output buffers. If the function returns REDIS_ERR no
+ * data to the client's output buffers. If the function returns C_ERR no
* data should be appended to the output buffers. */
-int prepareClientToWrite(redisClient *c) {
- if (c->flags & REDIS_LUA_CLIENT) return REDIS_OK;
- if ((c->flags & REDIS_MASTER) &&
- !(c->flags & REDIS_MASTER_FORCE_REPLY)) return REDIS_ERR;
- if (c->fd <= 0) return REDIS_ERR; /* Fake client */
- if (c->bufpos == 0 && listLength(c->reply) == 0 &&
- (c->replstate == REDIS_REPL_NONE ||
- c->replstate == REDIS_REPL_ONLINE) &&
- aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c) == AE_ERR) return REDIS_ERR;
- return REDIS_OK;
-}
-
-/* Create a duplicate of the last object in the reply list when
- * it is not exclusively owned by the reply list. */
-robj *dupLastObjectIfNeeded(list *reply) {
- robj *new, *cur;
- listNode *ln;
- redisAssert(listLength(reply) > 0);
- ln = listLast(reply);
- cur = listNodeValue(ln);
- if (cur->refcount > 1) {
- new = dupStringObject(cur);
- decrRefCount(cur);
- listNodeValue(ln) = new;
+int prepareClientToWrite(client *c) {
+ /* If it's the Lua client we always return ok without installing any
+ * handler since there is no socket at all. */
+ if (c->flags & (CLIENT_LUA|CLIENT_MODULE)) return C_OK;
+
+ /* CLIENT REPLY OFF / SKIP handling: don't send replies. */
+ if (c->flags & (CLIENT_REPLY_OFF|CLIENT_REPLY_SKIP)) return C_ERR;
+
+ /* Masters don't receive replies, unless CLIENT_MASTER_FORCE_REPLY flag
+ * is set. */
+ if ((c->flags & CLIENT_MASTER) &&
+ !(c->flags & CLIENT_MASTER_FORCE_REPLY)) return C_ERR;
+
+ if (c->fd <= 0) return C_ERR; /* Fake client for AOF loading. */
+
+ /* Schedule the client to write the output buffers to the socket only
+ * if not already done (there were no pending writes already and the client
+ * was yet not flagged), and, for slaves, if the slave can actually
+ * receive writes at this stage. */
+ if (!clientHasPendingReplies(c) &&
+ !(c->flags & CLIENT_PENDING_WRITE) &&
+ (c->replstate == REPL_STATE_NONE ||
+ (c->replstate == SLAVE_STATE_ONLINE && !c->repl_put_online_on_ack)))
+ {
+ /* Here instead of installing the write handler, we just flag the
+ * client and put it into a list of clients that have something
+ * to write to the socket. This way before re-entering the event
+ * loop, we can try to directly write to the client sockets avoiding
+ * a system call. We'll only really install the write handler if
+ * we'll not be able to write the whole reply at once. */
+ c->flags |= CLIENT_PENDING_WRITE;
+ listAddNodeHead(server.clients_pending_write,c);
}
- return listNodeValue(ln);
+
+ /* Authorize the caller to queue in the output buffer of this client. */
+ return C_OK;
}
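
As a minimal sketch of the caller contract (the helper name below is hypothetical; real entry points such as addReply() follow the same shape):

    void addReplyOkSketch(client *c) {               /* hypothetical */
        if (prepareClientToWrite(c) != C_OK) return; /* reply suppressed */
        if (_addReplyToBuffer(c,"+OK\r\n",5) != C_OK)
            _addReplyStringToList(c,"+OK\r\n",5);
    }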
/* -----------------------------------------------------------------------------
* Low level functions to add more data to output buffers.
* -------------------------------------------------------------------------- */
-int _addReplyToBuffer(redisClient *c, char *s, size_t len) {
+int _addReplyToBuffer(client *c, const char *s, size_t len) {
size_t available = sizeof(c->buf)-c->bufpos;
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return REDIS_OK;
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return C_OK;
/* If there already are entries in the reply list, we cannot
* add anything more to the static buffer. */
- if (listLength(c->reply) > 0) return REDIS_ERR;
+ if (listLength(c->reply) > 0) return C_ERR;
/* Check that the buffer has enough space available for this string. */
- if (len > available) return REDIS_ERR;
+ if (len > available) return C_ERR;
memcpy(c->buf+c->bufpos,s,len);
c->bufpos+=len;
- return REDIS_OK;
+ return C_OK;
}
-void _addReplyObjectToList(redisClient *c, robj *o) {
- robj *tail;
+void _addReplyStringToList(client *c, const char *s, size_t len) {
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return;
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
+ listNode *ln = listLast(c->reply);
+ clientReplyBlock *tail = ln? listNodeValue(ln): NULL;
- if (listLength(c->reply) == 0) {
- incrRefCount(o);
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
- } else {
- tail = listNodeValue(listLast(c->reply));
-
- /* Append to this object when possible. */
- if (tail->ptr != NULL &&
- tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+sdslen(o->ptr) <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,o->ptr,sdslen(o->ptr));
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
- } else {
- incrRefCount(o);
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
- }
- }
- asyncCloseClientOnOutputBufferLimitReached(c);
-}
-
-/* This method takes responsibility over the sds. When it is no longer
- * needed it will be free'd, otherwise it ends up in a robj. */
-void _addReplySdsToList(redisClient *c, sds s) {
- robj *tail;
-
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) {
- sdsfree(s);
- return;
- }
-
- if (listLength(c->reply) == 0) {
- listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
- c->reply_bytes += zmalloc_size_sds(s);
- } else {
- tail = listNodeValue(listLast(c->reply));
+ /* Note that 'tail' may be NULL even if we have a tail node, because when
+ * addDeferredMultiBulkLength() is used, it sets a dummy node to NULL just
+ * to fill it later, when the size of the bulk length is set. */
- /* Append to this object when possible. */
- if (tail->ptr != NULL && tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+sdslen(s) <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,s,sdslen(s));
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
- sdsfree(s);
- } else {
- listAddNodeTail(c->reply,createObject(REDIS_STRING,s));
- c->reply_bytes += zmalloc_size_sds(s);
- }
+ /* Append to tail string when possible. */
+ if (tail) {
+ /* Copy the part we can fit into the tail, and leave the rest for a
+ * new node */
+ size_t avail = tail->size - tail->used;
+ size_t copy = avail >= len? len: avail;
+ memcpy(tail->buf + tail->used, s, copy);
+ tail->used += copy;
+ s += copy;
+ len -= copy;
}
- asyncCloseClientOnOutputBufferLimitReached(c);
-}
-
-void _addReplyStringToList(redisClient *c, char *s, size_t len) {
- robj *tail;
-
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
-
- if (listLength(c->reply) == 0) {
- robj *o = createStringObject(s,len);
-
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
- } else {
- tail = listNodeValue(listLast(c->reply));
-
- /* Append to this object when possible. */
- if (tail->ptr != NULL && tail->encoding == REDIS_ENCODING_RAW &&
- sdslen(tail->ptr)+len <= REDIS_REPLY_CHUNK_BYTES)
- {
- c->reply_bytes -= zmalloc_size_sds(tail->ptr);
- tail = dupLastObjectIfNeeded(c->reply);
- tail->ptr = sdscatlen(tail->ptr,s,len);
- c->reply_bytes += zmalloc_size_sds(tail->ptr);
- } else {
- robj *o = createStringObject(s,len);
-
- listAddNodeTail(c->reply,o);
- c->reply_bytes += getStringObjectSdsUsedMemory(o);
- }
+ if (len) {
+ /* Create a new node, make sure it is allocated to at
+ * least PROTO_REPLY_CHUNK_BYTES */
+ size_t size = len < PROTO_REPLY_CHUNK_BYTES? PROTO_REPLY_CHUNK_BYTES: len;
+ tail = zmalloc(size + sizeof(clientReplyBlock));
+ /* take over the allocation's internal fragmentation */
+ tail->size = zmalloc_usable(tail) - sizeof(clientReplyBlock);
+ tail->used = len;
+ memcpy(tail->buf, s, len);
+ listAddNodeTail(c->reply, tail);
+ c->reply_bytes += tail->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
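
For reference, the clientReplyBlock nodes manipulated above pair a small header with a flexible array member; in this version of the tree the declaration in server.h is essentially:

    typedef struct clientReplyBlock {
        size_t size, used; /* allocated and filled bytes of buf[] */
        char buf[];        /* flexible array member holding the payload */
    } clientReplyBlock;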
@@ -291,73 +281,99 @@ void _addReplyStringToList(redisClient *c, char *s, size_t len) {
* The following functions are the ones that commands implementations will call.
* -------------------------------------------------------------------------- */
-void addReply(redisClient *c, robj *obj) {
- if (prepareClientToWrite(c) != REDIS_OK) return;
+/* Add the object 'obj' string representation to the client output buffer. */
+void addReply(client *c, robj *obj) {
+ if (prepareClientToWrite(c) != C_OK) return;
- /* This is an important place where we can avoid copy-on-write
- * when there is a saving child running, avoiding touching the
- * refcount field of the object if it's not needed.
- *
- * If the encoding is RAW and there is room in the static buffer
- * we'll be able to send the object to the client without
- * messing with its page. */
if (sdsEncodedObject(obj)) {
- if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
- _addReplyObjectToList(c,obj);
- } else if (obj->encoding == REDIS_ENCODING_INT) {
- /* Optimization: if there is room in the static buffer for 32 bytes
- * (more than the max chars a 64 bit integer can take as string) we
- * avoid decoding the object and go for the lower level approach. */
- if (listLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) {
- char buf[32];
- int len;
-
- len = ll2string(buf,sizeof(buf),(long)obj->ptr);
- if (_addReplyToBuffer(c,buf,len) == REDIS_OK)
- return;
- /* else... continue with the normal code path, but should never
- * happen actually since we verified there is room. */
- }
- obj = getDecodedObject(obj);
- if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK)
- _addReplyObjectToList(c,obj);
- decrRefCount(obj);
+ if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != C_OK)
+ _addReplyStringToList(c,obj->ptr,sdslen(obj->ptr));
+ } else if (obj->encoding == OBJ_ENCODING_INT) {
+ /* For integer encoded strings we just convert it into a string
+ * using our optimized function, and attach the resulting string
+ * to the output buffer. */
+ char buf[32];
+ size_t len = ll2string(buf,sizeof(buf),(long)obj->ptr);
+ if (_addReplyToBuffer(c,buf,len) != C_OK)
+ _addReplyStringToList(c,buf,len);
} else {
- redisPanic("Wrong obj->encoding in addReply()");
+ serverPanic("Wrong obj->encoding in addReply()");
}
}
-void addReplySds(redisClient *c, sds s) {
- if (prepareClientToWrite(c) != REDIS_OK) {
+/* Add the SDS 's' string to the client output buffer, as a side effect
+ * the SDS string is freed. */
+void addReplySds(client *c, sds s) {
+ if (prepareClientToWrite(c) != C_OK) {
/* The caller expects the sds to be free'd. */
sdsfree(s);
return;
}
- if (_addReplyToBuffer(c,s,sdslen(s)) == REDIS_OK) {
- sdsfree(s);
- } else {
- /* This method free's the sds when it is no longer needed. */
- _addReplySdsToList(c,s);
- }
+ if (_addReplyToBuffer(c,s,sdslen(s)) != C_OK)
+ _addReplyStringToList(c,s,sdslen(s));
+ sdsfree(s);
}
-void addReplyString(redisClient *c, char *s, size_t len) {
- if (prepareClientToWrite(c) != REDIS_OK) return;
- if (_addReplyToBuffer(c,s,len) != REDIS_OK)
+/* This low level function just adds whatever protocol you send it to the
+ * client buffer, trying the static buffer initially, and using the list
+ * of objects if not possible.
+ *
+ * It is efficient because it does not create an SDS object nor a Redis
+ * object if not needed. The object will only be created by calling
+ * _addReplyStringToList() if we fail to extend the existing tail object
+ * in the list of objects. */
+void addReplyString(client *c, const char *s, size_t len) {
+ if (prepareClientToWrite(c) != C_OK) return;
+ if (_addReplyToBuffer(c,s,len) != C_OK)
_addReplyStringToList(c,s,len);
}
-void addReplyErrorLength(redisClient *c, char *s, size_t len) {
- addReplyString(c,"-ERR ",5);
+/* Low level function called by the addReplyError...() functions.
+ * It emits the protocol for a Redis error, in the form:
+ *
+ * -ERRORCODE Error Message<CR><LF>
+ *
+ * If the error code is already passed in the string 's', the error
+ * code provided is used, otherwise the string "-ERR " for the generic
+ * error code is automatically added. */
+void addReplyErrorLength(client *c, const char *s, size_t len) {
+ /* If the string already starts with "-..." then the error code
+ * is provided by the caller. Otherwise we use "-ERR". */
+ if (!len || s[0] != '-') addReplyString(c,"-ERR ",5);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
+
+ /* Sometimes it could be normal that a slave replies to a master with
+ * an error and this function gets called. Actually the error will never
+ * be sent because addReply*() against master clients has no effect...
+ * A notable example is:
+ *
+ * EVAL 'redis.call("incr",KEYS[1]); redis.call("nonexisting")' 1 x
+ *
+ * Where the master must propagate the first change even if the second
+ * will produce an error. However it is useful to log such events since
+ * they are rare and may hint at errors in a script or a bug in Redis. */
+ if (c->flags & (CLIENT_MASTER|CLIENT_SLAVE)) {
+ char* to = c->flags & CLIENT_MASTER? "master": "slave";
+ char* from = c->flags & CLIENT_MASTER? "slave": "master";
+ char *cmdname = c->lastcmd ? c->lastcmd->name : "<unknown>";
+ serverLog(LL_WARNING,"== CRITICAL == This %s is sending an error "
+ "to its %s: '%s' after processing the command "
+ "'%s'", from, to, s, cmdname);
+ /* Here we want to panic because when a master is sending an
+ * error to some slave in the context of replication, this can
+ * only create some kind of offset or data desynchronization. Better
+ * to catch it ASAP and crash instead of continuing. */
+ if (c->flags & CLIENT_SLAVE)
+ serverPanic("Continuing is unsafe: replication protocol violation.");
+ }
}
-void addReplyError(redisClient *c, char *err) {
+void addReplyError(client *c, const char *err) {
addReplyErrorLength(c,err,strlen(err));
}
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
+void addReplyErrorFormat(client *c, const char *fmt, ...) {
size_t l, j;
va_list ap;
va_start(ap,fmt);
@@ -373,17 +389,17 @@ void addReplyErrorFormat(redisClient *c, const char *fmt, ...) {
sdsfree(s);
}
-void addReplyStatusLength(redisClient *c, char *s, size_t len) {
+void addReplyStatusLength(client *c, const char *s, size_t len) {
addReplyString(c,"+",1);
addReplyString(c,s,len);
addReplyString(c,"\r\n",2);
}
-void addReplyStatus(redisClient *c, char *status) {
+void addReplyStatus(client *c, const char *status) {
addReplyStatusLength(c,status,strlen(status));
}
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
+void addReplyStatusFormat(client *c, const char *fmt, ...) {
va_list ap;
va_start(ap,fmt);
sds s = sdscatvprintf(sdsempty(),fmt,ap);
@@ -394,44 +410,59 @@ void addReplyStatusFormat(redisClient *c, const char *fmt, ...) {
/* Adds an empty object to the reply list that will contain the multi bulk
* length, which is not known when this function is called. */
-void *addDeferredMultiBulkLength(redisClient *c) {
+void *addDeferredMultiBulkLength(client *c) {
/* Note that we install the write event here even if the object is not
* ready to be sent, since we are sure that before returning to the
* event loop setDeferredMultiBulkLength() will be called. */
- if (prepareClientToWrite(c) != REDIS_OK) return NULL;
- listAddNodeTail(c->reply,createObject(REDIS_STRING,NULL));
+ if (prepareClientToWrite(c) != C_OK) return NULL;
+ listAddNodeTail(c->reply,NULL); /* NULL is our placeholder. */
return listLast(c->reply);
}
/* Populate the length object and try gluing it to the next chunk. */
-void setDeferredMultiBulkLength(redisClient *c, void *node, long length) {
+void setDeferredMultiBulkLength(client *c, void *node, long length) {
listNode *ln = (listNode*)node;
- robj *len, *next;
+ clientReplyBlock *next;
+ char lenstr[128];
+ size_t lenstr_len = sprintf(lenstr, "*%ld\r\n", length);
- /* Abort when *node is NULL (see addDeferredMultiBulkLength). */
+ /* Abort when *node is NULL: when the client should not accept writes
+ * we return NULL in addDeferredMultiBulkLength() */
if (node == NULL) return;
-
- len = listNodeValue(ln);
- len->ptr = sdscatprintf(sdsempty(),"*%ld\r\n",length);
- len->encoding = REDIS_ENCODING_RAW; /* in case it was an EMBSTR. */
- c->reply_bytes += zmalloc_size_sds(len->ptr);
- if (ln->next != NULL) {
- next = listNodeValue(ln->next);
-
- /* Only glue when the next node is non-NULL (an sds in this case) */
- if (next->ptr != NULL) {
- c->reply_bytes -= zmalloc_size_sds(len->ptr);
- c->reply_bytes -= getStringObjectSdsUsedMemory(next);
- len->ptr = sdscatlen(len->ptr,next->ptr,sdslen(next->ptr));
- c->reply_bytes += zmalloc_size_sds(len->ptr);
- listDelNode(c->reply,ln->next);
- }
+ serverAssert(!listNodeValue(ln));
+
+ /* Normally we fill this dummy NULL node, added by addDeferredMultiBulkLength(),
+ * with a new buffer structure containing the protocol needed to specify
+ * the length of the array following. However sometimes when there is
+ * little memory to move, we may instead remove this NULL node, and prefix
+ * our protocol in the node immediately after it, in order to save a
+ * write(2) syscall later. Conditions needed to do it:
+ *
+ * - The next node is non-NULL,
+ * - It has enough room already allocated
+ * - And not too large (avoid large memmove) */
+ if (ln->next != NULL && (next = listNodeValue(ln->next)) &&
+ next->size - next->used >= lenstr_len &&
+ next->used < PROTO_REPLY_CHUNK_BYTES * 4) {
+ memmove(next->buf + lenstr_len, next->buf, next->used);
+ memcpy(next->buf, lenstr, lenstr_len);
+ next->used += lenstr_len;
+ listDelNode(c->reply,ln);
+ } else {
+ /* Create a new node */
+ clientReplyBlock *buf = zmalloc(lenstr_len + sizeof(clientReplyBlock));
+ /* Take over the allocation's internal fragmentation */
+ buf->size = zmalloc_usable(buf) - sizeof(clientReplyBlock);
+ buf->used = lenstr_len;
+ memcpy(buf->buf, lenstr, lenstr_len);
+ listNodeValue(ln) = buf;
+ c->reply_bytes += buf->size;
}
asyncCloseClientOnOutputBufferLimitReached(c);
}
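
A usage sketch of the deferred-length pair, for a command that cannot know the reply count in advance (nextMatchingEntry() and the iterator are hypothetical):

    void *replylen = addDeferredMultiBulkLength(c);
    long emitted = 0;
    while ((e = nextMatchingEntry(&it)) != NULL) { /* hypothetical */
        addReplyBulkCBuffer(c, e->data, e->len);
        emitted++;
    }
    setDeferredMultiBulkLength(c, replylen, emitted);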
/* Add a double as a bulk reply */
-void addReplyDouble(redisClient *c, double d) {
+void addReplyDouble(client *c, double d) {
char dbuf[128], sbuf[128];
int dlen, slen;
if (isinf(d)) {
@@ -445,19 +476,28 @@ void addReplyDouble(redisClient *c, double d) {
}
}
+/* Add a long double as a bulk reply, but using a human readable formatting
+ * of the double instead of exposing the crude behavior of doubles to the
+ * dear user. */
+void addReplyHumanLongDouble(client *c, long double d) {
+ robj *o = createStringObjectFromLongDouble(d,1);
+ addReplyBulk(c,o);
+ decrRefCount(o);
+}
+
/* Add a long long as integer reply or bulk len / multi bulk count.
* Basically this is used to output <prefix><long long><crlf>. */
-void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
+void addReplyLongLongWithPrefix(client *c, long long ll, char prefix) {
char buf[128];
int len;
/* Things like $3\r\n or *2\r\n are emitted very often by the protocol
* so we have a few shared objects to use if the integer is small
* like it is most of the times. */
- if (prefix == '*' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ if (prefix == '*' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.mbulkhdr[ll]);
return;
- } else if (prefix == '$' && ll < REDIS_SHARED_BULKHDR_LEN) {
+ } else if (prefix == '$' && ll < OBJ_SHARED_BULKHDR_LEN && ll >= 0) {
addReply(c,shared.bulkhdr[ll]);
return;
}
@@ -469,7 +509,7 @@ void addReplyLongLongWithPrefix(redisClient *c, long long ll, char prefix) {
addReplyString(c,buf,len+3);
}
-void addReplyLongLong(redisClient *c, long long ll) {
+void addReplyLongLong(client *c, long long ll) {
if (ll == 0)
addReply(c,shared.czero);
else if (ll == 1)
@@ -478,15 +518,15 @@ void addReplyLongLong(redisClient *c, long long ll) {
addReplyLongLongWithPrefix(c,ll,':');
}
-void addReplyMultiBulkLen(redisClient *c, long length) {
- if (length < REDIS_SHARED_BULKHDR_LEN)
+void addReplyMultiBulkLen(client *c, long length) {
+ if (length < OBJ_SHARED_BULKHDR_LEN)
addReply(c,shared.mbulkhdr[length]);
else
addReplyLongLongWithPrefix(c,length,'*');
}
/* Create the length prefix of a bulk reply, example: $2234 */
-void addReplyBulkLen(redisClient *c, robj *obj) {
+void addReplyBulkLen(client *c, robj *obj) {
size_t len;
if (sdsEncodedObject(obj)) {
@@ -505,36 +545,35 @@ void addReplyBulkLen(redisClient *c, robj *obj) {
}
}
- if (len < REDIS_SHARED_BULKHDR_LEN)
+ if (len < OBJ_SHARED_BULKHDR_LEN)
addReply(c,shared.bulkhdr[len]);
else
addReplyLongLongWithPrefix(c,len,'$');
}
/* Add a Redis Object as a bulk reply */
-void addReplyBulk(redisClient *c, robj *obj) {
+void addReplyBulk(client *c, robj *obj) {
addReplyBulkLen(c,obj);
addReply(c,obj);
addReply(c,shared.crlf);
}
/* Add a C buffer as bulk reply */
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len) {
+void addReplyBulkCBuffer(client *c, const void *p, size_t len) {
addReplyLongLongWithPrefix(c,len,'$');
addReplyString(c,p,len);
addReply(c,shared.crlf);
}
/* Add sds to reply (takes ownership of sds and frees it) */
-void addReplyBulkSds(redisClient *c, sds s) {
- addReplySds(c,sdscatfmt(sdsempty(),"$%u\r\n",
- (unsigned long)sdslen(s)));
+void addReplyBulkSds(client *c, sds s) {
+ addReplyLongLongWithPrefix(c,sdslen(s),'$');
addReplySds(c,s);
addReply(c,shared.crlf);
}
-/* Add a C nul term string as bulk reply */
-void addReplyBulkCString(redisClient *c, char *s) {
+/* Add a C null term string as bulk reply */
+void addReplyBulkCString(client *c, const char *s) {
if (s == NULL) {
addReply(c,shared.nullbulk);
} else {
@@ -543,7 +582,7 @@ void addReplyBulkCString(redisClient *c, char *s) {
}
/* Add a long long as a bulk reply */
-void addReplyBulkLongLong(redisClient *c, long long ll) {
+void addReplyBulkLongLong(client *c, long long ll) {
char buf[64];
int len;
@@ -551,22 +590,61 @@ void addReplyBulkLongLong(redisClient *c, long long ll) {
addReplyBulkCBuffer(c,buf,len);
}
+/* Add an array of C strings as status replies with a heading.
+ * This function is typically invoked from commands that support
+ * subcommands in response to the 'help' subcommand. The help array
+ * is terminated by a NULL sentinel. */
+void addReplyHelp(client *c, const char **help) {
+ sds cmd = sdsnew((char*) c->argv[0]->ptr);
+ void *blenp = addDeferredMultiBulkLength(c);
+ int blen = 0;
+
+ sdstoupper(cmd);
+ addReplyStatusFormat(c,
+ "%s <subcommand> arg arg ... arg. Subcommands are:",cmd);
+ sdsfree(cmd);
+
+ while (help[blen]) addReplyStatus(c,help[blen++]);
+
+ blen++; /* Account for the header line(s). */
+ setDeferredMultiBulkLength(c,blenp,blen);
+}
+
+/* Add a suggestive error reply.
+ * This function is typically invoked from commands that support
+ * subcommands in response to an unknown subcommand or argument error. */
+void addReplySubcommandSyntaxError(client *c) {
+ sds cmd = sdsnew((char*) c->argv[0]->ptr);
+ sdstoupper(cmd);
+ addReplyErrorFormat(c,
+ "Unknown subcommand or wrong number of arguments for '%s'. Try %s HELP.",
+ (char*)c->argv[1]->ptr,cmd);
+ sdsfree(cmd);
+}
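
A sketch of the calling convention both helpers assume (the subcommand names and help strings below are illustrative):

    const char *help[] = {
        "ENCODING <key> -- Return the kind of internal representation used.",
        "REFCOUNT <key> -- Return the number of references of the value.",
        NULL /* sentinel expected by addReplyHelp() */
    };
    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
        addReplyHelp(c, help);
    } else {
        addReplySubcommandSyntaxError(c);
    }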
+
/* Copy 'src' client output buffers into 'dst' client output buffers.
* The function takes care of freeing the old output buffers of the
* destination client. */
-void copyClientOutputBuffer(redisClient *dst, redisClient *src) {
+void copyClientOutputBuffer(client *dst, client *src) {
listRelease(dst->reply);
+ dst->sentlen = 0;
dst->reply = listDup(src->reply);
memcpy(dst->buf,src->buf,src->bufpos);
dst->bufpos = src->bufpos;
dst->reply_bytes = src->reply_bytes;
}
+/* Return true if the specified client has pending reply buffers to write to
+ * the socket. */
+int clientHasPendingReplies(client *c) {
+ return c->bufpos || listLength(c->reply);
+}
+
#define MAX_ACCEPTS_PER_CALL 1000
-static void acceptCommonHandler(int fd, int flags) {
- redisClient *c;
+static void acceptCommonHandler(int fd, int flags, char *ip) {
+ client *c;
if ((c = createClient(fd)) == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Error registering fd event for the new client: %s (fd=%d)",
strerror(errno),fd);
close(fd); /* May be already closed, just ignore errors */
@@ -587,50 +665,92 @@ static void acceptCommonHandler(int fd, int flags) {
freeClient(c);
return;
}
+
+ /* If the server is running in protected mode (the default) and there
+ * is no password set, nor a specific interface is bound, we don't accept
+ * requests from non loopback interfaces. Instead we try to explain to
+ * the user what to do to fix it if needed. */
+ if (server.protected_mode &&
+ server.bindaddr_count == 0 &&
+ server.requirepass == NULL &&
+ !(flags & CLIENT_UNIX_SOCKET) &&
+ ip != NULL)
+ {
+ if (strcmp(ip,"127.0.0.1") && strcmp(ip,"::1")) {
+ char *err =
+ "-DENIED Redis is running in protected mode because protected "
+ "mode is enabled, no bind address was specified, no "
+ "authentication password is requested to clients. In this mode "
+ "connections are only accepted from the loopback interface. "
+ "If you want to connect from external computers to Redis you "
+ "may adopt one of the following solutions: "
+ "1) Just disable protected mode sending the command "
+ "'CONFIG SET protected-mode no' from the loopback interface "
+ "by connecting to Redis from the same host the server is "
+ "running, however MAKE SURE Redis is not publicly accessible "
+ "from internet if you do so. Use CONFIG REWRITE to make this "
+ "change permanent. "
+ "2) Alternatively you can just disable the protected mode by "
+ "editing the Redis configuration file, and setting the protected "
+ "mode option to 'no', and then restarting the server. "
+ "3) If you started the server manually just for testing, restart "
+ "it with the '--protected-mode no' option. "
+ "4) Setup a bind address or an authentication password. "
+ "NOTE: You only need to do one of the above things in order for "
+ "the server to start accepting connections from the outside.\r\n";
+ if (write(c->fd,err,strlen(err)) == -1) {
+ /* Nothing to do, just to avoid the warning... */
+ }
+ server.stat_rejected_conn++;
+ freeClient(c);
+ return;
+ }
+ }
+
server.stat_numconnections++;
c->flags |= flags;
}
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd, max = MAX_ACCEPTS_PER_CALL;
- char cip[REDIS_IP_STR_LEN];
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ char cip[NET_IP_STR_LEN];
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
while(max--) {
cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Accepting client connection: %s", server.neterr);
return;
}
- redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
- acceptCommonHandler(cfd,0);
+ serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
+ acceptCommonHandler(cfd,0,cip);
}
}
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cfd, max = MAX_ACCEPTS_PER_CALL;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- REDIS_NOTUSED(privdata);
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
while(max--) {
cfd = anetUnixAccept(server.neterr, fd);
if (cfd == ANET_ERR) {
if (errno != EWOULDBLOCK)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Accepting client connection: %s", server.neterr);
return;
}
- redisLog(REDIS_VERBOSE,"Accepted connection to %s", server.unixsocket);
- acceptCommonHandler(cfd,REDIS_UNIX_SOCKET);
+ serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
+ acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL);
}
}
-static void freeClientArgv(redisClient *c) {
+static void freeClientArgv(client *c) {
int j;
for (j = 0; j < c->argc; j++)
decrRefCount(c->argv[j]);
@@ -644,41 +764,70 @@ static void freeClientArgv(redisClient *c) {
void disconnectSlaves(void) {
while (listLength(server.slaves)) {
listNode *ln = listFirst(server.slaves);
- freeClient((redisClient*)ln->value);
+ freeClient((client*)ln->value);
}
}
-/* This function is called when the slave lose the connection with the
- * master into an unexpected way. */
-void replicationHandleMasterDisconnection(void) {
- server.master = NULL;
- server.repl_state = REDIS_REPL_CONNECT;
- server.repl_down_since = server.unixtime;
- /* We lost connection with our master, force our slaves to resync
- * with us as well to load the new data set.
- *
- * If server.masterhost is NULL the user called SLAVEOF NO ONE so
- * slave resync is not needed. */
- if (server.masterhost != NULL) disconnectSlaves();
-}
-
-void freeClient(redisClient *c) {
+/* Remove the specified client from global lists where the client could
+ * be referenced, not including the Pub/Sub channels.
+ * This is used by freeClient() and replicationCacheMaster(). */
+void unlinkClient(client *c) {
listNode *ln;
- /* If this is marked as current client unset it */
+ /* If this is marked as current client unset it. */
if (server.current_client == c) server.current_client = NULL;
+ /* Certain operations must be done only if the client has an active socket.
+ * If the client was already unlinked or if it's a "fake client" the
+ * fd is already set to -1. */
+ if (c->fd != -1) {
+ /* Remove from the list of active clients. */
+ if (c->client_list_node) {
+ uint64_t id = htonu64(c->id);
+ raxRemove(server.clients_index,(unsigned char*)&id,sizeof(id),NULL);
+ listDelNode(server.clients,c->client_list_node);
+ c->client_list_node = NULL;
+ }
+
+ /* Unregister async I/O handlers and close the socket. */
+ aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
+ aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ close(c->fd);
+ c->fd = -1;
+ }
+
+ /* Remove from the list of pending writes if needed. */
+ if (c->flags & CLIENT_PENDING_WRITE) {
+ ln = listSearchKey(server.clients_pending_write,c);
+ serverAssert(ln != NULL);
+ listDelNode(server.clients_pending_write,ln);
+ c->flags &= ~CLIENT_PENDING_WRITE;
+ }
+
+ /* When client was just unblocked because of a blocking operation,
+ * remove it from the list of unblocked clients. */
+ if (c->flags & CLIENT_UNBLOCKED) {
+ ln = listSearchKey(server.unblocked_clients,c);
+ serverAssert(ln != NULL);
+ listDelNode(server.unblocked_clients,ln);
+ c->flags &= ~CLIENT_UNBLOCKED;
+ }
+}
+
+void freeClient(client *c) {
+ listNode *ln;
+
 /* If it is our master that's being disconnected we should make sure
* to cache the state to try a partial resynchronization later.
*
* Note that before doing this we make sure that the client is not in
* some unexpected state, by checking its flags. */
- if (server.master && c->flags & REDIS_MASTER) {
- redisLog(REDIS_WARNING,"Connection with master lost.");
- if (!(c->flags & (REDIS_CLOSE_AFTER_REPLY|
- REDIS_CLOSE_ASAP|
- REDIS_BLOCKED|
- REDIS_UNBLOCKED)))
+ if (server.master && c->flags & CLIENT_MASTER) {
+ serverLog(LL_WARNING,"Connection with master lost.");
+ if (!(c->flags & (CLIENT_CLOSE_AFTER_REPLY|
+ CLIENT_CLOSE_ASAP|
+ CLIENT_BLOCKED|
+ CLIENT_UNBLOCKED)))
{
replicationCacheMaster(c);
return;
@@ -686,17 +835,18 @@ void freeClient(redisClient *c) {
}
/* Log link disconnection with slave */
- if ((c->flags & REDIS_SLAVE) && !(c->flags & REDIS_MONITOR)) {
- redisLog(REDIS_WARNING,"Connection with slave %s lost.",
+ if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR)) {
+ serverLog(LL_WARNING,"Connection with slave %s lost.",
replicationGetSlaveName(c));
}
/* Free the query buffer */
sdsfree(c->querybuf);
+ sdsfree(c->pending_querybuf);
c->querybuf = NULL;
/* Deallocate structures used to block on blocking ops. */
- if (c->flags & REDIS_BLOCKED) unblockClient(c);
+ if (c->flags & CLIENT_BLOCKED) unblockClient(c);
dictRelease(c->bpop.keys);
/* UNWATCH all the keys */
@@ -709,59 +859,43 @@ void freeClient(redisClient *c) {
dictRelease(c->pubsub_channels);
listRelease(c->pubsub_patterns);
- /* Close socket, unregister events, and remove list of replies and
- * accumulated arguments. */
- if (c->fd != -1) {
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- close(c->fd);
- }
+ /* Free data structures. */
listRelease(c->reply);
freeClientArgv(c);
- /* Remove from the list of clients */
- if (c->fd != -1) {
- ln = listSearchKey(server.clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.clients,ln);
- }
-
- /* When client was just unblocked because of a blocking operation,
- * remove it from the list of unblocked clients. */
- if (c->flags & REDIS_UNBLOCKED) {
- ln = listSearchKey(server.unblocked_clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.unblocked_clients,ln);
- }
+ /* Unlink the client: this will close the socket, remove the I/O
+ * handlers, and remove references of the client from different
+ * places where active clients may be referenced. */
+ unlinkClient(c);
/* Master/slave cleanup Case 1:
* we lost the connection with a slave. */
- if (c->flags & REDIS_SLAVE) {
- if (c->replstate == REDIS_REPL_SEND_BULK) {
+ if (c->flags & CLIENT_SLAVE) {
+ if (c->replstate == SLAVE_STATE_SEND_BULK) {
if (c->repldbfd != -1) close(c->repldbfd);
if (c->replpreamble) sdsfree(c->replpreamble);
}
- list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
+ list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.slaves;
ln = listSearchKey(l,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(l,ln);
/* We need to remember the time when we started to have zero
* attached slaves, as after some time we'll free the replication
* backlog. */
- if (c->flags & REDIS_SLAVE && listLength(server.slaves) == 0)
+ if (c->flags & CLIENT_SLAVE && listLength(server.slaves) == 0)
server.repl_no_slaves_since = server.unixtime;
refreshGoodSlavesCount();
}
/* Master/slave cleanup Case 2:
* we lost the connection with the master. */
- if (c->flags & REDIS_MASTER) replicationHandleMasterDisconnection();
+ if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();
/* If this client was scheduled for async freeing we need to remove it
* from the queue. */
- if (c->flags & REDIS_CLOSE_ASAP) {
+ if (c->flags & CLIENT_CLOSE_ASAP) {
ln = listSearchKey(server.clients_to_close,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(server.clients_to_close,ln);
}
@@ -778,32 +912,40 @@ void freeClient(redisClient *c) {
* This function is useful when we need to terminate a client but we are in
* a context where calling freeClient() is not possible, because the client
* should be valid for the continuation of the flow of the program. */
-void freeClientAsync(redisClient *c) {
- if (c->flags & REDIS_CLOSE_ASAP) return;
- c->flags |= REDIS_CLOSE_ASAP;
+void freeClientAsync(client *c) {
+ if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return;
+ c->flags |= CLIENT_CLOSE_ASAP;
listAddNodeTail(server.clients_to_close,c);
}
void freeClientsInAsyncFreeQueue(void) {
while (listLength(server.clients_to_close)) {
listNode *ln = listFirst(server.clients_to_close);
- redisClient *c = listNodeValue(ln);
+ client *c = listNodeValue(ln);
- c->flags &= ~REDIS_CLOSE_ASAP;
+ c->flags &= ~CLIENT_CLOSE_ASAP;
freeClient(c);
listDelNode(server.clients_to_close,ln);
}
}
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = privdata;
- int nwritten = 0, totwritten = 0, objlen;
- size_t objmem;
- robj *o;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
-
- while(c->bufpos > 0 || listLength(c->reply)) {
+/* Return a client by ID, or NULL if the client ID is not in the set
+ * of registered clients. Note that "fake clients", created with -1 as FD,
+ * are not registered clients. */
+client *lookupClientByID(uint64_t id) {
+ id = htonu64(id);
+ client *c = raxFind(server.clients_index,(unsigned char*)&id,sizeof(id));
+ return (c == raxNotFound) ? NULL : c;
+}
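
A minimal usage sketch (the surrounding command logic is hypothetical; a CLIENT KILL style filter by ID would look similar):

    client *target = lookupClientByID(id);
    if (target == NULL) {
        addReplyError(c,"No such client ID");
    } else {
        freeClientAsync(target); /* defer the free, we may be 'target' */
        addReply(c,shared.ok);
    }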
+
+/* Write data in output buffers to client. Return C_OK if the client
+ * is still valid after the call, C_ERR if it was freed. */
+int writeToClient(int fd, client *c, int handler_installed) {
+ ssize_t nwritten = 0, totwritten = 0;
+ size_t objlen;
+ clientReplyBlock *o;
+
+ while(clientHasPendingReplies(c)) {
if (c->bufpos > 0) {
nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
if (nwritten <= 0) break;
@@ -812,54 +954,62 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
/* If the buffer was sent, set bufpos to zero to continue with
* the remainder of the reply. */
- if (c->sentlen == c->bufpos) {
+ if ((int)c->sentlen == c->bufpos) {
c->bufpos = 0;
c->sentlen = 0;
}
} else {
o = listNodeValue(listFirst(c->reply));
- objlen = sdslen(o->ptr);
- objmem = getStringObjectSdsUsedMemory(o);
+ objlen = o->used;
if (objlen == 0) {
+ c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
- c->reply_bytes -= objmem;
continue;
}
- nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
+ nwritten = write(fd, o->buf + c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
/* If we fully sent the object on head go to the next one */
if (c->sentlen == objlen) {
+ c->reply_bytes -= o->size;
listDelNode(c->reply,listFirst(c->reply));
c->sentlen = 0;
- c->reply_bytes -= objmem;
+ /* If there are no more objects in the list, we expect
+ * the count of reply bytes to be exactly zero. */
+ if (listLength(c->reply) == 0)
+ serverAssert(c->reply_bytes == 0);
}
}
- /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
+ /* Note that we avoid sending more than NET_MAX_WRITES_PER_EVENT
* bytes, in a single threaded server it's a good idea to serve
* other clients as well, even if a very large request comes from
* super fast link that is always able to accept data (in real world
* scenario think about 'KEYS *' against the loopback interface).
*
* However if we are over the maxmemory limit we ignore that and
- * just deliver as much data as it is possible to deliver. */
- server.stat_net_output_bytes += totwritten;
- if (totwritten > REDIS_MAX_WRITE_PER_EVENT &&
+ * just deliver as much data as it is possible to deliver.
+ *
+ * Moreover, we also send as much as possible if the client is
+ * a slave (otherwise, on high-speed traffic, the replication
+ * buffer will grow indefinitely) */
+ if (totwritten > NET_MAX_WRITES_PER_EVENT &&
(server.maxmemory == 0 ||
- zmalloc_used_memory() < server.maxmemory)) break;
+ zmalloc_used_memory() < server.maxmemory) &&
+ !(c->flags & CLIENT_SLAVE)) break;
}
+ server.stat_net_output_bytes += totwritten;
if (nwritten == -1) {
if (errno == EAGAIN) {
nwritten = 0;
} else {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"Error writing to client: %s", strerror(errno));
freeClient(c);
- return;
+ return C_ERR;
}
}
if (totwritten > 0) {
@@ -867,34 +1017,104 @@ void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
* as an interaction, since we always send REPLCONF ACK commands
* that take some time to just fill the socket output buffer.
* We just rely on data / pings received for timeout detection. */
- if (!(c->flags & REDIS_MASTER)) c->lastinteraction = server.unixtime;
+ if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = server.unixtime;
}
- if (c->bufpos == 0 && listLength(c->reply) == 0) {
+ if (!clientHasPendingReplies(c)) {
c->sentlen = 0;
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
/* Close connection after entire reply has been sent. */
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c);
+ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
+ freeClient(c);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
+/* Write event handler. Just send data to the client. */
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
+ UNUSED(el);
+ UNUSED(mask);
+ writeToClient(fd,privdata,1);
+}
+
+/* This function is called just before entering the event loop, in the hope
+ * we can just write the replies to the client output buffer without any
+ * need to use a syscall in order to install the writable event handler,
+ * get it called, and so forth. */
+int handleClientsWithPendingWrites(void) {
+ listIter li;
+ listNode *ln;
+ int processed = listLength(server.clients_pending_write);
+
+ listRewind(server.clients_pending_write,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ c->flags &= ~CLIENT_PENDING_WRITE;
+ listDelNode(server.clients_pending_write,ln);
+
+ /* Try to write buffers to the client socket. */
+ if (writeToClient(c->fd,c,0) == C_ERR) continue;
+
+ /* If after the synchronous writes above we still have data to
+ * output to the client, we need to install the writable handler. */
+ if (clientHasPendingReplies(c)) {
+ int ae_flags = AE_WRITABLE;
+ /* For the fsync=always policy, we want a given FD to never be
+ * served for reading and writing in the same event loop iteration,
+ * so that in the middle of receiving the query, and serving it
+ * to the client, we'll call beforeSleep() that will do the
+ * actual fsync of AOF to disk. AE_BARRIER ensures that. */
+ if (server.aof_state == AOF_ON &&
+ server.aof_fsync == AOF_FSYNC_ALWAYS)
+ {
+ ae_flags |= AE_BARRIER;
+ }
+ if (aeCreateFileEvent(server.el, c->fd, ae_flags,
+ sendReplyToClient, c) == AE_ERR)
+ {
+ freeClientAsync(c);
+ }
+ }
}
+ return processed;
}
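
Roughly how this pairs with the event loop (a simplified sketch of the beforeSleep() side; the real function in server.c does more work):

    void beforeSleep(struct aeEventLoop *eventLoop) {
        UNUSED(eventLoop);
        /* Flush pending replies with direct write(2) calls; the writable
         * handler is installed only for clients whose output did not fit
         * in a single call. */
        handleClientsWithPendingWrites();
        /* ... AOF fsync, fast expire cycle, etc. elided ... */
    }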
/* resetClient prepares the client to process the next command */
-void resetClient(redisClient *c) {
+void resetClient(client *c) {
redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL;
freeClientArgv(c);
c->reqtype = 0;
c->multibulklen = 0;
c->bulklen = -1;
+
/* We clear the ASKING flag as well if we are not inside a MULTI, and
* if what we just executed is not the ASKING command itself. */
- if (!(c->flags & REDIS_MULTI) && prevcmd != askingCommand)
- c->flags &= (~REDIS_ASKING);
+ if (!(c->flags & CLIENT_MULTI) && prevcmd != askingCommand)
+ c->flags &= ~CLIENT_ASKING;
+
+ /* Remove the CLIENT_REPLY_SKIP flag if any so that the reply
+ * to the next command will be sent, but set the flag if the command
+ * we just processed was "CLIENT REPLY SKIP". */
+ c->flags &= ~CLIENT_REPLY_SKIP;
+ if (c->flags & CLIENT_REPLY_SKIP_NEXT) {
+ c->flags |= CLIENT_REPLY_SKIP;
+ c->flags &= ~CLIENT_REPLY_SKIP_NEXT;
+ }
}
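
The two-flag dance above can be summarized as follows (comment-only sketch):

    /* CLIENT REPLY SKIP timeline:
     *   1. The CLIENT REPLY SKIP command sets CLIENT_REPLY_SKIP_NEXT.
     *   2. resetClient() runs after it and swaps SKIP_NEXT for SKIP.
     *   3. The next command executes; prepareClientToWrite() sees
     *      CLIENT_REPLY_SKIP and returns C_ERR, suppressing its reply.
     *   4. resetClient() runs again, clears SKIP, and replies resume. */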
-int processInlineBuffer(redisClient *c) {
+/* Like processMultibulkBuffer(), but for the inline protocol instead of RESP,
+ * this function consumes the client query buffer and creates a command ready
+ * to be executed inside the client structure. Returns C_OK if the command
+ * is ready to be executed, or C_ERR if there is still protocol to read to
+ * have a well formed command. The function also returns C_ERR when there is
+ * a protocol error: in such a case the client structure is set up to reply
+ * with the error and close the connection. */
+int processInlineBuffer(client *c) {
char *newline;
- int argc, j;
+ int argc, j, linefeed_chars = 1;
sds *argv, aux;
size_t querylen;
@@ -903,16 +1123,16 @@ int processInlineBuffer(redisClient *c) {
/* Nothing to do without a \r\n */
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,"Protocol error: too big inline request");
- setProtocolError(c,0);
+ setProtocolError("too big inline request",c,0);
}
- return REDIS_ERR;
+ return C_ERR;
}
/* Handle the \r\n case. */
if (newline && newline != c->querybuf && *(newline-1) == '\r')
- newline--;
+ newline--, linefeed_chars++;
/* Split the input buffer up to the \r\n */
querylen = newline-(c->querybuf);
@@ -921,18 +1141,18 @@ int processInlineBuffer(redisClient *c) {
sdsfree(aux);
if (argv == NULL) {
addReplyError(c,"Protocol error: unbalanced quotes in request");
- setProtocolError(c,0);
- return REDIS_ERR;
+ setProtocolError("unbalanced quotes in inline request",c,0);
+ return C_ERR;
}
/* Newline from slaves can be used to refresh the last ACK time.
* This is useful for a slave to ping back while loading a big
* RDB file. */
- if (querylen == 0 && c->flags & REDIS_SLAVE)
+ if (querylen == 0 && c->flags & CLIENT_SLAVE)
c->repl_ack_time = server.unixtime;
/* Leave data after the first line of the query in the buffer */
- sdsrange(c->querybuf,querylen+2,-1);
+ sdsrange(c->querybuf,querylen+linefeed_chars,-1);
/* Setup argv array on client structure */
if (argc) {
@@ -943,66 +1163,96 @@ int processInlineBuffer(redisClient *c) {
/* Create redis objects for all arguments. */
for (c->argc = 0, j = 0; j < argc; j++) {
if (sdslen(argv[j])) {
- c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
+ c->argv[c->argc] = createObject(OBJ_STRING,argv[j]);
c->argc++;
} else {
sdsfree(argv[j]);
}
}
zfree(argv);
- return REDIS_OK;
+ return C_OK;
}
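
For concreteness, the same command in the two protocols this file parses:

    /* Inline protocol (parsed by processInlineBuffer() above):
     *
     *     SET mykey myvalue\r\n
     *
     * RESP multibulk (parsed by processMultibulkBuffer() below):
     *
     *     *3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n */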
/* Helper function. Trims query buffer to make the function that processes
* multi bulk requests idempotent. */
-static void setProtocolError(redisClient *c, int pos) {
- if (server.verbosity >= REDIS_VERBOSE) {
+#define PROTO_DUMP_LEN 128
+static void setProtocolError(const char *errstr, client *c, long pos) {
+ if (server.verbosity <= LL_VERBOSE) {
sds client = catClientInfoString(sdsempty(),c);
- redisLog(REDIS_VERBOSE,
- "Protocol error from client: %s", client);
+
+ /* Sample some protocol to give an idea about what was inside. */
+ char buf[256];
+ if (sdslen(c->querybuf) < PROTO_DUMP_LEN) {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%s'", c->querybuf);
+ } else {
+ snprintf(buf,sizeof(buf),"Query buffer during protocol error: '%.*s' (... more %zu bytes ...) '%.*s'", PROTO_DUMP_LEN/2, c->querybuf, sdslen(c->querybuf)-PROTO_DUMP_LEN, PROTO_DUMP_LEN/2, c->querybuf+sdslen(c->querybuf)-PROTO_DUMP_LEN/2);
+ }
+
+ /* Remove non printable chars. */
+ char *p = buf;
+ while (*p != '\0') {
+ if (!isprint(*p)) *p = '.';
+ p++;
+ }
+
+ /* Log all the client and protocol info. */
+ serverLog(LL_VERBOSE,
+ "Protocol error (%s) from client: %s. %s", errstr, client, buf);
sdsfree(client);
}
- c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
sdsrange(c->querybuf,pos,-1);
}
-int processMultibulkBuffer(redisClient *c) {
+/* Process the query buffer for client 'c', setting up the client argument
+ * vector for command execution. Returns C_OK if after running the function
+ * the client has a well-formed command ready to be processed, otherwise
+ * C_ERR if more buffer still needs to be read to get the full command.
+ * The function also returns C_ERR when there is a protocol error: in such a
+ * case the client structure is set up to reply with the error and close
+ * the connection.
+ *
+ * This function is called if processInputBuffer() detects that the next
+ * command is in RESP format, so the first byte in the command is found
+ * to be '*'. Otherwise for inline commands processInlineBuffer() is called. */
+int processMultibulkBuffer(client *c) {
char *newline = NULL;
- int pos = 0, ok;
+ long pos = 0;
+ int ok;
long long ll;
if (c->multibulklen == 0) {
/* The client should have been reset */
- redisAssertWithInfo(c,NULL,c->argc == 0);
+ serverAssertWithInfo(c,NULL,c->argc == 0);
/* Multi bulk length cannot be read without a \r\n */
newline = strchr(c->querybuf,'\r');
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,"Protocol error: too big mbulk count string");
- setProtocolError(c,0);
+ setProtocolError("too big mbulk count string",c,0);
}
- return REDIS_ERR;
+ return C_ERR;
}
/* Buffer should also contain \n */
if (newline-(c->querybuf) > ((signed)sdslen(c->querybuf)-2))
- return REDIS_ERR;
+ return C_ERR;
/* We know for sure there is a whole line since newline != NULL,
* so go ahead and find out the multi bulk length. */
- redisAssertWithInfo(c,NULL,c->querybuf[0] == '*');
+ serverAssertWithInfo(c,NULL,c->querybuf[0] == '*');
ok = string2ll(c->querybuf+1,newline-(c->querybuf+1),&ll);
if (!ok || ll > 1024*1024) {
addReplyError(c,"Protocol error: invalid multibulk length");
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("invalid mbulk count",c,pos);
+ return C_ERR;
}
pos = (newline-c->querybuf)+2;
if (ll <= 0) {
sdsrange(c->querybuf,pos,-1);
- return REDIS_OK;
+ return C_OK;
}
c->multibulklen = ll;
@@ -1012,17 +1262,17 @@ int processMultibulkBuffer(redisClient *c) {
c->argv = zmalloc(sizeof(robj*)*c->multibulklen);
}
- redisAssertWithInfo(c,NULL,c->multibulklen > 0);
+ serverAssertWithInfo(c,NULL,c->multibulklen > 0);
while(c->multibulklen) {
/* Read bulk length if unknown */
if (c->bulklen == -1) {
newline = strchr(c->querybuf+pos,'\r');
if (newline == NULL) {
- if (sdslen(c->querybuf) > REDIS_INLINE_MAX_SIZE) {
+ if (sdslen(c->querybuf) > PROTO_INLINE_MAX_SIZE) {
addReplyError(c,
"Protocol error: too big bulk count string");
- setProtocolError(c,0);
- return REDIS_ERR;
+ setProtocolError("too big bulk count string",c,0);
+ return C_ERR;
}
break;
}
@@ -1035,19 +1285,19 @@ int processMultibulkBuffer(redisClient *c) {
addReplyErrorFormat(c,
"Protocol error: expected '$', got '%c'",
c->querybuf[pos]);
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("expected $ but got something else",c,pos);
+ return C_ERR;
}
ok = string2ll(c->querybuf+pos+1,newline-(c->querybuf+pos+1),&ll);
- if (!ok || ll < 0 || ll > 512*1024*1024) {
+ if (!ok || ll < 0 || ll > server.proto_max_bulk_len) {
addReplyError(c,"Protocol error: invalid bulk length");
- setProtocolError(c,pos);
- return REDIS_ERR;
+ setProtocolError("invalid bulk length",c,pos);
+ return C_ERR;
}
pos += newline-(c->querybuf+pos)+2;
- if (ll >= REDIS_MBULK_BIG_ARG) {
+ if (ll >= PROTO_MBULK_BIG_ARG) {
size_t qblen;
/* If we are going to read a large object from network
@@ -1066,7 +1316,7 @@ int processMultibulkBuffer(redisClient *c) {
}
/* Read bulk argument */
- if (sdslen(c->querybuf)-pos < (unsigned)(c->bulklen+2)) {
+ if (sdslen(c->querybuf)-pos < (size_t)(c->bulklen+2)) {
/* Not enough data (+2 == trailing \r\n) */
break;
} else {
@@ -1074,15 +1324,15 @@ int processMultibulkBuffer(redisClient *c) {
* instead of creating a new object by *copying* the sds we
* just use the current sds string. */
if (pos == 0 &&
- c->bulklen >= REDIS_MBULK_BIG_ARG &&
- (signed) sdslen(c->querybuf) == c->bulklen+2)
+ c->bulklen >= PROTO_MBULK_BIG_ARG &&
+ sdslen(c->querybuf) == (size_t)(c->bulklen+2))
{
- c->argv[c->argc++] = createObject(REDIS_STRING,c->querybuf);
+ c->argv[c->argc++] = createObject(OBJ_STRING,c->querybuf);
sdsIncrLen(c->querybuf,-2); /* remove CRLF */
- c->querybuf = sdsempty();
/* Assume that if we saw a fat argument we'll see another one
* likely... */
- c->querybuf = sdsMakeRoomFor(c->querybuf,c->bulklen+2);
+ c->querybuf = sdsnewlen(SDS_NOINIT,c->bulklen+2);
+ sdsclear(c->querybuf);
pos = 0;
} else {
c->argv[c->argc++] =
@@ -1098,41 +1348,48 @@ int processMultibulkBuffer(redisClient *c) {
if (pos) sdsrange(c->querybuf,pos,-1);
/* We're done when c->multibulk == 0 */
- if (c->multibulklen == 0) return REDIS_OK;
+ if (c->multibulklen == 0) return C_OK;
- /* Still not read to process the command */
- return REDIS_ERR;
+ /* Still not ready to process the command */
+ return C_ERR;
}
-void processInputBuffer(redisClient *c) {
+/* This function is called every time there is more query buffer to process
+ * in the client structure 'c', because we read more data from the socket
+ * or because a client was blocked and later reactivated, so there could be
+ * pending query buffer, already representing a full command, to process. */
+void processInputBuffer(client *c) {
+ server.current_client = c;
/* Keep processing while there is something in the input buffer */
while(sdslen(c->querybuf)) {
/* Return if clients are paused. */
- if (!(c->flags & REDIS_SLAVE) && clientsArePaused()) return;
+ if (!(c->flags & CLIENT_SLAVE) && clientsArePaused()) break;
/* Immediately abort if the client is in the middle of something. */
- if (c->flags & REDIS_BLOCKED) return;
+ if (c->flags & CLIENT_BLOCKED) break;
- /* REDIS_CLOSE_AFTER_REPLY closes the connection once the reply is
+ /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
* written to the client. Make sure to not let the reply grow after
- * this flag has been set (i.e. don't process more commands). */
- if (c->flags & REDIS_CLOSE_AFTER_REPLY) return;
+ * this flag has been set (i.e. don't process more commands).
+ *
+ * The same applies for clients we want to terminate ASAP. */
+ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break;
/* Determine request type when unknown. */
if (!c->reqtype) {
if (c->querybuf[0] == '*') {
- c->reqtype = REDIS_REQ_MULTIBULK;
+ c->reqtype = PROTO_REQ_MULTIBULK;
} else {
- c->reqtype = REDIS_REQ_INLINE;
+ c->reqtype = PROTO_REQ_INLINE;
}
}
- if (c->reqtype == REDIS_REQ_INLINE) {
- if (processInlineBuffer(c) != REDIS_OK) break;
- } else if (c->reqtype == REDIS_REQ_MULTIBULK) {
- if (processMultibulkBuffer(c) != REDIS_OK) break;
+ if (c->reqtype == PROTO_REQ_INLINE) {
+ if (processInlineBuffer(c) != C_OK) break;
+ } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
+ if (processMultibulkBuffer(c) != C_OK) break;
} else {
- redisPanic("Unknown request type");
+ serverPanic("Unknown request type");
}
/* Multibulk processing could see a <= 0 length. */
@@ -1140,31 +1397,46 @@ void processInputBuffer(redisClient *c) {
resetClient(c);
} else {
/* Only reset the client when the command was executed. */
- if (processCommand(c) == REDIS_OK)
- resetClient(c);
+ if (processCommand(c) == C_OK) {
+ if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
+ /* Update the applied replication offset of our master. */
+ c->reploff = c->read_reploff - sdslen(c->querybuf);
+ }
+
+ /* Don't reset the client structure for clients blocked in a
+ * module blocking command, so that the reply callback will
+ * still be able to access the client argv and argc field.
+ * The client will be reset in unblockClientFromModule(). */
+ if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE)
+ resetClient(c);
+ }
+ /* freeMemoryIfNeeded may flush slave output buffers. This may
+ * result in a slave, possibly the active client, being
+ * freed. */
+ if (server.current_client == NULL) break;
}
}
+ server.current_client = NULL;
}
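
The dispatch above hinges on a single byte: RESP requests always start with '*', anything else is handled by the legacy inline protocol. A standalone sketch of that sniffing step (type and function names are illustrative, not the server's):

    #include <stdio.h>

    enum reqtype { REQ_UNKNOWN, REQ_INLINE, REQ_MULTIBULK };

    /* Classify a query buffer by its first byte, as the loop above does. */
    static enum reqtype sniff_request(const char *buf, size_t len) {
        if (len == 0) return REQ_UNKNOWN;
        return (buf[0] == '*') ? REQ_MULTIBULK : REQ_INLINE;
    }

    int main(void) {
        printf("%d\n", sniff_request("*1\r\n$4\r\nPING\r\n", 14)); /* 2: multibulk */
        printf("%d\n", sniff_request("PING\r\n", 6));             /* 1: inline */
        return 0;
    }
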
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *c = (redisClient*) privdata;
+ client *c = (client*) privdata;
int nread, readlen;
size_t qblen;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(mask);
- server.current_client = c;
- readlen = REDIS_IOBUF_LEN;
+ readlen = PROTO_IOBUF_LEN;
/* If this is a multi bulk request, and we are processing a bulk reply
* that is large enough, try to maximize the probability that the query
* buffer contains exactly the SDS string representing the object, even
* at the risk of requiring more read(2) calls. This way the function
* processMultibulkBuffer() can avoid copying buffers to create the
* Redis Object representing the argument. */
- if (c->reqtype == REDIS_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
- && c->bulklen >= REDIS_MBULK_BIG_ARG)
+ if (c->reqtype == PROTO_REQ_MULTIBULK && c->multibulklen && c->bulklen != -1
+ && c->bulklen >= PROTO_MBULK_BIG_ARG)
{
- int remaining = (unsigned)(c->bulklen+2)-sdslen(c->querybuf);
+ ssize_t remaining = (size_t)(c->bulklen+2)-sdslen(c->querybuf);
if (remaining < readlen) readlen = remaining;
}
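
The clamp above keeps a big-argument read from overshooting the bulk's trailing CRLF, so the query buffer can end up holding exactly the SDS string for the argument. The same arithmetic in isolation (a sketch, with a hypothetical helper name):

    #include <stdio.h>

    /* How many bytes to read so the buffer stops exactly at the
     * argument's trailing CRLF; mirrors the clamp above. */
    static long clamp_readlen(long bulklen, size_t buffered, long iobuf_len) {
        long remaining = (bulklen + 2) - (long)buffered; /* +2 for CRLF */
        return (remaining < iobuf_len) ? remaining : iobuf_len;
    }

    int main(void) {
        /* 1 MB argument, 1048570 bytes already buffered, 16 KB default reads: */
        printf("%ld\n", clamp_readlen(1048576, 1048570, 16*1024)); /* prints 8 */
        return 0;
    }
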
@@ -1175,43 +1447,62 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
nread = read(fd, c->querybuf+qblen, readlen);
if (nread == -1) {
if (errno == EAGAIN) {
- nread = 0;
+ return;
} else {
- redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
+ serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno));
freeClient(c);
return;
}
} else if (nread == 0) {
- redisLog(REDIS_VERBOSE, "Client closed connection");
+ serverLog(LL_VERBOSE, "Client closed connection");
freeClient(c);
return;
+ } else if (c->flags & CLIENT_MASTER) {
+ /* Append the query buffer to the pending (not applied) buffer
+ * of the master. We'll use this buffer later in order to have a
+ * copy of the string applied by the last command executed. */
+ c->pending_querybuf = sdscatlen(c->pending_querybuf,
+ c->querybuf+qblen,nread);
}
- if (nread) {
- sdsIncrLen(c->querybuf,nread);
- c->lastinteraction = server.unixtime;
- if (c->flags & REDIS_MASTER) c->reploff += nread;
- server.stat_net_input_bytes += nread;
- } else {
- server.current_client = NULL;
- return;
- }
+
+ sdsIncrLen(c->querybuf,nread);
+ c->lastinteraction = server.unixtime;
+ if (c->flags & CLIENT_MASTER) c->read_reploff += nread;
+ server.stat_net_input_bytes += nread;
if (sdslen(c->querybuf) > server.client_max_querybuf_len) {
sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty();
bytes = sdscatrepr(bytes,c->querybuf,64);
- redisLog(REDIS_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
+ serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
sdsfree(ci);
sdsfree(bytes);
freeClient(c);
return;
}
- processInputBuffer(c);
- server.current_client = NULL;
+
+ /* Time to process the buffer. If the client is a master we need to
+ * compute the difference between the applied offset before and after
+ * processing the buffer, to understand how much of the replication stream
+ * was actually applied to the master state: this quantity, and its
+ * corresponding part of the replication stream, will be propagated to
+ * the sub-slaves and to the replication backlog. */
+ if (!(c->flags & CLIENT_MASTER)) {
+ processInputBuffer(c);
+ } else {
+ size_t prev_offset = c->reploff;
+ processInputBuffer(c);
+ size_t applied = c->reploff - prev_offset;
+ if (applied) {
+ replicationFeedSlavesFromMasterStream(server.slaves,
+ c->pending_querybuf, applied);
+ sdsrange(c->pending_querybuf,applied,-1);
+ }
+ }
}
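
The master branch above forwards only the byte range that processing actually consumed: the applied delta indexes into pending_querybuf, which is then trimmed. A toy version of that bookkeeping, assuming offsets only grow:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        /* Raw master stream not yet applied (two commands, inline form). */
        char pending[] = "SET k v\r\nGET k\r\n";
        size_t reploff_before = 100, reploff_after = 109; /* first command done */
        size_t applied = reploff_after - reploff_before;  /* 9 bytes */

        /* Feed exactly those bytes downstream, then trim them away. */
        printf("feed: %.*s", (int)applied, pending);
        memmove(pending, pending + applied, strlen(pending + applied) + 1);
        printf("left: %s", pending);
        return 0;
    }
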
void getClientsMaxBuffers(unsigned long *longest_output_list,
unsigned long *biggest_input_buffer) {
- redisClient *c;
+ client *c;
listNode *ln;
listIter li;
unsigned long lol = 0, bib = 0;
@@ -1232,15 +1523,15 @@ void getClientsMaxBuffers(unsigned long *longest_output_list,
* For IPv6 addresses we use [] around the IP part, like in "[::1]:1234".
* For Unix sockets we use path:0, like in "/tmp/redis:0".
*
- * A Peer ID always fits inside a buffer of REDIS_PEER_ID_LEN bytes, including
+ * A Peer ID always fits inside a buffer of NET_PEER_ID_LEN bytes, including
* the null term.
*
* On failure the function still populates 'peerid' with the "?:0" string
* in case you want to relax error checking or need to display something
* anyway (see anetPeerToString implementation for more info). */
-void genClientPeerId(redisClient *client, char *peerid,
+void genClientPeerId(client *client, char *peerid,
size_t peerid_len) {
- if (client->flags & REDIS_UNIX_SOCKET) {
+ if (client->flags & CLIENT_UNIX_SOCKET) {
/* Unix socket client. */
snprintf(peerid,peerid_len,"%s:0",server.unixsocket);
} else {
@@ -1253,8 +1544,8 @@ void genClientPeerId(redisClient *client, char *peerid,
* if client->peerid is NULL, otherwise returning the cached value.
* The Peer ID never changes during the life of the client, however it
* is expensive to compute. */
-char *getClientPeerId(redisClient *c) {
- char peerid[REDIS_PEER_ID_LEN];
+char *getClientPeerId(client *c) {
+ char peerid[NET_PEER_ID_LEN];
if (c->peerid == NULL) {
genClientPeerId(c,peerid,sizeof(peerid));
@@ -1265,26 +1556,27 @@ char *getClientPeerId(redisClient *c) {
/* Concatenate a string representing the state of a client in a human
* readable format, into the sds string 's'. */
-sds catClientInfoString(sds s, redisClient *client) {
+sds catClientInfoString(sds s, client *client) {
char flags[16], events[3], *p;
int emask;
p = flags;
- if (client->flags & REDIS_SLAVE) {
- if (client->flags & REDIS_MONITOR)
+ if (client->flags & CLIENT_SLAVE) {
+ if (client->flags & CLIENT_MONITOR)
*p++ = 'O';
else
*p++ = 'S';
}
- if (client->flags & REDIS_MASTER) *p++ = 'M';
- if (client->flags & REDIS_MULTI) *p++ = 'x';
- if (client->flags & REDIS_BLOCKED) *p++ = 'b';
- if (client->flags & REDIS_DIRTY_CAS) *p++ = 'd';
- if (client->flags & REDIS_CLOSE_AFTER_REPLY) *p++ = 'c';
- if (client->flags & REDIS_UNBLOCKED) *p++ = 'u';
- if (client->flags & REDIS_CLOSE_ASAP) *p++ = 'A';
- if (client->flags & REDIS_UNIX_SOCKET) *p++ = 'U';
- if (client->flags & REDIS_READONLY) *p++ = 'r';
+ if (client->flags & CLIENT_MASTER) *p++ = 'M';
+ if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
+ if (client->flags & CLIENT_MULTI) *p++ = 'x';
+ if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
+ if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd';
+ if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c';
+ if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u';
+ if (client->flags & CLIENT_CLOSE_ASAP) *p++ = 'A';
+ if (client->flags & CLIENT_UNIX_SOCKET) *p++ = 'U';
+ if (client->flags & CLIENT_READONLY) *p++ = 'r';
if (p == flags) *p++ = 'N';
*p++ = '\0';
@@ -1305,7 +1597,7 @@ sds catClientInfoString(sds s, redisClient *client) {
client->db->id,
(int) dictSize(client->pubsub_channels),
(int) listLength(client->pubsub_patterns),
- (client->flags & REDIS_MULTI) ? client->mstate.count : -1,
+ (client->flags & CLIENT_MULTI) ? client->mstate.count : -1,
(unsigned long long) sdslen(client->querybuf),
(unsigned long long) sdsavail(client->querybuf),
(unsigned long long) client->bufpos,
@@ -1315,32 +1607,79 @@ sds catClientInfoString(sds s, redisClient *client) {
client->lastcmd ? client->lastcmd->name : "NULL");
}
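
The flags string above is built by appending one character per set bit, with 'N' as the empty marker. The pattern, reduced to a standalone sketch (flag values here are made up):

    #include <stdio.h>

    #define F_SLAVE   (1<<0)  /* illustrative bits, not the server's */
    #define F_MASTER  (1<<1)
    #define F_BLOCKED (1<<2)

    static void flags_to_string(int flags, char *out) {
        char *p = out;
        if (flags & F_SLAVE)   *p++ = 'S';
        if (flags & F_MASTER)  *p++ = 'M';
        if (flags & F_BLOCKED) *p++ = 'b';
        if (p == out) *p++ = 'N';  /* no flags set */
        *p = '\0';
    }

    int main(void) {
        char buf[8];
        flags_to_string(F_MASTER | F_BLOCKED, buf);
        printf("%s\n", buf); /* prints: Mb */
        return 0;
    }
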
-sds getAllClientsInfoString(void) {
+sds getAllClientsInfoString(int type) {
listNode *ln;
listIter li;
- redisClient *client;
- sds o = sdsempty();
-
- o = sdsMakeRoomFor(o,200*listLength(server.clients));
+ client *client;
+ sds o = sdsnewlen(SDS_NOINIT,200*listLength(server.clients));
+ sdsclear(o);
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
+ if (type != -1 && getClientType(client) != type) continue;
o = catClientInfoString(o,client);
o = sdscatlen(o,"\n",1);
}
return o;
}
-void clientCommand(redisClient *c) {
+void clientCommand(client *c) {
listNode *ln;
listIter li;
- redisClient *client;
-
- if (!strcasecmp(c->argv[1]->ptr,"list") && c->argc == 2) {
+ client *client;
+
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"id -- Return the ID of the current connection.",
+"getname -- Return the name of the current connection.",
+"kill <ip:port> -- Kill connection made from <ip:port>.",
+"kill <option> <value> [option value ...] -- Kill connections. Options are:",
+" addr <ip:port> -- Kill connection made from <ip:port>",
+" type (normal|master|slave|pubsub) -- Kill connections by type.",
+" skipme (yes|no) -- Skip killing current connection (default: yes).",
+"list [options ...] -- Return information about client connections. Options:",
+" type (normal|master|slave|pubsub) -- Return clients of specified type.",
+"pause <timeout> -- Suspend all Redis clients for <timout> milliseconds.",
+"reply (on|off|skip) -- Control the replies sent to the current connection.",
+"setname <name> -- Assign the name <name> to the current connection.",
+"unblock <clientid> [TIMEOUT|ERROR] -- Unblock the specified blocked client.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"id") && c->argc == 2) {
+ /* CLIENT ID */
+ addReplyLongLong(c,c->id);
+ } else if (!strcasecmp(c->argv[1]->ptr,"list")) {
/* CLIENT LIST */
- sds o = getAllClientsInfoString();
+ int type = -1;
+ if (c->argc == 4 && !strcasecmp(c->argv[2]->ptr,"type")) {
+ type = getClientTypeByName(c->argv[3]->ptr);
+ if (type == -1) {
+ addReplyErrorFormat(c,"Unknown client type '%s'",
+ (char*) c->argv[3]->ptr);
+ return;
+ }
+ } else if (c->argc != 2) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ sds o = getAllClientsInfoString(type);
addReplyBulkCBuffer(c,o,sdslen(o));
sdsfree(o);
+ } else if (!strcasecmp(c->argv[1]->ptr,"reply") && c->argc == 3) {
+ /* CLIENT REPLY ON|OFF|SKIP */
+ if (!strcasecmp(c->argv[2]->ptr,"on")) {
+ c->flags &= ~(CLIENT_REPLY_SKIP|CLIENT_REPLY_OFF);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"off")) {
+ c->flags |= CLIENT_REPLY_OFF;
+ } else if (!strcasecmp(c->argv[2]->ptr,"skip")) {
+ if (!(c->flags & CLIENT_REPLY_OFF))
+ c->flags |= CLIENT_REPLY_SKIP_NEXT;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"kill")) {
/* CLIENT KILL <ip:port>
* CLIENT KILL <option> [value] ... <option> [value] */
@@ -1365,7 +1704,7 @@ void clientCommand(redisClient *c) {
long long tmp;
if (getLongLongFromObjectOrReply(c,c->argv[i+1],&tmp,NULL)
- != REDIS_OK) return;
+ != C_OK) return;
id = tmp;
} else if (!strcasecmp(c->argv[i]->ptr,"type") && moreargs) {
type = getClientTypeByName(c->argv[i+1]->ptr);
@@ -1401,9 +1740,7 @@ void clientCommand(redisClient *c) {
while ((ln = listNext(&li)) != NULL) {
client = listNodeValue(ln);
if (addr && strcmp(getClientPeerId(client),addr) != 0) continue;
- if (type != -1 &&
- (client->flags & REDIS_MASTER ||
- getClientType(client) != type)) continue;
+ if (type != -1 && getClientType(client) != type) continue;
if (id != 0 && client->id != id) continue;
if (c == client && skipme) continue;
@@ -1428,7 +1765,39 @@ void clientCommand(redisClient *c) {
/* If this client has to be closed, flag it as CLOSE_AFTER_REPLY
* only after we queued the reply to its output buffers. */
- if (close_this_client) c->flags |= REDIS_CLOSE_AFTER_REPLY;
+ if (close_this_client) c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ } else if (!strcasecmp(c->argv[1]->ptr,"unblock") && (c->argc == 3 ||
+ c->argc == 4))
+ {
+ /* CLIENT UNBLOCK <id> [timeout|error] */
+ long long id;
+ int unblock_error = 0;
+
+ if (c->argc == 4) {
+ if (!strcasecmp(c->argv[3]->ptr,"timeout")) {
+ unblock_error = 0;
+ } else if (!strcasecmp(c->argv[3]->ptr,"error")) {
+ unblock_error = 1;
+ } else {
+ addReplyError(c,
+ "CLIENT UNBLOCK reason should be TIMEOUT or ERROR");
+ return;
+ }
+ }
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&id,NULL)
+ != C_OK) return;
+ struct client *target = lookupClientByID(id);
+ if (target && target->flags & CLIENT_BLOCKED) {
+ if (unblock_error)
+ addReplyError(target,
+ "-UNBLOCKED client unblocked via CLIENT UNBLOCK");
+ else
+ replyToBlockedClientTimedOut(target);
+ unblockClient(target);
+ addReply(c,shared.cone);
+ } else {
+ addReply(c,shared.czero);
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) {
int j, len = sdslen(c->argv[2]->ptr);
char *p = c->argv[2]->ptr;
@@ -1466,18 +1835,38 @@ void clientCommand(redisClient *c) {
long long duration;
if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,UNIT_MILLISECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
pauseClients(duration);
addReply(c,shared.ok);
} else {
- addReplyError(c, "Syntax error, try CLIENT (LIST | KILL ip:port | GETNAME | SETNAME connection-name)");
+ addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLIENT HELP", (char*)c->argv[1]->ptr);
+ }
+}
+
+/* This callback is bound to POST and "Host:" command names. Those are not
+ * really commands, but are used in security attacks in order to talk to
+ * Redis instances via HTTP, with a technique called "cross protocol scripting"
+ * which exploits the fact that services like Redis will discard invalid
+ * HTTP headers and will process what follows.
+ *
+ * As a protection against this attack, Redis will terminate the connection
+ * when a POST or "Host:" header is seen, and will log the event from
+ * time to time (to avoid creating a DoS as a result of too many logs). */
+void securityWarningCommand(client *c) {
+ static time_t logged_time;
+ time_t now = time(NULL);
+
+ if (labs(now-logged_time) > 60) {
+ serverLog(LL_WARNING,"Possible SECURITY ATTACK detected. It looks like somebody is sending POST or Host: commands to Redis. This is likely due to an attacker attempting to use Cross Protocol Scripting to compromise your Redis instance. Connection aborted.");
+ logged_time = now;
}
+ freeClientAsync(c);
}
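
The once-a-minute logging above is a plain time-keyed rate limiter: a static timestamp remembers the last emission. The same shape as a self-contained sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Emit at most once per 'interval' seconds; the static mirrors
     * the logged_time variable above. */
    static void rate_limited_log(const char *msg, long interval) {
        static time_t last;
        time_t now = time(NULL);
        if (labs((long)(now - last)) > interval) {
            fprintf(stderr, "%s\n", msg);
            last = now;
        }
    }

    int main(void) {
        for (int i = 0; i < 3; i++)
            rate_limited_log("possible attack", 60); /* prints only once */
        return 0;
    }
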
/* Rewrite the command vector of the client. All the new objects ref count
* is incremented. The old command vector is freed, and the old objects
* ref count is decremented. */
-void rewriteClientCommandVector(redisClient *c, int argc, ...) {
+void rewriteClientCommandVector(client *c, int argc, ...) {
va_list ap;
int j;
robj **argv; /* The new argument vector */
@@ -1500,25 +1889,48 @@ void rewriteClientCommandVector(redisClient *c, int argc, ...) {
c->argv = argv;
c->argc = argc;
c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
- redisAssertWithInfo(c,NULL,c->cmd != NULL);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
va_end(ap);
}
+/* Completely replace the client command vector with the provided one. */
+void replaceClientCommandVector(client *c, int argc, robj **argv) {
+ freeClientArgv(c);
+ zfree(c->argv);
+ c->argv = argv;
+ c->argc = argc;
+ c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
+}
+
/* Rewrite a single item in the command vector.
- * The new val ref count is incremented, and the old decremented. */
-void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
+ * The new val ref count is incremented, and the old decremented.
+ *
+ * It is possible to specify an argument over the current size of the
+ * argument vector: in this case the array of objects gets reallocated
+ * and c->argc set to the max value. However it's up to the caller to
+ *
+ * 1. Make sure there are no "holes" and all the arguments are set.
+ * 2. If the original argument vector was longer than the one we
+ * want to end with, it's up to the caller to set c->argc and
+ * free the no longer used objects on c->argv. */
+void rewriteClientCommandArgument(client *c, int i, robj *newval) {
robj *oldval;
- redisAssertWithInfo(c,NULL,i < c->argc);
+ if (i >= c->argc) {
+ c->argv = zrealloc(c->argv,sizeof(robj*)*(i+1));
+ c->argc = i+1;
+ c->argv[i] = NULL;
+ }
oldval = c->argv[i];
c->argv[i] = newval;
incrRefCount(newval);
- decrRefCount(oldval);
+ if (oldval) decrRefCount(oldval);
/* If this is the command name make sure to fix c->cmd. */
if (i == 0) {
c->cmd = lookupCommandOrOriginal(c->argv[0]->ptr);
- redisAssertWithInfo(c,NULL,c->cmd != NULL);
+ serverAssertWithInfo(c,NULL,c->cmd != NULL);
}
}
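
Writing past the current argument count reallocates the vector and leaves it to the caller to fill every slot. A sketch of that growth contract with plain strings instead of robj (here the new slots are NULL-padded for safety; the function above clears only slot i):

    #include <stdio.h>
    #include <stdlib.h>

    static char **grow_argv(char **argv, int *argc, int i) {
        if (i >= *argc) {
            argv = realloc(argv, sizeof(char*) * (i + 1));
            for (int j = *argc; j <= i; j++) argv[j] = NULL;
            *argc = i + 1;
        }
        return argv;
    }

    int main(void) {
        int argc = 1;
        char **argv = malloc(sizeof(char*));
        argv[0] = "SET";
        argv = grow_argv(argv, &argc, 2);
        argv[2] = "value";  /* argv[1] is still a hole the caller must fill */
        printf("argc=%d argv[1]=%p\n", argc, (void*)argv[1]);
        free(argv);
        return 0;
    }
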
@@ -1535,9 +1947,8 @@ void rewriteClientCommandArgument(redisClient *c, int i, robj *newval) {
* Note: this function is very fast so it can be called as many times as
* the caller wishes. The main usage of this function currently is
* enforcing the client output length limits. */
-unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
- unsigned long list_item_size = sizeof(listNode)+sizeof(robj);
-
+unsigned long getClientOutputBufferMemoryUsage(client *c) {
+ unsigned long list_item_size = sizeof(listNode) + sizeof(clientReplyBlock);
return c->reply_bytes + (list_item_size*listLength(c->reply));
}
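
So the estimate is just queued payload plus a fixed per-node overhead for each reply list entry. With hypothetical sizes:

    #include <stdio.h>

    int main(void) {
        unsigned long reply_bytes = 4096;   /* queued payload bytes */
        unsigned long nodes = 3;            /* listLength(c->reply) */
        unsigned long per_node = 24 + 16;   /* listNode + clientReplyBlock, say */
        printf("%lu\n", reply_bytes + per_node * nodes); /* prints 4216 */
        return 0;
    }
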
@@ -1545,30 +1956,33 @@ unsigned long getClientOutputBufferMemoryUsage(redisClient *c) {
* classes of clients.
*
* The function will return one of the following:
- * REDIS_CLIENT_TYPE_NORMAL -> Normal client
- * REDIS_CLIENT_TYPE_SLAVE -> Slave or client executing MONITOR command
- * REDIS_CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
+ * CLIENT_TYPE_NORMAL -> Normal client
+ * CLIENT_TYPE_SLAVE -> Slave or client executing MONITOR command
+ * CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
+ * CLIENT_TYPE_MASTER -> The client representing our replication master.
*/
-int getClientType(redisClient *c) {
- if ((c->flags & REDIS_SLAVE) && !(c->flags & REDIS_MONITOR))
- return REDIS_CLIENT_TYPE_SLAVE;
- if (c->flags & REDIS_PUBSUB)
- return REDIS_CLIENT_TYPE_PUBSUB;
- return REDIS_CLIENT_TYPE_NORMAL;
+int getClientType(client *c) {
+ if (c->flags & CLIENT_MASTER) return CLIENT_TYPE_MASTER;
+ if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR))
+ return CLIENT_TYPE_SLAVE;
+ if (c->flags & CLIENT_PUBSUB) return CLIENT_TYPE_PUBSUB;
+ return CLIENT_TYPE_NORMAL;
}
int getClientTypeByName(char *name) {
- if (!strcasecmp(name,"normal")) return REDIS_CLIENT_TYPE_NORMAL;
- else if (!strcasecmp(name,"slave")) return REDIS_CLIENT_TYPE_SLAVE;
- else if (!strcasecmp(name,"pubsub")) return REDIS_CLIENT_TYPE_PUBSUB;
+ if (!strcasecmp(name,"normal")) return CLIENT_TYPE_NORMAL;
+ else if (!strcasecmp(name,"slave")) return CLIENT_TYPE_SLAVE;
+ else if (!strcasecmp(name,"pubsub")) return CLIENT_TYPE_PUBSUB;
+ else if (!strcasecmp(name,"master")) return CLIENT_TYPE_MASTER;
else return -1;
}
char *getClientTypeName(int class) {
switch(class) {
- case REDIS_CLIENT_TYPE_NORMAL: return "normal";
- case REDIS_CLIENT_TYPE_SLAVE: return "slave";
- case REDIS_CLIENT_TYPE_PUBSUB: return "pubsub";
+ case CLIENT_TYPE_NORMAL: return "normal";
+ case CLIENT_TYPE_SLAVE: return "slave";
+ case CLIENT_TYPE_PUBSUB: return "pubsub";
+ case CLIENT_TYPE_MASTER: return "master";
default: return NULL;
}
}
@@ -1579,11 +1993,15 @@ char *getClientTypeName(int class) {
*
* Return value: non-zero if the client reached the soft or the hard limit.
* Otherwise zero is returned. */
-int checkClientOutputBufferLimits(redisClient *c) {
+int checkClientOutputBufferLimits(client *c) {
int soft = 0, hard = 0, class;
unsigned long used_mem = getClientOutputBufferMemoryUsage(c);
class = getClientType(c);
+ /* For the purpose of output buffer limiting, masters are handled
+ * like normal clients. */
+ if (class == CLIENT_TYPE_MASTER) class = CLIENT_TYPE_NORMAL;
+
if (server.client_obuf_limits[class].hard_limit_bytes &&
used_mem >= server.client_obuf_limits[class].hard_limit_bytes)
hard = 1;
@@ -1615,40 +2033,48 @@ int checkClientOutputBufferLimits(redisClient *c) {
/* Asynchronously close a client if soft or hard limit is reached on the
* output buffer size. The caller can check if the client will be closed
- * checking if the client REDIS_CLOSE_ASAP flag is set.
+ * checking if the client CLIENT_CLOSE_ASAP flag is set.
*
* Note: we need to close the client asynchronously because this function is
* called from contexts where the client can't be freed safely, i.e. from the
* lower level functions pushing data inside the client output buffers. */
-void asyncCloseClientOnOutputBufferLimitReached(redisClient *c) {
- redisAssert(c->reply_bytes < ULONG_MAX-(1024*64));
- if (c->reply_bytes == 0 || c->flags & REDIS_CLOSE_ASAP) return;
+void asyncCloseClientOnOutputBufferLimitReached(client *c) {
+ serverAssert(c->reply_bytes < SIZE_MAX-(1024*64));
+ if (c->reply_bytes == 0 || c->flags & CLIENT_CLOSE_ASAP) return;
if (checkClientOutputBufferLimits(c)) {
sds client = catClientInfoString(sdsempty(),c);
freeClientAsync(c);
- redisLog(REDIS_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
+ serverLog(LL_WARNING,"Client %s scheduled to be closed ASAP for overcoming of output buffer limits.", client);
sdsfree(client);
}
}
/* Helper function used by freeMemoryIfNeeded() in order to flush slaves
- * output buffers without returning control to the event loop. */
+ * output buffers without returning control to the event loop.
+ * This is also called by SHUTDOWN for a best-effort attempt to send
+ * slaves the latest writes. */
void flushSlavesOutputBuffers(void) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
+ client *slave = listNodeValue(ln);
int events;
+ /* Note that the following will not flush output buffers of slaves
+ * in STATE_ONLINE that have put_online_on_ack set to true: in this
+ * case the writable event is never installed, since the purpose
+ * of put_online_on_ack is to postpone the moment it is installed.
+ * This is what we want since slaves in this state should not receive
+ * writes before the first ACK. */
events = aeGetFileEvents(server.el,slave->fd);
if (events & AE_WRITABLE &&
- slave->replstate == REDIS_REPL_ONLINE &&
- listLength(slave->reply))
+ slave->replstate == SLAVE_STATE_ONLINE &&
+ clientHasPendingReplies(slave))
{
- sendReplyToClient(server.el,slave->fd,slave,0);
+ writeToClient(slave->fd,slave,0);
}
}
}
@@ -1679,10 +2105,12 @@ void pauseClients(mstime_t end) {
/* Return non-zero if clients are currently paused. As a side effect the
* function checks if the pause time was reached and clear it. */
int clientsArePaused(void) {
- if (server.clients_paused && server.clients_pause_end_time < server.mstime) {
+ if (server.clients_paused &&
+ server.clients_pause_end_time < server.mstime)
+ {
listNode *ln;
listIter li;
- redisClient *c;
+ client *c;
server.clients_paused = 0;
@@ -1692,7 +2120,10 @@ int clientsArePaused(void) {
while ((ln = listNext(&li)) != NULL) {
c = listNodeValue(ln);
- if (c->flags & REDIS_SLAVE) continue;
+ /* Don't touch slaves and blocked clients. The latter will have
+ * their pending requests processed when unblocked. */
+ if (c->flags & (CLIENT_SLAVE|CLIENT_BLOCKED)) continue;
+ c->flags |= CLIENT_UNBLOCKED;
listAddNodeTail(server.unblocked_clients,c);
}
}
@@ -1706,7 +2137,7 @@ int clientsArePaused(void) {
* and so forth.
*
* It calls the event loop in order to process a few events. Specifically we
- * try to call the event loop for times as long as we receive acknowledge that
+ * try to call the event loop 4 times, as long as we receive acknowledgment that
* some event was processed, in order to go forward with the accept, read,
* write, close sequence needed to serve a client.
*
@@ -1715,7 +2146,9 @@ int processEventsWhileBlocked(void) {
int iterations = 4; /* See the function top-comment. */
int count = 0;
while (iterations--) {
- int events = aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
+ int events = 0;
+ events += aeProcessEvents(server.el, AE_FILE_EVENTS|AE_DONT_WAIT);
+ events += handleClientsWithPendingWrites();
if (!events) break;
count += events;
}
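
The pump runs at most four rounds and stops early on the first idle one, so a blocked context can make bounded progress without spinning. The control shape against a fake event source:

    #include <stdio.h>

    static int fake_process_events(void) {  /* stand-in event source */
        static int pending = 6;
        int n = pending > 3 ? 3 : pending;  /* at most 3 events per round */
        pending -= n;
        return n;
    }

    int main(void) {
        int iterations = 4, count = 0;
        while (iterations--) {
            int events = fake_process_events();
            if (!events) break;             /* stop on the first idle round */
            count += events;
        }
        printf("processed %d\n", count);    /* prints: processed 6 */
        return 0;
    }
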
diff --git a/src/notify.c b/src/notify.c
index f77239ecf..1afb36fc0 100644
--- a/src/notify.c
+++ b/src/notify.c
@@ -27,10 +27,10 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
-/* This file implements keyspace events notification via Pub/Sub ad
- * described at http://redis.io/topics/keyspace-events. */
+/* This file implements keyspace events notification via Pub/Sub as
+ * described at https://redis.io/topics/notifications. */
/* Turn a string representing notification classes into an integer
* representing notification classes flags xored.
@@ -43,17 +43,18 @@ int keyspaceEventsStringToFlags(char *classes) {
while((c = *p++) != '\0') {
switch(c) {
- case 'A': flags |= REDIS_NOTIFY_ALL; break;
- case 'g': flags |= REDIS_NOTIFY_GENERIC; break;
- case '$': flags |= REDIS_NOTIFY_STRING; break;
- case 'l': flags |= REDIS_NOTIFY_LIST; break;
- case 's': flags |= REDIS_NOTIFY_SET; break;
- case 'h': flags |= REDIS_NOTIFY_HASH; break;
- case 'z': flags |= REDIS_NOTIFY_ZSET; break;
- case 'x': flags |= REDIS_NOTIFY_EXPIRED; break;
- case 'e': flags |= REDIS_NOTIFY_EVICTED; break;
- case 'K': flags |= REDIS_NOTIFY_KEYSPACE; break;
- case 'E': flags |= REDIS_NOTIFY_KEYEVENT; break;
+ case 'A': flags |= NOTIFY_ALL; break;
+ case 'g': flags |= NOTIFY_GENERIC; break;
+ case '$': flags |= NOTIFY_STRING; break;
+ case 'l': flags |= NOTIFY_LIST; break;
+ case 's': flags |= NOTIFY_SET; break;
+ case 'h': flags |= NOTIFY_HASH; break;
+ case 'z': flags |= NOTIFY_ZSET; break;
+ case 'x': flags |= NOTIFY_EXPIRED; break;
+ case 'e': flags |= NOTIFY_EVICTED; break;
+ case 'K': flags |= NOTIFY_KEYSPACE; break;
+ case 'E': flags |= NOTIFY_KEYEVENT; break;
+ case 't': flags |= NOTIFY_STREAM; break;
default: return -1;
}
}
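
Each class character maps onto one bit, and an unknown character aborts the whole parse. A reduced sketch with a hypothetical three-class layout:

    #include <stdio.h>

    #define N_GENERIC  (1<<0)  /* illustrative bit values */
    #define N_EXPIRED  (1<<1)
    #define N_KEYEVENT (1<<2)

    static int parse_classes(const char *p) {
        int flags = 0;
        char c;
        while ((c = *p++) != '\0') {
            switch (c) {
            case 'g': flags |= N_GENERIC;  break;
            case 'x': flags |= N_EXPIRED;  break;
            case 'E': flags |= N_KEYEVENT; break;
            default: return -1;            /* unknown class character */
            }
        }
        return flags;
    }

    int main(void) {
        printf("0x%x\n", parse_classes("gxE")); /* prints 0x7 */
        printf("%d\n",   parse_classes("gz"));  /* prints -1 */
        return 0;
    }
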
@@ -68,20 +69,21 @@ sds keyspaceEventsFlagsToString(int flags) {
sds res;
res = sdsempty();
- if ((flags & REDIS_NOTIFY_ALL) == REDIS_NOTIFY_ALL) {
+ if ((flags & NOTIFY_ALL) == NOTIFY_ALL) {
res = sdscatlen(res,"A",1);
} else {
- if (flags & REDIS_NOTIFY_GENERIC) res = sdscatlen(res,"g",1);
- if (flags & REDIS_NOTIFY_STRING) res = sdscatlen(res,"$",1);
- if (flags & REDIS_NOTIFY_LIST) res = sdscatlen(res,"l",1);
- if (flags & REDIS_NOTIFY_SET) res = sdscatlen(res,"s",1);
- if (flags & REDIS_NOTIFY_HASH) res = sdscatlen(res,"h",1);
- if (flags & REDIS_NOTIFY_ZSET) res = sdscatlen(res,"z",1);
- if (flags & REDIS_NOTIFY_EXPIRED) res = sdscatlen(res,"x",1);
- if (flags & REDIS_NOTIFY_EVICTED) res = sdscatlen(res,"e",1);
+ if (flags & NOTIFY_GENERIC) res = sdscatlen(res,"g",1);
+ if (flags & NOTIFY_STRING) res = sdscatlen(res,"$",1);
+ if (flags & NOTIFY_LIST) res = sdscatlen(res,"l",1);
+ if (flags & NOTIFY_SET) res = sdscatlen(res,"s",1);
+ if (flags & NOTIFY_HASH) res = sdscatlen(res,"h",1);
+ if (flags & NOTIFY_ZSET) res = sdscatlen(res,"z",1);
+ if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1);
+ if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1);
+ if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1);
}
- if (flags & REDIS_NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1);
- if (flags & REDIS_NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1);
+ if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1);
+ if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1);
return res;
}
@@ -98,31 +100,37 @@ void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid) {
int len = -1;
char buf[24];
+ /* If any modules are interested in events, notify the module system now.
+ * This bypasses the notifications configuration, but the module engine
+ * will only call event subscribers if the event type matches the types
+ * they are interested in. */
+ moduleNotifyKeyspaceEvent(type, event, key, dbid);
+
/* If notifications for this class of events are off, return ASAP. */
if (!(server.notify_keyspace_events & type)) return;
eventobj = createStringObject(event,strlen(event));
/* __keyspace@<db>__:<key> <event> notifications. */
- if (server.notify_keyspace_events & REDIS_NOTIFY_KEYSPACE) {
+ if (server.notify_keyspace_events & NOTIFY_KEYSPACE) {
chan = sdsnewlen("__keyspace@",11);
len = ll2string(buf,sizeof(buf),dbid);
chan = sdscatlen(chan, buf, len);
chan = sdscatlen(chan, "__:", 3);
chan = sdscatsds(chan, key->ptr);
- chanobj = createObject(REDIS_STRING, chan);
+ chanobj = createObject(OBJ_STRING, chan);
pubsubPublishMessage(chanobj, eventobj);
decrRefCount(chanobj);
}
- /* __keyevente@<db>__:<event> <key> notifications. */
- if (server.notify_keyspace_events & REDIS_NOTIFY_KEYEVENT) {
+ /* __keyevent@<db>__:<event> <key> notifications. */
+ if (server.notify_keyspace_events & NOTIFY_KEYEVENT) {
chan = sdsnewlen("__keyevent@",11);
if (len == -1) len = ll2string(buf,sizeof(buf),dbid);
chan = sdscatlen(chan, buf, len);
chan = sdscatlen(chan, "__:", 3);
chan = sdscatsds(chan, eventobj->ptr);
- chanobj = createObject(REDIS_STRING, chan);
+ chanobj = createObject(OBJ_STRING, chan);
pubsubPublishMessage(chanobj, key);
decrRefCount(chanobj);
}
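
Both channel names follow the same layout: a fixed prefix, the database id, the "__:" separator, then the key (keyspace) or the event (keyevent). A one-shot snprintf sketch of the resulting strings:

    #include <stdio.h>

    int main(void) {
        char chan[64];
        int dbid = 0;
        snprintf(chan, sizeof(chan), "__keyspace@%d__:%s", dbid, "mykey");
        printf("%s\n", chan); /* __keyspace@0__:mykey */
        snprintf(chan, sizeof(chan), "__keyevent@%d__:%s", dbid, "del");
        printf("%s\n", chan); /* __keyevent@0__:del */
        return 0;
    }
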
diff --git a/src/object.c b/src/object.c
index f75421ee8..406816140 100644
--- a/src/object.c
+++ b/src/object.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
#include <ctype.h>
@@ -36,40 +36,71 @@
#define strtold(a,b) ((long double)strtod((a),(b)))
#endif
+/* ===================== Creation and parsing of objects ==================== */
+
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
- o->encoding = REDIS_ENCODING_RAW;
+ o->encoding = OBJ_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
- /* Set the LRU to the current lruclock (minutes resolution). */
- o->lru = LRU_CLOCK();
+ /* Set the LRU to the current lruclock (minutes resolution), or
+ * alternatively the LFU counter. */
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
+ } else {
+ o->lru = LRU_CLOCK();
+ }
+ return o;
+}
+
+/* Set a special refcount in the object to make it "shared":
+ * incrRefCount and decrRefCount() will test for this special refcount
+ * and will not touch the object. This way it is free to access shared
+ * objects such as small integers from different threads without any
+ * mutex.
+ *
+ * A common pattern to create shared objects:
+ *
+ * robj *myobject = makeObjectShared(createObject(...));
+ *
+ */
+robj *makeObjectShared(robj *o) {
+ serverAssert(o->refcount == 1);
+ o->refcount = OBJ_SHARED_REFCOUNT;
return o;
}
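
Past the sentinel, refcount updates become no-ops, which is what makes shared objects safe to touch from any thread without a mutex. The guard in isolation (the sentinel value here is a stand-in):

    #include <stdio.h>
    #include <limits.h>

    #define SHARED_REFCOUNT INT_MAX  /* stand-in for OBJ_SHARED_REFCOUNT */

    static void incr(int *refcount) {
        if (*refcount != SHARED_REFCOUNT) (*refcount)++;
    }

    int main(void) {
        int normal = 1, shared = SHARED_REFCOUNT;
        incr(&normal);
        incr(&shared);                          /* no-op on the shared one */
        printf("%d %d\n", normal, shared == SHARED_REFCOUNT); /* 2 1 */
        return 0;
    }
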
-/* Create a string object with encoding REDIS_ENCODING_RAW, that is a plain
+/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
-robj *createRawStringObject(char *ptr, size_t len) {
- return createObject(REDIS_STRING,sdsnewlen(ptr,len));
+robj *createRawStringObject(const char *ptr, size_t len) {
+ return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
-/* Create a string object with encoding REDIS_ENCODING_EMBSTR, that is
+/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
-robj *createEmbeddedStringObject(char *ptr, size_t len) {
- robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr)+len+1);
- struct sdshdr *sh = (void*)(o+1);
+robj *createEmbeddedStringObject(const char *ptr, size_t len) {
+ robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
+ struct sdshdr8 *sh = (void*)(o+1);
- o->type = REDIS_STRING;
- o->encoding = REDIS_ENCODING_EMBSTR;
+ o->type = OBJ_STRING;
+ o->encoding = OBJ_ENCODING_EMBSTR;
o->ptr = sh+1;
o->refcount = 1;
- o->lru = LRU_CLOCK();
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
+ } else {
+ o->lru = LRU_CLOCK();
+ }
sh->len = len;
- sh->free = 0;
- if (ptr) {
+ sh->alloc = len;
+ sh->flags = SDS_TYPE_8;
+ if (ptr == SDS_NOINIT)
+ sh->buf[len] = '\0';
+ else if (ptr) {
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
} else {
@@ -79,36 +110,66 @@ robj *createEmbeddedStringObject(char *ptr, size_t len) {
}
/* Create a string object with EMBSTR encoding if it is smaller than
- * REIDS_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
+ * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
- * The current limit of 39 is chosen so that the biggest string object
+ * The current limit of 44 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
-#define REDIS_ENCODING_EMBSTR_SIZE_LIMIT 39
-robj *createStringObject(char *ptr, size_t len) {
- if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT)
+#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
+robj *createStringObject(const char *ptr, size_t len) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
}
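
The 44-byte cutoff falls out of the 64-byte jemalloc size class: on a typical 64-bit build the robj header is 16 bytes and the sdshdr8 header is 3, leaving 44 characters plus the NUL terminator. Under those (platform-dependent) sizes:

    #include <stdio.h>

    int main(void) {
        size_t robj_size = 16; /* typical: bitfields + refcount + ptr */
        size_t sdshdr8   = 3;  /* len, alloc, flags: one byte each */
        size_t arena     = 64; /* jemalloc size class being targeted */
        printf("limit = %zu\n", arena - robj_size - sdshdr8 - 1); /* 44 */
        return 0;
    }
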
-robj *createStringObjectFromLongLong(long long value) {
+/* Create a string object from a long long value. When possible returns a
+ * shared integer object, or at least an integer encoded one.
+ *
+ * If valueobj is non-zero, the function avoids returning a shared
+ * integer, because the object is going to be used as a value in the Redis key
+ * space (for instance when the INCR command is used), so we want LFU/LRU
+ * values specific for each key. */
+robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
robj *o;
- if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
+
+ if (server.maxmemory == 0 ||
+ !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
+ {
+ /* If the maxmemory policy permits, we can still return shared integers
+ * even if valueobj is true. */
+ valueobj = 0;
+ }
+
+ if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
incrRefCount(shared.integers[value]);
o = shared.integers[value];
} else {
if (value >= LONG_MIN && value <= LONG_MAX) {
- o = createObject(REDIS_STRING, NULL);
- o->encoding = REDIS_ENCODING_INT;
+ o = createObject(OBJ_STRING, NULL);
+ o->encoding = OBJ_ENCODING_INT;
o->ptr = (void*)((long)value);
} else {
- o = createObject(REDIS_STRING,sdsfromlonglong(value));
+ o = createObject(OBJ_STRING,sdsfromlonglong(value));
}
}
return o;
}
+/* Wrapper for createStringObjectFromLongLongWithOptions() that always
+ * requests a shared object when possible. */
+robj *createStringObjectFromLongLong(long long value) {
+ return createStringObjectFromLongLongWithOptions(value,0);
+}
+
+/* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
+ * object when LFU/LRU info are needed, that is, when the object is used
+ * as a value in the key space, and Redis is configured to evict based on
+ * LFU/LRU. */
+robj *createStringObjectFromLongLongForValue(long long value) {
+ return createStringObjectFromLongLongWithOptions(value,1);
+}
+
/* Create a string object from a long double. If humanfriendly is non-zero
* it does not use exponential format and trims trailing zeroes at the end,
* however this results in loss of precision. Otherwise exp format is used
@@ -116,38 +177,8 @@ robj *createStringObjectFromLongLong(long long value) {
*
* The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
- char buf[256];
- int len;
-
- if (isinf(value)) {
- /* Libc in odd systems (Hi Solaris!) will format infinite in a
- * different way, so better to handle it in an explicit way. */
- if (value > 0) {
- memcpy(buf,"inf",3);
- len = 3;
- } else {
- memcpy(buf,"-inf",4);
- len = 4;
- }
- } else if (humanfriendly) {
- /* We use 17 digits precision since with 128 bit floats that precision
- * after rounding is able to represent most small decimal numbers in a
- * way that is "non surprising" for the user (that is, most small
- * decimal numbers will be represented in a way that when converted
- * back into a string are exactly the same as what the user typed.) */
- len = snprintf(buf,sizeof(buf),"%.17Lf", value);
- /* Now remove trailing zeroes after the '.' */
- if (strchr(buf,'.') != NULL) {
- char *p = buf+len-1;
- while(*p == '0') {
- p--;
- len--;
- }
- if (*p == '.') len--;
- }
- } else {
- len = snprintf(buf,sizeof(buf),"%.17Lg", value);
- }
+ char buf[MAX_LONG_DOUBLE_CHARS];
+ int len = ld2string(buf,sizeof(buf),value,humanfriendly);
return createStringObject(buf,len);
}
@@ -159,59 +190,59 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
* will always result in a fresh object that is unshared (refcount == 1).
*
* The resulting object always has refcount set to 1. */
-robj *dupStringObject(robj *o) {
+robj *dupStringObject(const robj *o) {
robj *d;
- redisAssert(o->type == REDIS_STRING);
+ serverAssert(o->type == OBJ_STRING);
switch(o->encoding) {
- case REDIS_ENCODING_RAW:
+ case OBJ_ENCODING_RAW:
return createRawStringObject(o->ptr,sdslen(o->ptr));
- case REDIS_ENCODING_EMBSTR:
+ case OBJ_ENCODING_EMBSTR:
return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
- case REDIS_ENCODING_INT:
- d = createObject(REDIS_STRING, NULL);
- d->encoding = REDIS_ENCODING_INT;
+ case OBJ_ENCODING_INT:
+ d = createObject(OBJ_STRING, NULL);
+ d->encoding = OBJ_ENCODING_INT;
d->ptr = o->ptr;
return d;
default:
- redisPanic("Wrong encoding.");
+ serverPanic("Wrong encoding.");
break;
}
}
robj *createQuicklistObject(void) {
quicklist *l = quicklistCreate();
- robj *o = createObject(REDIS_LIST,l);
- o->encoding = REDIS_ENCODING_QUICKLIST;
+ robj *o = createObject(OBJ_LIST,l);
+ o->encoding = OBJ_ENCODING_QUICKLIST;
return o;
}
robj *createZiplistObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_LIST,zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_LIST,zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
robj *createSetObject(void) {
dict *d = dictCreate(&setDictType,NULL);
- robj *o = createObject(REDIS_SET,d);
- o->encoding = REDIS_ENCODING_HT;
+ robj *o = createObject(OBJ_SET,d);
+ o->encoding = OBJ_ENCODING_HT;
return o;
}
robj *createIntsetObject(void) {
intset *is = intsetNew();
- robj *o = createObject(REDIS_SET,is);
- o->encoding = REDIS_ENCODING_INTSET;
+ robj *o = createObject(OBJ_SET,is);
+ o->encoding = OBJ_ENCODING_INTSET;
return o;
}
robj *createHashObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_HASH, zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_HASH, zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
@@ -221,96 +252,120 @@ robj *createZsetObject(void) {
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
- o = createObject(REDIS_ZSET,zs);
- o->encoding = REDIS_ENCODING_SKIPLIST;
+ o = createObject(OBJ_ZSET,zs);
+ o->encoding = OBJ_ENCODING_SKIPLIST;
return o;
}
robj *createZsetZiplistObject(void) {
unsigned char *zl = ziplistNew();
- robj *o = createObject(REDIS_ZSET,zl);
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ robj *o = createObject(OBJ_ZSET,zl);
+ o->encoding = OBJ_ENCODING_ZIPLIST;
return o;
}
+robj *createStreamObject(void) {
+ stream *s = streamNew();
+ robj *o = createObject(OBJ_STREAM,s);
+ o->encoding = OBJ_ENCODING_STREAM;
+ return o;
+}
+
+robj *createModuleObject(moduleType *mt, void *value) {
+ moduleValue *mv = zmalloc(sizeof(*mv));
+ mv->type = mt;
+ mv->value = value;
+ return createObject(OBJ_MODULE,mv);
+}
+
void freeStringObject(robj *o) {
- if (o->encoding == REDIS_ENCODING_RAW) {
+ if (o->encoding == OBJ_ENCODING_RAW) {
sdsfree(o->ptr);
}
}
void freeListObject(robj *o) {
- switch (o->encoding) {
- case REDIS_ENCODING_QUICKLIST:
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklistRelease(o->ptr);
- break;
- default:
- redisPanic("Unknown list encoding type");
+ } else {
+ serverPanic("Unknown list encoding type");
}
}
void freeSetObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_HT:
+ case OBJ_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
- case REDIS_ENCODING_INTSET:
+ case OBJ_ENCODING_INTSET:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown set encoding type");
+ serverPanic("Unknown set encoding type");
}
}
void freeZsetObject(robj *o) {
zset *zs;
switch (o->encoding) {
- case REDIS_ENCODING_SKIPLIST:
+ case OBJ_ENCODING_SKIPLIST:
zs = o->ptr;
dictRelease(zs->dict);
zslFree(zs->zsl);
zfree(zs);
break;
- case REDIS_ENCODING_ZIPLIST:
+ case OBJ_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
}
void freeHashObject(robj *o) {
switch (o->encoding) {
- case REDIS_ENCODING_HT:
+ case OBJ_ENCODING_HT:
dictRelease((dict*) o->ptr);
break;
- case REDIS_ENCODING_ZIPLIST:
+ case OBJ_ENCODING_ZIPLIST:
zfree(o->ptr);
break;
default:
- redisPanic("Unknown hash encoding type");
+ serverPanic("Unknown hash encoding type");
break;
}
}
+void freeModuleObject(robj *o) {
+ moduleValue *mv = o->ptr;
+ mv->type->free(mv->value);
+ zfree(mv);
+}
+
+void freeStreamObject(robj *o) {
+ freeStream(o->ptr);
+}
+
void incrRefCount(robj *o) {
- o->refcount++;
+ if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount++;
}
void decrRefCount(robj *o) {
- if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
if (o->refcount == 1) {
switch(o->type) {
- case REDIS_STRING: freeStringObject(o); break;
- case REDIS_LIST: freeListObject(o); break;
- case REDIS_SET: freeSetObject(o); break;
- case REDIS_ZSET: freeZsetObject(o); break;
- case REDIS_HASH: freeHashObject(o); break;
- default: redisPanic("Unknown object type"); break;
+ case OBJ_STRING: freeStringObject(o); break;
+ case OBJ_LIST: freeListObject(o); break;
+ case OBJ_SET: freeSetObject(o); break;
+ case OBJ_ZSET: freeZsetObject(o); break;
+ case OBJ_HASH: freeHashObject(o); break;
+ case OBJ_MODULE: freeModuleObject(o); break;
+ case OBJ_STREAM: freeStreamObject(o); break;
+ default: serverPanic("Unknown object type"); break;
}
zfree(o);
} else {
- o->refcount--;
+ if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
+ if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
}
}
@@ -338,7 +393,7 @@ robj *resetRefCount(robj *obj) {
return obj;
}
-int checkType(redisClient *c, robj *o, int type) {
+int checkType(client *c, robj *o, int type) {
if (o->type != type) {
addReply(c,shared.wrongtypeerr);
return 1;
@@ -346,13 +401,17 @@ int checkType(redisClient *c, robj *o, int type) {
return 0;
}
+int isSdsRepresentableAsLongLong(sds s, long long *llval) {
+ return string2ll(s,sdslen(s),llval) ? C_OK : C_ERR;
+}
+
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
- if (o->encoding == REDIS_ENCODING_INT) {
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
+ if (o->encoding == OBJ_ENCODING_INT) {
if (llval) *llval = (long) o->ptr;
- return REDIS_OK;
+ return C_OK;
} else {
- return string2ll(o->ptr,sdslen(o->ptr),llval) ? REDIS_OK : REDIS_ERR;
+ return isSdsRepresentableAsLongLong(o->ptr,llval);
}
}
@@ -366,7 +425,7 @@ robj *tryObjectEncoding(robj *o) {
* in this function. Other types use encoded memory efficient
* representations but are handled by the commands implementing
* the type. */
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
/* We try some specialized encoding only for objects that are
* RAW or EMBSTR encoded, in other words objects that are still
@@ -379,26 +438,25 @@ robj *tryObjectEncoding(robj *o) {
if (o->refcount > 1) return o;
/* Check if we can represent this string as a long integer.
- * Note that we are sure that a string larger than 21 chars is not
+ * Note that we are sure that a string larger than 20 chars is not
* representable as a 32 nor 64 bit integer. */
len = sdslen(s);
- if (len <= 21 && string2l(s,len,&value)) {
+ if (len <= 20 && string2l(s,len,&value)) {
/* This object is encodable as a long. Try to use a shared object.
* Note that we avoid using shared integers when maxmemory is used
* because every object needs to have a private LRU field for the LRU
* algorithm to work well. */
if ((server.maxmemory == 0 ||
- (server.maxmemory_policy != REDIS_MAXMEMORY_VOLATILE_LRU &&
- server.maxmemory_policy != REDIS_MAXMEMORY_ALLKEYS_LRU)) &&
+ !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
value >= 0 &&
- value < REDIS_SHARED_INTEGERS)
+ value < OBJ_SHARED_INTEGERS)
{
decrRefCount(o);
incrRefCount(shared.integers[value]);
return shared.integers[value];
} else {
- if (o->encoding == REDIS_ENCODING_RAW) sdsfree(o->ptr);
- o->encoding = REDIS_ENCODING_INT;
+ if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
+ o->encoding = OBJ_ENCODING_INT;
o->ptr = (void*) value;
return o;
}
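
The 20-character bound is exact: the widest 64-bit integer, LLONG_MIN, prints as "-9223372036854775808", which is 20 characters. Quick check:

    #include <stdio.h>
    #include <string.h>
    #include <limits.h>

    int main(void) {
        char buf[32];
        snprintf(buf, sizeof(buf), "%lld", LLONG_MIN);
        printf("%s -> %zu chars\n", buf, strlen(buf)); /* 20 */
        return 0;
    }
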
@@ -408,10 +466,10 @@ robj *tryObjectEncoding(robj *o) {
* try the EMBSTR encoding which is more efficient.
* In this representation the object and the SDS string are allocated
* in the same chunk of memory to save space and cache misses. */
- if (len <= REDIS_ENCODING_EMBSTR_SIZE_LIMIT) {
+ if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
robj *emb;
- if (o->encoding == REDIS_ENCODING_EMBSTR) return o;
+ if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
emb = createEmbeddedStringObject(s,sdslen(s));
decrRefCount(o);
return emb;
@@ -425,8 +483,8 @@ robj *tryObjectEncoding(robj *o) {
*
* We do that only for relatively large strings as this branch
* is only entered if the length of the string is greater than
- * REDIS_ENCODING_EMBSTR_SIZE_LIMIT. */
- if (o->encoding == REDIS_ENCODING_RAW &&
+ * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
+ if (o->encoding == OBJ_ENCODING_RAW &&
sdsavail(s) > len/10)
{
o->ptr = sdsRemoveFreeSpace(o->ptr);
@@ -445,14 +503,14 @@ robj *getDecodedObject(robj *o) {
incrRefCount(o);
return o;
}
- if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
+ if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
char buf[32];
ll2string(buf,32,(long)o->ptr);
dec = createStringObject(buf,strlen(buf));
return dec;
} else {
- redisPanic("Unknown encoding type");
+ serverPanic("Unknown encoding type");
}
}
@@ -468,7 +526,7 @@ robj *getDecodedObject(robj *o) {
#define REDIS_COMPARE_COLL (1<<1)
int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
- redisAssertWithInfo(NULL,a,a->type == REDIS_STRING && b->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
char bufa[128], bufb[128], *astr, *bstr;
size_t alen, blen, minlen;
@@ -514,8 +572,8 @@ int collateStringObjects(robj *a, robj *b) {
* this function is faster than checking for (compareStringObject(a,b) == 0)
* because it can perform some more optimization. */
int equalStringObjects(robj *a, robj *b) {
- if (a->encoding == REDIS_ENCODING_INT &&
- b->encoding == REDIS_ENCODING_INT){
+ if (a->encoding == OBJ_ENCODING_INT &&
+ b->encoding == OBJ_ENCODING_INT){
/* If both strings are integer encoded just check if the stored
* long is the same. */
return a->ptr == b->ptr;
@@ -525,56 +583,54 @@ int equalStringObjects(robj *a, robj *b) {
}
size_t stringObjectLen(robj *o) {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
return sdslen(o->ptr);
} else {
- char buf[32];
-
- return ll2string(buf,32,(long)o->ptr);
+ return sdigits10((long)o->ptr);
}
}
-int getDoubleFromObject(robj *o, double *target) {
+int getDoubleFromObject(const robj *o, double *target) {
double value;
char *eptr;
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
errno = 0;
value = strtod(o->ptr, &eptr);
- if (isspace(((char*)o->ptr)[0]) ||
- eptr[0] != '\0' ||
+ if (sdslen(o->ptr) == 0 ||
+ isspace(((const char*)o->ptr)[0]) ||
+ (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) ||
(errno == ERANGE &&
(value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
- errno == EINVAL ||
isnan(value))
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
double value;
- if (getDoubleFromObject(o, &value) != REDIS_OK) {
+ if (getDoubleFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a valid float");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
int getLongDoubleFromObject(robj *o, long double *target) {
@@ -584,127 +640,593 @@ int getLongDoubleFromObject(robj *o, long double *target) {
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
errno = 0;
value = strtold(o->ptr, &eptr);
- if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' ||
- errno == ERANGE || isnan(value))
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ if (sdslen(o->ptr) == 0 ||
+ isspace(((const char*)o->ptr)[0]) ||
+ (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) ||
+ (errno == ERANGE &&
+ (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
+ isnan(value))
+ return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
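
The added checks make the parse strict: non-empty input, no leading whitespace, the whole string consumed, no overflow, no NaN. The same guard extracted into a standalone helper (a sketch; HUGE_VALL is the long double analogue of the HUGE_VAL used above):

    #include <errno.h>
    #include <ctype.h>
    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* 0 on success, -1 on any defect, mirroring the rejects above. */
    static int strict_strtold(const char *s, size_t len, long double *out) {
        char *eptr;
        errno = 0;
        long double v = strtold(s, &eptr);
        if (len == 0 || isspace((unsigned char)s[0]) ||
            (size_t)(eptr - s) != len ||
            (errno == ERANGE && (v == HUGE_VALL || v == -HUGE_VALL || v == 0)) ||
            isnan(v))
            return -1;
        *out = v;
        return 0;
    }

    int main(void) {
        long double v;
        printf("%d\n", strict_strtold("3.14", 4, &v));  /* prints  0 */
        printf("%d\n", strict_strtold("3.14x", 5, &v)); /* prints -1 */
        return 0;
    }
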
-int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg) {
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
long double value;
- if (getLongDoubleFromObject(o, &value) != REDIS_OK) {
+ if (getLongDoubleFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not a valid float");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
int getLongLongFromObject(robj *o, long long *target) {
long long value;
- char *eptr;
if (o == NULL) {
value = 0;
} else {
- redisAssertWithInfo(NULL,o,o->type == REDIS_STRING);
+ serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
if (sdsEncodedObject(o)) {
- errno = 0;
- value = strtoll(o->ptr, &eptr, 10);
- if (isspace(((char*)o->ptr)[0]) || eptr[0] != '\0' ||
- errno == ERANGE)
- return REDIS_ERR;
- } else if (o->encoding == REDIS_ENCODING_INT) {
+ if (string2ll(o->ptr,sdslen(o->ptr),&value) == 0) return C_ERR;
+ } else if (o->encoding == OBJ_ENCODING_INT) {
value = (long)o->ptr;
} else {
- redisPanic("Unknown string encoding");
+ serverPanic("Unknown string encoding");
}
}
if (target) *target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
long long value;
- if (getLongLongFromObject(o, &value) != REDIS_OK) {
+ if (getLongLongFromObject(o, &value) != C_OK) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is not an integer or out of range");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
-int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
+int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
long long value;
- if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
+ if (getLongLongFromObjectOrReply(c, o, &value, msg) != C_OK) return C_ERR;
if (value < LONG_MIN || value > LONG_MAX) {
if (msg != NULL) {
addReplyError(c,(char*)msg);
} else {
addReplyError(c,"value is out of range");
}
- return REDIS_ERR;
+ return C_ERR;
}
*target = value;
- return REDIS_OK;
+ return C_OK;
}
char *strEncoding(int encoding) {
switch(encoding) {
- case REDIS_ENCODING_RAW: return "raw";
- case REDIS_ENCODING_INT: return "int";
- case REDIS_ENCODING_HT: return "hashtable";
- case REDIS_ENCODING_QUICKLIST: return "quicklist";
- case REDIS_ENCODING_ZIPLIST: return "ziplist";
- case REDIS_ENCODING_INTSET: return "intset";
- case REDIS_ENCODING_SKIPLIST: return "skiplist";
- case REDIS_ENCODING_EMBSTR: return "embstr";
+ case OBJ_ENCODING_RAW: return "raw";
+ case OBJ_ENCODING_INT: return "int";
+ case OBJ_ENCODING_HT: return "hashtable";
+ case OBJ_ENCODING_QUICKLIST: return "quicklist";
+ case OBJ_ENCODING_ZIPLIST: return "ziplist";
+ case OBJ_ENCODING_INTSET: return "intset";
+ case OBJ_ENCODING_SKIPLIST: return "skiplist";
+ case OBJ_ENCODING_EMBSTR: return "embstr";
default: return "unknown";
}
}
-/* Given an object returns the min number of milliseconds the object was never
- * requested, using an approximated LRU algorithm. */
-unsigned long long estimateObjectIdleTime(robj *o) {
- unsigned long long lruclock = LRU_CLOCK();
- if (lruclock >= o->lru) {
- return (lruclock - o->lru) * REDIS_LRU_CLOCK_RESOLUTION;
+/* =========================== Memory introspection ========================= */
+
+
+/* This is a helper function with the goal of estimating the memory
+ * size of a radix tree that is used to store Stream IDs.
+ *
+ * Note: to guess the size of the radix tree is not trivial, so we
+ * approximate it considering 16 bytes of data overhead for each
+ * key (the ID), and then adding the number of bare nodes, plus some
+ * overhead due to the data and child pointers. This secret recipe
+ * was obtained by checking the average radix tree created by real
+ * workloads, and then adjusting the constants to get numbers that
+ * more or less match the real memory usage.
+ *
+ * Actually the number of nodes and keys may be different depending
+ * on the insertion speed and thus the ability of the radix tree
+ * to compress prefixes. */
+size_t streamRadixTreeMemoryUsage(rax *rax) {
+ size_t size;
+ size = rax->numele * sizeof(streamID);
+ size += rax->numnodes * sizeof(raxNode);
+ /* Add a fixed overhead due to the aux data pointer, children, ... */
+ size += rax->numnodes * sizeof(long)*30;
+ return size;
+}
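
With those constants, a tree of E elements and N nodes is costed at roughly 16*E + N*sizeof(raxNode) + 240*N on an LP64 build (sizeof(long)*30 = 240). For a made-up workload (the raxNode size below is a guess, not the real struct):

    #include <stdio.h>

    int main(void) {
        size_t numele = 1000, numnodes = 1100;  /* hypothetical tree shape */
        size_t id_sz = 16, node_sz = 32;        /* streamID; raxNode size guessed */
        size_t est = numele*id_sz + numnodes*node_sz + numnodes*sizeof(long)*30;
        printf("%zu bytes\n", est); /* 315200 on LP64 */
        return 0;
    }
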
+
+/* Returns the size in bytes consumed by the key's value in RAM.
+ * Note that the returned value is just an approximation, especially in the
+ * case of aggregated data types where only "sample_size" elements
+ * are checked and averaged to estimate the total size. */
+#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
+size_t objectComputeSize(robj *o, size_t sample_size) {
+ sds ele, ele2;
+ dict *d;
+ dictIterator *di;
+ struct dictEntry *de;
+ size_t asize = 0, elesize = 0, samples = 0;
+
+ if (o->type == OBJ_STRING) {
+ if(o->encoding == OBJ_ENCODING_INT) {
+ asize = sizeof(*o);
+ } else if(o->encoding == OBJ_ENCODING_RAW) {
+ asize = sdsAllocSize(o->ptr)+sizeof(*o);
+ } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
+ asize = sdslen(o->ptr)+2+sizeof(*o);
+ } else {
+ serverPanic("Unknown string encoding");
+ }
+ } else if (o->type == OBJ_LIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ quicklist *ql = o->ptr;
+ quicklistNode *node = ql->head;
+ asize = sizeof(*o)+sizeof(quicklist);
+ do {
+ elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
+ samples++;
+ } while ((node = node->next) && samples < sample_size);
+ asize += (double)elesize/samples*ql->len;
+ } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+ziplistBlobLen(o->ptr);
+ } else {
+ serverPanic("Unknown list encoding");
+ }
+ } else if (o->type == OBJ_SET) {
+ if (o->encoding == OBJ_ENCODING_HT) {
+ d = o->ptr;
+ di = dictGetIterator(d);
+ asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while((de = dictNext(di)) != NULL && samples < sample_size) {
+ ele = dictGetKey(de);
+ elesize += sizeof(struct dictEntry) + sdsAllocSize(ele);
+ samples++;
+ }
+ dictReleaseIterator(di);
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
+ intset *is = o->ptr;
+ asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length;
+ } else {
+ serverPanic("Unknown set encoding");
+ }
+ } else if (o->type == OBJ_ZSET) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
+ d = ((zset*)o->ptr)->dict;
+ zskiplist *zsl = ((zset*)o->ptr)->zsl;
+ zskiplistNode *znode = zsl->header->level[0].forward;
+ asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while(znode != NULL && samples < sample_size) {
+ elesize += sdsAllocSize(znode->ele);
+ elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
+ samples++;
+ znode = znode->level[0].forward;
+ }
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ } else if (o->type == OBJ_HASH) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ d = o->ptr;
+ di = dictGetIterator(d);
+ asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
+ while((de = dictNext(di)) != NULL && samples < sample_size) {
+ ele = dictGetKey(de);
+ ele2 = dictGetVal(de);
+ elesize += sdsAllocSize(ele) + sdsAllocSize(ele2);
+ elesize += sizeof(struct dictEntry);
+ samples++;
+ }
+ dictReleaseIterator(di);
+ if (samples) asize += (double)elesize/samples*dictSize(d);
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (o->type == OBJ_STREAM) {
+ stream *s = o->ptr;
+ asize = sizeof(*o);
+ asize += streamRadixTreeMemoryUsage(s->rax);
+
+ /* Now we have to add the listpacks. The last listpack is often not
+ * complete, so we sample the size of the first N listpacks, use
+ * their average to estimate the size of the first N-1 listpacks, and
+ * finally add the real size of the last node. */
+ raxIterator ri;
+ raxStart(&ri,s->rax);
+ raxSeek(&ri,"^",NULL,0);
+ size_t lpsize = 0, samples = 0;
+ while(samples < sample_size && raxNext(&ri)) {
+ unsigned char *lp = ri.data;
+ lpsize += lpBytes(lp);
+ samples++;
+ }
+ if (s->rax->numele <= samples) {
+ asize += lpsize;
+ } else {
+ if (samples) lpsize /= samples; /* Compute the average. */
+ asize += lpsize * (s->rax->numele-1);
+ /* No need to check if the seek succeeded: we enter this branch
+ * only if the radix tree has more elements than we sampled, so it
+ * cannot be empty. */
+ raxSeek(&ri,"$",NULL,0);
+ raxNext(&ri);
+ asize += lpBytes(ri.data);
+ }
+ raxStop(&ri);
+
+ /* Consumer groups also have a non trivial memory overhead if there
+ * are many consumers and many groups, let's count at least the
+ * overhead of the pending entries in the groups and consumers
+ * PELs. */
+ if (s->cgroups) {
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *cg = ri.data;
+ asize += sizeof(*cg);
+ asize += streamRadixTreeMemoryUsage(cg->pel);
+ asize += sizeof(streamNACK)*raxSize(cg->pel);
+
+ /* For each consumer we also need to add the basic data
+ * structures and the PEL memory usage. */
+ raxIterator cri;
+ raxStart(&cri,cg->consumers);
+ raxSeek(&cri,"^",NULL,0);
+ while(raxNext(&cri)) {
+ streamConsumer *consumer = cri.data;
+ asize += sizeof(*consumer);
+ asize += sdslen(consumer->name);
+ asize += streamRadixTreeMemoryUsage(consumer->pel);
+ /* Don't count NACKs again, they are shared with the
+ * consumer group PEL. */
+ }
+ raxStop(&cri);
+ }
+ raxStop(&ri);
+ }
+ } else if (o->type == OBJ_MODULE) {
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ if (mt->mem_usage != NULL) {
+ asize = mt->mem_usage(mv->value);
+ } else {
+ asize = 0;
+ }
+ } else {
+ serverPanic("Unknown object type");
+ }
+ return asize;
+}
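
All the aggregate branches above share the same estimation trick: measure the first sample_size elements, average them, and scale by the container's total element count. A standalone sketch of that idea with made-up element sizes (not part of the patch):

    #include <stdio.h>

    int main(void) {
        size_t sizes[] = {48, 64, 40, 56, 72};   /* hypothetical per-element bytes */
        size_t samples = sizeof(sizes)/sizeof(sizes[0]);
        size_t total_elements = 100000;          /* e.g. what dictSize(d) would return */
        size_t elesize = 0;
        for (size_t i = 0; i < samples; i++) elesize += sizes[i];
        /* Same formula as the aggregate cases above. */
        double asize = (double)elesize/samples*total_elements;
        printf("estimated value size: %.0f bytes\n", asize); /* 5600000 */
        return 0;
    }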
+
+/* Release data obtained with getMemoryOverheadData(). */
+void freeMemoryOverheadData(struct redisMemOverhead *mh) {
+ zfree(mh->db);
+ zfree(mh);
+}
+
+/* Return a struct redisMemOverhead filled with memory overhead
+ * information used by the MEMORY STATS and INFO commands. The returned
+ * structure pointer should be freed by calling freeMemoryOverheadData(). */
+struct redisMemOverhead *getMemoryOverheadData(void) {
+ int j;
+ size_t mem_total = 0;
+ size_t mem = 0;
+ size_t zmalloc_used = zmalloc_used_memory();
+ struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
+
+ mh->total_allocated = zmalloc_used;
+ mh->startup_allocated = server.initial_memory_usage;
+ mh->peak_allocated = server.stat_peak_memory;
+ mh->total_frag =
+ (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
+ mh->total_frag_bytes =
+ server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
+ mh->allocator_frag =
+ (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
+ mh->allocator_frag_bytes =
+ server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
+ mh->allocator_rss =
+ (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
+ mh->allocator_rss_bytes =
+ server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
+ mh->rss_extra =
+ (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
+ mh->rss_extra_bytes =
+ server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
+
+ mem_total += server.initial_memory_usage;
+
+ mem = 0;
+ if (server.repl_backlog)
+ mem += zmalloc_size(server.repl_backlog);
+ mh->repl_backlog = mem;
+ mem_total += mem;
+
+ mem = 0;
+ if (listLength(server.slaves)) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ mem += getClientOutputBufferMemoryUsage(c);
+ mem += sdsAllocSize(c->querybuf);
+ mem += sizeof(client);
+ }
+ }
+ mh->clients_slaves = mem;
+ mem_total+=mem;
+
+ mem = 0;
+ if (listLength(server.clients)) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(server.clients,&li);
+ while((ln = listNext(&li))) {
+ client *c = listNodeValue(ln);
+ if (c->flags & CLIENT_SLAVE && !(c->flags & CLIENT_MONITOR))
+ continue;
+ mem += getClientOutputBufferMemoryUsage(c);
+ mem += sdsAllocSize(c->querybuf);
+ mem += sizeof(client);
+ }
+ }
+ mh->clients_normal = mem;
+ mem_total+=mem;
+
+ mem = 0;
+ if (server.aof_state != AOF_OFF) {
+ mem += sdslen(server.aof_buf);
+ mem += aofRewriteBufferSize();
+ }
+ mh->aof_buffer = mem;
+ mem_total+=mem;
+
+ mem = 0;
+ mem += dictSize(server.lua_scripts) * sizeof(dictEntry) +
+ dictSlots(server.lua_scripts) * sizeof(dictEntry*);
+ mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
+ dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
+ if (listLength(server.repl_scriptcache_fifo) > 0) {
+ mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
+ sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
+ }
+ mh->lua_caches = mem;
+ mem_total+=mem;
+
+ for (j = 0; j < server.dbnum; j++) {
+ redisDb *db = server.db+j;
+ long long keyscount = dictSize(db->dict);
+ if (keyscount==0) continue;
+
+ mh->total_keys += keyscount;
+ mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
+ mh->db[mh->num_dbs].dbid = j;
+
+ mem = dictSize(db->dict) * sizeof(dictEntry) +
+ dictSlots(db->dict) * sizeof(dictEntry*) +
+ dictSize(db->dict) * sizeof(robj);
+ mh->db[mh->num_dbs].overhead_ht_main = mem;
+ mem_total+=mem;
+
+ mem = dictSize(db->expires) * sizeof(dictEntry) +
+ dictSlots(db->expires) * sizeof(dictEntry*);
+ mh->db[mh->num_dbs].overhead_ht_expires = mem;
+ mem_total+=mem;
+
+ mh->num_dbs++;
+ }
+
+ mh->overhead_total = mem_total;
+ mh->dataset = zmalloc_used - mem_total;
+ mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
+
+ /* Metrics computed after subtracting the startup memory from
+ * the total memory. */
+ size_t net_usage = 1; /* Initialize to 1 to avoid a division by zero below. */
+ if (zmalloc_used > mh->startup_allocated)
+ net_usage = zmalloc_used - mh->startup_allocated;
+ mh->dataset_perc = (float)mh->dataset*100/net_usage;
+ mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
+
+ return mh;
+}
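
The derived fields at the end are simple ratios over the sums accumulated above. A standalone sketch of the dataset.percentage computation, with made-up byte counts (not part of the patch):

    #include <stdio.h>

    int main(void) {
        size_t zmalloc_used = 100 << 20;  /* 100MiB currently allocated */
        size_t startup      = 5 << 20;    /* allocated before serving commands */
        size_t overhead     = 30 << 20;   /* sum of the overheads computed above */
        size_t dataset   = zmalloc_used - overhead;
        size_t net_usage = zmalloc_used - startup;   /* startup memory excluded */
        printf("dataset.percentage: %.2f\n", (float)dataset*100/net_usage); /* ~73.68 */
        return 0;
    }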
+
+/* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
+ * stats output. */
+void inputCatSds(void *result, const char *str) {
+ /* result is actually a (sds *), so re-cast it here */
+ sds *info = (sds *)result;
+ *info = sdscat(*info, str);
+}
+
+/* This implements MEMORY DOCTOR, a human readable analysis of the Redis
+ * memory condition. */
+sds getMemoryDoctorReport(void) {
+ int empty = 0; /* Instance is empty or almost empty. */
+ int big_peak = 0; /* Memory peak is much larger than used mem. */
+ int high_frag = 0; /* High fragmentation. */
+ int high_alloc_frag = 0;/* High allocator fragmentation. */
+ int high_proc_rss = 0; /* High process rss overhead. */
+ int high_alloc_rss = 0; /* High rss overhead. */
+ int big_slave_buf = 0; /* Slave buffers are too big. */
+ int big_client_buf = 0; /* Client buffers are too big. */
+ int many_scripts = 0; /* Script cache has too many scripts. */
+ int num_reports = 0;
+ struct redisMemOverhead *mh = getMemoryOverheadData();
+
+ if (mh->total_allocated < (1024*1024*5)) {
+ empty = 1;
+ num_reports++;
} else {
- return (lruclock + (REDIS_LRU_CLOCK_MAX - o->lru)) *
- REDIS_LRU_CLOCK_RESOLUTION;
+ /* Peak is > 150% of current used memory? */
+ if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
+ big_peak = 1;
+ num_reports++;
+ }
+
+ /* Fragmentation is higher than 1.4 and greater than 10MB? */
+ if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
+ high_frag = 1;
+ num_reports++;
+ }
+
+ /* External fragmentation is higher than 1.1 and 10MB? */
+ if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
+ high_alloc_frag = 1;
+ num_reports++;
+ }
+
+ /* Allocator RSS is higher than 1.1 and greater than 10MB? */
+ if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
+ high_alloc_rss = 1;
+ num_reports++;
+ }
+
+ /* Non-allocator RSS is higher than 1.1 and greater than 10MB? */
+ if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
+ high_proc_rss = 1;
+ num_reports++;
+ }
+
+ /* Clients using more than 200k each on average? */
+ long numslaves = listLength(server.slaves);
+ long numclients = listLength(server.clients)-numslaves;
+ if (mh->clients_normal / numclients > (1024*200)) {
+ big_client_buf = 1;
+ num_reports++;
+ }
+
+ /* Slaves using more than 10 MB each? */
+ if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
+ big_slave_buf = 1;
+ num_reports++;
+ }
+
+ /* Too many scripts are cached? */
+ if (dictSize(server.lua_scripts) > 1000) {
+ many_scripts = 1;
+ num_reports++;
+ }
+ }
+
+ sds s;
+ if (num_reports == 0) {
+ s = sdsnew(
+ "Hi Sam, I can't find any memory issue in your instance. "
+ "I can only account for what occurs on this base.\n");
+ } else if (empty == 1) {
+ s = sdsnew(
+ "Hi Sam, this instance is empty or is using very little memory, "
+ "my issues detector can't be used in these conditions. "
+ "Please, leave for your mission on Earth and fill it with some data. "
+ "The new Sam and I will be back to our programming as soon as I "
+ "finished rebooting.\n");
+ } else {
+ s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
+ if (big_peak) {
+ s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
+ }
+ if (high_frag) {
+ s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
+ }
+ if (high_alloc_frag) {
+ s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
+ }
+ if (high_alloc_rss) {
+ s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
+ }
+ if (high_proc_rss) {
+ s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
+ }
+ if (big_slave_buf) {
+ s = sdscat(s," * Big slave buffers: The slave output buffers in this instance are greater than 10MB for each slave (on average). This likely means that there is some slave instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what slave is not receiving data correctly and why. You can use the INFO output in order to check the slaves delays and the CLIENT LIST command to check the output buffers of each slave.\n\n");
+ }
+ if (big_client_buf) {
+ s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
+ }
+ if (many_scripts) {
+ s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
+ }
+ s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
+ }
+ freeMemoryOverheadData(mh);
+ return s;
+}
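
Each check above pairs a ratio with an absolute threshold (10<<20 is 10MiB) so that small absolute overheads never trigger a report. A standalone sketch of one such check with made-up numbers (not part of the patch):

    #include <stdio.h>

    int main(void) {
        size_t rss = 160 << 20, used = 100 << 20;  /* hypothetical RSS and used memory */
        float frag = (float)rss / used;            /* 1.6 */
        size_t frag_bytes = rss - used;            /* 60MiB */
        if (frag > 1.4 && frag_bytes > (10 << 20))
            printf("high fragmentation: ratio=%.2f bytes=%zu\n", frag, frag_bytes);
        return 0;
    }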
+
+/* Set the object LRU/LFU depending on server.maxmemory_policy.
+ * The lfu_freq arg is only relevant if the policy is MAXMEMORY_FLAG_LFU.
+ * The lru_idle and lru_clock args are only relevant if the policy
+ * is MAXMEMORY_FLAG_LRU.
+ * Either or both of them may be <0, in which case nothing is set. */
+void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
+ long long lru_clock) {
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ if (lfu_freq >= 0) {
+ serverAssert(lfu_freq <= 255);
+ val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
+ }
+ } else if (lru_idle >= 0) {
+ /* Serialized LRU idle time is in seconds. Scale
+ * according to the LRU clock resolution this Redis
+ * instance was compiled with (normally 1000 ms, so the
+ * statement below will expand to lru_idle*1000/1000). */
+ lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
+ val->lru = lru_clock - lru_idle;
+ /* If the lru field overflows (since the LRU clock is a
+ * wrapping clock), the best we can do is to provide the
+ * maximum representable idle time. */
+ if (val->lru < 0) val->lru = lru_clock+1;
}
}
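
Under the LFU policy the 24-bit lru field packs the last access time in minutes (high 16 bits) with a logarithmic counter (low 8 bits), exactly as encoded above. A standalone round trip of that packing, with made-up values (not part of the patch):

    #include <stdio.h>

    int main(void) {
        unsigned int minutes = 12345, counter = 200; /* hypothetical values */
        unsigned int lru = (minutes << 8) | counter; /* encode, as in the code above */
        printf("minutes=%u counter=%u\n", lru >> 8, lru & 255); /* decode */
        return 0;
    }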
+/* ======================= The OBJECT and MEMORY commands =================== */
+
/* This is a helper function for the OBJECT command. We need to lookup keys
* without any modification of LRU or other parameters. */
-robj *objectCommandLookup(redisClient *c, robj *key) {
+robj *objectCommandLookup(client *c, robj *key) {
dictEntry *de;
if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL;
return (robj*) dictGetVal(de);
}
-robj *objectCommandLookupOrReply(redisClient *c, robj *key, robj *reply) {
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
robj *o = objectCommandLookup(c,key);
if (!o) addReply(c, reply);
@@ -712,11 +1234,20 @@ robj *objectCommandLookupOrReply(redisClient *c, robj *key, robj *reply) {
}
 /* The OBJECT command allows inspecting the internals of a Redis object.
- * Usage: OBJECT <refcount|encoding|idletime> <key> */
-void objectCommand(redisClient *c) {
+ * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
+void objectCommand(client *c) {
robj *o;
- if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
+"FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
+"IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
+"REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
addReplyLongLong(c,o->refcount);
@@ -727,9 +1258,195 @@ void objectCommand(redisClient *c) {
} else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
== NULL) return;
+ if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
+ addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
+ return;
+ }
addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
+ } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
+ == NULL) return;
+ if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
+ addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
+ return;
+ }
+ /* LFUDecrAndReturn should be called
+ * in case the key has not been accessed for a long time,
+ * because we update the access time only
+ * when the key is read or overwritten. */
+ addReplyLongLong(c,LFUDecrAndReturn(o));
} else {
- addReplyError(c,"Syntax error. Try OBJECT (refcount|encoding|idletime)");
+ addReplySubcommandSyntaxError(c);
}
}
+/* The memory command will eventually be a complete interface for the
+ * memory introspection capabilities of Redis.
+ *
+ * Usage: MEMORY <usage|stats|doctor|purge|malloc-stats|help> [arguments] */
+void memoryCommand(client *c) {
+ robj *o;
+
+ if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
+ long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
+ for (int j = 3; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"samples") &&
+ j+1 < c->argc)
+ {
+ if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
+ == C_ERR) return;
+ if (samples < 0) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ if (samples == 0) samples = LLONG_MAX; /* Zero means sample everything. */
+ j++; /* skip option argument. */
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+ if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
+ == NULL) return;
+ size_t usage = objectComputeSize(o,samples);
+ usage += sdsAllocSize(c->argv[2]->ptr);
+ usage += sizeof(dictEntry);
+ addReplyLongLong(c,usage);
+ } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
+ struct redisMemOverhead *mh = getMemoryOverheadData();
+
+ addReplyMultiBulkLen(c,(25+mh->num_dbs)*2);
+
+ addReplyBulkCString(c,"peak.allocated");
+ addReplyLongLong(c,mh->peak_allocated);
+
+ addReplyBulkCString(c,"total.allocated");
+ addReplyLongLong(c,mh->total_allocated);
+
+ addReplyBulkCString(c,"startup.allocated");
+ addReplyLongLong(c,mh->startup_allocated);
+
+ addReplyBulkCString(c,"replication.backlog");
+ addReplyLongLong(c,mh->repl_backlog);
+
+ addReplyBulkCString(c,"clients.slaves");
+ addReplyLongLong(c,mh->clients_slaves);
+
+ addReplyBulkCString(c,"clients.normal");
+ addReplyLongLong(c,mh->clients_normal);
+
+ addReplyBulkCString(c,"aof.buffer");
+ addReplyLongLong(c,mh->aof_buffer);
+
+ addReplyBulkCString(c,"lua.caches");
+ addReplyLongLong(c,mh->lua_caches);
+
+ for (size_t j = 0; j < mh->num_dbs; j++) {
+ char dbname[32];
+ snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
+ addReplyBulkCString(c,dbname);
+ addReplyMultiBulkLen(c,4);
+
+ addReplyBulkCString(c,"overhead.hashtable.main");
+ addReplyLongLong(c,mh->db[j].overhead_ht_main);
+
+ addReplyBulkCString(c,"overhead.hashtable.expires");
+ addReplyLongLong(c,mh->db[j].overhead_ht_expires);
+ }
+
+ addReplyBulkCString(c,"overhead.total");
+ addReplyLongLong(c,mh->overhead_total);
+
+ addReplyBulkCString(c,"keys.count");
+ addReplyLongLong(c,mh->total_keys);
+
+ addReplyBulkCString(c,"keys.bytes-per-key");
+ addReplyLongLong(c,mh->bytes_per_key);
+
+ addReplyBulkCString(c,"dataset.bytes");
+ addReplyLongLong(c,mh->dataset);
+
+ addReplyBulkCString(c,"dataset.percentage");
+ addReplyDouble(c,mh->dataset_perc);
+
+ addReplyBulkCString(c,"peak.percentage");
+ addReplyDouble(c,mh->peak_perc);
+
+ addReplyBulkCString(c,"allocator.allocated");
+ addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
+
+ addReplyBulkCString(c,"allocator.active");
+ addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
+
+ addReplyBulkCString(c,"allocator.resident");
+ addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
+
+ addReplyBulkCString(c,"allocator-fragmentation.ratio");
+ addReplyDouble(c,mh->allocator_frag);
+
+ addReplyBulkCString(c,"allocator-fragmentation.bytes");
+ addReplyLongLong(c,mh->allocator_frag_bytes);
+
+ addReplyBulkCString(c,"allocator-rss.ratio");
+ addReplyDouble(c,mh->allocator_rss);
+
+ addReplyBulkCString(c,"allocator-rss.bytes");
+ addReplyLongLong(c,mh->allocator_rss_bytes);
+
+ addReplyBulkCString(c,"rss-overhead.ratio");
+ addReplyDouble(c,mh->rss_extra);
+
+ addReplyBulkCString(c,"rss-overhead.bytes");
+ addReplyLongLong(c,mh->rss_extra_bytes);
+
+ addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
+ addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
+
+ addReplyBulkCString(c,"fragmentation.bytes");
+ addReplyLongLong(c,mh->total_frag_bytes);
+
+ freeMemoryOverheadData(mh);
+ } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
+#if defined(USE_JEMALLOC)
+ sds info = sdsempty();
+ je_malloc_stats_print(inputCatSds, &info, NULL);
+ addReplyBulkSds(c, info);
+#else
+ addReplyBulkCString(c,"Stats not supported for the current allocator");
+#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
+ sds report = getMemoryDoctorReport();
+ addReplyBulkSds(c,report);
+ } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
+#if defined(USE_JEMALLOC)
+ char tmp[32];
+ unsigned narenas = 0;
+ size_t sz = sizeof(unsigned);
+ if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
+ sprintf(tmp, "arena.%d.purge", narenas);
+ if (!je_mallctl(tmp, NULL, 0, NULL, 0)) {
+ addReply(c, shared.ok);
+ return;
+ }
+ }
+ addReplyError(c, "Error purging dirty pages");
+#else
+ addReply(c, shared.ok);
+ /* Nothing to do for other allocators. */
+#endif
+ } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
+ addReplyMultiBulkLen(c,5);
+ addReplyBulkCString(c,
+"MEMORY DOCTOR - Outputs memory problems report");
+ addReplyBulkCString(c,
+"MEMORY USAGE <key> [SAMPLES <count>] - Estimate memory usage of key");
+ addReplyBulkCString(c,
+"MEMORY STATS - Show memory usage details");
+ addReplyBulkCString(c,
+"MEMORY PURGE - Ask the allocator to release memory");
+ addReplyBulkCString(c,
+"MEMORY MALLOC-STATS - Show allocator internal stats");
+ } else {
+ addReplyError(c,"Syntax error. Try MEMORY HELP");
+ }
+}
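
Note how the USAGE branch above charges the key-side costs on top of objectComputeSize(): the key's sds allocation and one main-dict entry. A standalone sketch with made-up sizes (not part of the patch; 24 bytes for a dictEntry is an assumption for 64-bit builds):

    #include <stdio.h>

    int main(void) {
        size_t value_estimate = 1024; /* result of objectComputeSize() */
        size_t key_sds        = 32;   /* sdsAllocSize() of the key string */
        size_t dict_entry     = 24;   /* assumed sizeof(dictEntry) on 64-bit */
        printf("MEMORY USAGE reply: %zu\n", value_estimate + key_sds + dict_entry);
        return 0;
    }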
diff --git a/src/pubsub.c b/src/pubsub.c
index d6cfbdf3c..859eb46a3 100644
--- a/src/pubsub.c
+++ b/src/pubsub.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* Pubsub low level API
@@ -48,14 +48,14 @@ int listMatchPubsubPattern(void *a, void *b) {
}
/* Return the number of channels + patterns a client is subscribed to. */
-int clientSubscriptionsCount(redisClient *c) {
+int clientSubscriptionsCount(client *c) {
return dictSize(c->pubsub_channels)+
listLength(c->pubsub_patterns);
}
/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
* 0 if the client was already subscribed to that channel. */
-int pubsubSubscribeChannel(redisClient *c, robj *channel) {
+int pubsubSubscribeChannel(client *c, robj *channel) {
dictEntry *de;
list *clients = NULL;
int retval = 0;
@@ -85,7 +85,7 @@ int pubsubSubscribeChannel(redisClient *c, robj *channel) {
/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
* 0 if the client was not subscribed to the specified channel. */
-int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
+int pubsubUnsubscribeChannel(client *c, robj *channel, int notify) {
dictEntry *de;
list *clients;
listNode *ln;
@@ -98,10 +98,10 @@ int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
retval = 1;
/* Remove the client from the channel -> clients list hash table */
de = dictFind(server.pubsub_channels,channel);
- redisAssertWithInfo(c,NULL,de != NULL);
+ serverAssertWithInfo(c,NULL,de != NULL);
clients = dictGetVal(de);
ln = listSearchKey(clients,c);
- redisAssertWithInfo(c,NULL,ln != NULL);
+ serverAssertWithInfo(c,NULL,ln != NULL);
listDelNode(clients,ln);
if (listLength(clients) == 0) {
/* Free the list and associated hash entry at all if this was
@@ -124,7 +124,7 @@ int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
}
/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
-int pubsubSubscribePattern(redisClient *c, robj *pattern) {
+int pubsubSubscribePattern(client *c, robj *pattern) {
int retval = 0;
if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
@@ -147,7 +147,7 @@ int pubsubSubscribePattern(redisClient *c, robj *pattern) {
 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
 * 0 if the client was not subscribed to the specified pattern. */
-int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
+int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) {
listNode *ln;
pubsubPattern pat;
int retval = 0;
@@ -175,7 +175,7 @@ int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
/* Unsubscribe from all the channels. Return the number of channels the
* client was subscribed to. */
-int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
+int pubsubUnsubscribeAllChannels(client *c, int notify) {
dictIterator *di = dictGetSafeIterator(c->pubsub_channels);
dictEntry *de;
int count = 0;
@@ -199,7 +199,7 @@ int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
/* Unsubscribe from all the patterns. Return the number of patterns the
 * client was subscribed to. */
-int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
+int pubsubUnsubscribeAllPatterns(client *c, int notify) {
listNode *ln;
listIter li;
int count = 0;
@@ -237,7 +237,7 @@ int pubsubPublishMessage(robj *channel, robj *message) {
listRewind(list,&li);
while ((ln = listNext(&li)) != NULL) {
- redisClient *c = ln->value;
+ client *c = ln->value;
addReply(c,shared.mbulkhdr[3]);
addReply(c,shared.messagebulk);
@@ -274,15 +274,15 @@ int pubsubPublishMessage(robj *channel, robj *message) {
* Pubsub commands implementation
*----------------------------------------------------------------------------*/
-void subscribeCommand(redisClient *c) {
+void subscribeCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribeChannel(c,c->argv[j]);
- c->flags |= REDIS_PUBSUB;
+ c->flags |= CLIENT_PUBSUB;
}
-void unsubscribeCommand(redisClient *c) {
+void unsubscribeCommand(client *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllChannels(c,1);
} else {
@@ -291,18 +291,18 @@ void unsubscribeCommand(redisClient *c) {
for (j = 1; j < c->argc; j++)
pubsubUnsubscribeChannel(c,c->argv[j],1);
}
- if (clientSubscriptionsCount(c) == 0) c->flags &= ~REDIS_PUBSUB;
+ if (clientSubscriptionsCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
}
-void psubscribeCommand(redisClient *c) {
+void psubscribeCommand(client *c) {
int j;
for (j = 1; j < c->argc; j++)
pubsubSubscribePattern(c,c->argv[j]);
- c->flags |= REDIS_PUBSUB;
+ c->flags |= CLIENT_PUBSUB;
}
-void punsubscribeCommand(redisClient *c) {
+void punsubscribeCommand(client *c) {
if (c->argc == 1) {
pubsubUnsubscribeAllPatterns(c,1);
} else {
@@ -311,22 +311,30 @@ void punsubscribeCommand(redisClient *c) {
for (j = 1; j < c->argc; j++)
pubsubUnsubscribePattern(c,c->argv[j],1);
}
- if (clientSubscriptionsCount(c) == 0) c->flags &= ~REDIS_PUBSUB;
+ if (clientSubscriptionsCount(c) == 0) c->flags &= ~CLIENT_PUBSUB;
}
-void publishCommand(redisClient *c) {
+void publishCommand(client *c) {
int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
if (server.cluster_enabled)
clusterPropagatePublish(c->argv[1],c->argv[2]);
else
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL);
+ forceCommandPropagation(c,PROPAGATE_REPL);
addReplyLongLong(c,receivers);
}
/* PUBSUB command for Pub/Sub introspection. */
-void pubsubCommand(redisClient *c) {
- if (!strcasecmp(c->argv[1]->ptr,"channels") &&
- (c->argc == 2 || c->argc ==3))
+void pubsubCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"CHANNELS [<pattern>] -- Return the currently active channels matching a pattern (default: all).",
+"NUMPAT -- Return number of subscriptions to patterns.",
+"NUMSUB [channel-1 .. channel-N] -- Returns the number of subscribers for the specified channels (excluding patterns, default: none).",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (!strcasecmp(c->argv[1]->ptr,"channels") &&
+ (c->argc == 2 || c->argc == 3))
{
/* PUBSUB CHANNELS [<pattern>] */
sds pat = (c->argc == 2) ? NULL : c->argv[2]->ptr;
@@ -364,8 +372,6 @@ void pubsubCommand(redisClient *c) {
/* PUBSUB NUMPAT */
addReplyLongLong(c,listLength(server.pubsub_patterns));
} else {
- addReplyErrorFormat(c,
- "Unknown PUBSUB subcommand or wrong number of arguments for '%s'",
- (char*)c->argv[1]->ptr);
+ addReplySubcommandSyntaxError(c);
}
}
diff --git a/src/quicklist.c b/src/quicklist.c
index 6682b2087..7b5484116 100644
--- a/src/quicklist.c
+++ b/src/quicklist.c
@@ -149,7 +149,7 @@ REDIS_STATIC quicklistNode *quicklistCreateNode(void) {
}
/* Return cached quicklist count */
-unsigned int quicklistCount(quicklist *ql) { return ql->count; }
+unsigned long quicklistCount(const quicklist *ql) { return ql->count; }
/* Free entire quicklist. */
void quicklistRelease(quicklist *quicklist) {
@@ -671,6 +671,7 @@ int quicklistReplaceAtIndex(quicklist *quicklist, long index, void *data,
/* quicklistIndex provides an uncompressed node */
entry.node->zl = ziplistDelete(entry.node->zl, &entry.zi);
entry.node->zl = ziplistInsert(entry.node->zl, entry.zi, data, sz);
+ quicklistNodeUpdateSz(entry.node);
quicklistCompress(quicklist, entry.node);
return 1;
} else {
@@ -1191,12 +1192,12 @@ quicklist *quicklistDup(quicklist *orig) {
current = current->next) {
quicklistNode *node = quicklistCreateNode();
- if (node->encoding == QUICKLIST_NODE_ENCODING_LZF) {
- quicklistLZF *lzf = (quicklistLZF *)node->zl;
+ if (current->encoding == QUICKLIST_NODE_ENCODING_LZF) {
+ quicklistLZF *lzf = (quicklistLZF *)current->zl;
size_t lzf_sz = sizeof(*lzf) + lzf->sz;
node->zl = zmalloc(lzf_sz);
memcpy(node->zl, current->zl, lzf_sz);
- } else if (node->encoding == QUICKLIST_NODE_ENCODING_RAW) {
+ } else if (current->encoding == QUICKLIST_NODE_ENCODING_RAW) {
node->zl = zmalloc(current->sz);
memcpy(node->zl, current->zl, current->sz);
}
@@ -1372,7 +1373,7 @@ REDIS_STATIC void *_quicklistSaver(unsigned char *data, unsigned int sz) {
unsigned char *vstr;
if (data) {
vstr = zmalloc(sz);
- memcpy(data, vstr, sz);
+ memcpy(vstr, data, sz);
return vstr;
}
return NULL;
@@ -1635,7 +1636,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to tail of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushTail(ql, "hello", 6);
- /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ /* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}
@@ -1643,7 +1644,7 @@ int quicklistTest(int argc, char *argv[]) {
TEST("add to head of empty list") {
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushHead(ql, "hello", 6);
- /* 1 for head and 1 for tail beacuse 1 node = head = tail */
+ /* 1 for head and 1 for tail because 1 node = head = tail */
ql_verify(ql, 1, 1, 1, 1);
quicklistRelease(ql);
}
@@ -1757,7 +1758,8 @@ int quicklistTest(int argc, char *argv[]) {
TEST("pop 1 string from 1") {
quicklist *ql = quicklistNew(-2, options[_i]);
- quicklistPushHead(ql, genstr("hello", 331), 32);
+ char *populate = genstr("hello", 331);
+ quicklistPushHead(ql, populate, 32);
unsigned char *data;
unsigned int sz;
long long lv;
@@ -1765,6 +1767,9 @@ int quicklistTest(int argc, char *argv[]) {
quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
assert(data != NULL);
assert(sz == 32);
+ if (strcmp(populate, (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)", sz,
+ data, populate);
zfree(data);
ql_verify(ql, 0, 0, 0, 0);
quicklistRelease(ql);
@@ -1797,6 +1802,9 @@ int quicklistTest(int argc, char *argv[]) {
assert(ret == 1);
assert(data != NULL);
assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value (%s)",
+ sz, data, genstr("hello", 499 - i));
zfree(data);
}
ql_verify(ql, 0, 0, 0, 0);
@@ -1816,6 +1824,10 @@ int quicklistTest(int argc, char *argv[]) {
assert(ret == 1);
assert(data != NULL);
assert(sz == 32);
+ if (strcmp(genstr("hello", 499 - i), (char *)data))
+ ERR("Pop'd value (%.*s) didn't equal original value "
+ "(%s)",
+ sz, data, genstr("hello", 499 - i));
zfree(data);
} else {
assert(ret == 0);
diff --git a/src/quicklist.h b/src/quicklist.h
index 5c9530ccd..955a22cfa 100644
--- a/src/quicklist.h
+++ b/src/quicklist.h
@@ -64,7 +64,7 @@ typedef struct quicklistLZF {
char compressed[];
} quicklistLZF;
-/* quicklist is a 32 byte struct (on 64-bit systems) describing a quicklist.
+/* quicklist is a 40 byte struct (on 64-bit systems) describing a quicklist.
* 'count' is the number of total entries.
* 'len' is the number of quicklist nodes.
* 'compress' is: -1 if compression disabled, otherwise it's the number
@@ -74,7 +74,7 @@ typedef struct quicklist {
quicklistNode *head;
quicklistNode *tail;
unsigned long count; /* total count of all entries in all ziplists */
- unsigned int len; /* number of quicklistNodes */
+ unsigned long len; /* number of quicklistNodes */
int fill : 16; /* fill factor for individual nodes */
unsigned int compress : 16; /* depth of end nodes not to compress;0=off */
} quicklist;
@@ -92,8 +92,8 @@ typedef struct quicklistEntry {
quicklistNode *node;
unsigned char *zi;
unsigned char *value;
- unsigned int sz;
long long longval;
+ unsigned int sz;
int offset;
} quicklistEntry;
@@ -154,7 +154,7 @@ int quicklistPopCustom(quicklist *quicklist, int where, unsigned char **data,
void *(*saver)(unsigned char *data, unsigned int sz));
int quicklistPop(quicklist *quicklist, int where, unsigned char **data,
unsigned int *sz, long long *slong);
-unsigned int quicklistCount(quicklist *ql);
+unsigned long quicklistCount(const quicklist *ql);
int quicklistCompare(unsigned char *p1, unsigned char *p2, int p2_len);
size_t quicklistGetLzf(const quicklistNode *node, void **data);
diff --git a/src/rax.c b/src/rax.c
new file mode 100644
index 000000000..92b367550
--- /dev/null
+++ b/src/rax.c
@@ -0,0 +1,1810 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <math.h>
+#include "rax.h"
+
+#ifndef RAX_MALLOC_INCLUDE
+#define RAX_MALLOC_INCLUDE "rax_malloc.h"
+#endif
+
+#include RAX_MALLOC_INCLUDE
+
+/* This is a special pointer that is guaranteed to never have the same value
+ * as a radix tree node. It's used in order to report a "not found" error
+ * without requiring the function to have multiple return values. */
+void *raxNotFound = (void*)"rax-not-found-pointer";
+
+/* -------------------------------- Debugging ------------------------------ */
+
+void raxDebugShowNode(const char *msg, raxNode *n);
+
+/* Turn debugging messages on/off. */
+#if 0
+#define debugf(...) \
+ do { \
+ printf("%s:%s:%d:\t", __FILE__, __FUNCTION__, __LINE__); \
+ printf(__VA_ARGS__); \
+ fflush(stdout); \
+ } while (0);
+
+#define debugnode(msg,n) raxDebugShowNode(msg,n)
+#else
+#define debugf(...)
+#define debugnode(msg,n)
+#endif
+
+/* ------------------------- raxStack functions --------------------------
+ * The raxStack is a simple stack of pointers that is capable of switching
+ * from a stack-allocated array to heap allocation once a given number of
+ * items is reached. It is used in order to retain the list of parent nodes
+ * while walking the radix tree, in order to implement certain operations
+ * that need to navigate the tree upward.
+ * ------------------------------------------------------------------------- */
+
+/* Initialize the stack. */
+static inline void raxStackInit(raxStack *ts) {
+ ts->stack = ts->static_items;
+ ts->items = 0;
+ ts->maxitems = RAX_STACK_STATIC_ITEMS;
+ ts->oom = 0;
+}
+
+/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
+static inline int raxStackPush(raxStack *ts, void *ptr) {
+ if (ts->items == ts->maxitems) {
+ if (ts->stack == ts->static_items) {
+ ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
+ if (ts->stack == NULL) {
+ ts->stack = ts->static_items;
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
+ } else {
+ void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
+ if (newalloc == NULL) {
+ ts->oom = 1;
+ errno = ENOMEM;
+ return 0;
+ }
+ ts->stack = newalloc;
+ }
+ ts->maxitems *= 2;
+ }
+ ts->stack[ts->items] = ptr;
+ ts->items++;
+ return 1;
+}
+
+/* Pop an item from the stack; the function returns NULL if there are no
+ * items to pop. */
+static inline void *raxStackPop(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ ts->items--;
+ return ts->stack[ts->items];
+}
+
+/* Return the stack item at the top of the stack without actually consuming
+ * it. */
+static inline void *raxStackPeek(raxStack *ts) {
+ if (ts->items == 0) return NULL;
+ return ts->stack[ts->items-1];
+}
+
+/* Free the stack in case we used heap allocation. */
+static inline void raxStackFree(raxStack *ts) {
+ if (ts->stack != ts->static_items) rax_free(ts->stack);
+}
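
A hypothetical usage sketch of the raxStack API defined above (not part of the patch; it assumes two already allocated nodes). raxStackFree() is a no-op unless the static array was outgrown:

    static void raxStackExample(raxNode *a, raxNode *b) {
        raxStack ts;
        raxStackInit(&ts);
        if (!raxStackPush(&ts,a) || !raxStackPush(&ts,b)) {
            raxStackFree(&ts);            /* OOM: ts.oom is set, errno is ENOMEM */
            return;
        }
        raxNode *top = raxStackPeek(&ts); /* 'b', without consuming it */
        (void)top;
        while (raxStackPop(&ts) != NULL); /* unwinds 'b', then 'a' */
        raxStackFree(&ts);                /* frees only heap-grown storage */
    }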
+
+/* ----------------------------------------------------------------------------
+ * Radix tree implementation
+ * --------------------------------------------------------------------------*/
+
+/* Allocate a new non compressed node with the specified number of children.
+ * If datafield is true, the allocation is made large enough to hold the
+ * associated data pointer.
+ * Returns the new node pointer. On out of memory NULL is returned. */
+raxNode *raxNewNode(size_t children, int datafield) {
+ size_t nodesize = sizeof(raxNode)+children+
+ sizeof(raxNode*)*children;
+ if (datafield) nodesize += sizeof(void*);
+ raxNode *node = rax_malloc(nodesize);
+ if (node == NULL) return NULL;
+ node->iskey = 0;
+ node->isnull = 0;
+ node->iscompr = 0;
+ node->size = children;
+ return node;
+}
+
+/* Allocate a new rax and return its pointer. On out of memory the function
+ * returns NULL. */
+rax *raxNew(void) {
+ rax *rax = rax_malloc(sizeof(*rax));
+ if (rax == NULL) return NULL;
+ rax->numele = 0;
+ rax->numnodes = 1;
+ rax->head = raxNewNode(0,0);
+ if (rax->head == NULL) {
+ rax_free(rax);
+ return NULL;
+ } else {
+ return rax;
+ }
+}
+
+/* Return the current total size of the node. */
+#define raxNodeCurrentLength(n) ( \
+ sizeof(raxNode)+(n)->size+ \
+ ((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
+ (((n)->iskey && !(n)->isnull)*sizeof(void*)) \
+)
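
Worked example for the macro above, computed by hand for a 64-bit build (4-byte raxNode header, 8-byte pointers; the numbers are hand arithmetic under those assumptions, not from the patch):

    /* Non compressed key node with 3 children and a non-NULL value:
     *   4 (header) + 3 (edge chars) + 3*8 (child ptrs) + 8 (data ptr) = 39 bytes.
     * Compressed key node with a 3-char run and a non-NULL value:
     *   4 (header) + 3 (run chars) + 1*8 (single child) + 8 (data ptr) = 23 bytes. */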
+
+/* Realloc the node to make room for the auxiliary data pointer, in
+ * order to store an item in that node. On out of memory NULL is returned. */
+raxNode *raxReallocForData(raxNode *n, void *data) {
+ if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
+ size_t curlen = raxNodeCurrentLength(n);
+ return rax_realloc(n,curlen+sizeof(void*));
+}
+
+/* Set the node auxiliary data to the specified pointer. */
+void raxSetData(raxNode *n, void *data) {
+ n->iskey = 1;
+ if (data != NULL) {
+ n->isnull = 0;
+ void **ndata = (void**)
+ ((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ memcpy(ndata,&data,sizeof(data));
+ } else {
+ n->isnull = 1;
+ }
+}
+
+/* Get the node auxiliary data. */
+void *raxGetData(raxNode *n) {
+ if (n->isnull) return NULL;
+ void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
+ void *data;
+ memcpy(&data,ndata,sizeof(data));
+ return data;
+}
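
Both functions above move the data pointer with memcpy() instead of dereferencing it directly: the pointer lives at an arbitrary byte offset inside the node, so it may be misaligned. A standalone illustration of the same technique (not part of the patch):

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        unsigned char buf[32];
        void *in = (void*)0x1234, *out = NULL;
        memcpy(buf+3, &in, sizeof(in));   /* store at a misaligned offset */
        memcpy(&out, buf+3, sizeof(out)); /* load it back portably */
        printf("%p\n", out);
        return 0;
    }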
+
+/* Add a new child to the node 'n' representing the character 'c' and return
+ * its new pointer, as well as the child pointer by reference. Additionally
+ * '***parentlink' is populated with the raxNode pointer-to-pointer of where
+ * the new child was stored, which is useful for the caller to replace the
+ * child pointer if it gets reallocated.
+ *
+ * On success the new parent node pointer is returned (it may change because
+ * of the realloc, so the caller should discard 'n' and use the new value).
+ * On out of memory NULL is returned, and the old node is still valid. */
+raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
+ assert(n->iscompr == 0);
+
+ size_t curlen = sizeof(raxNode)+
+ n->size+
+ sizeof(raxNode*)*n->size;
+ size_t newlen;
+
+ /* Alloc the new child we will link to 'n'. */
+ raxNode *child = raxNewNode(0,0);
+ if (child == NULL) return NULL;
+
+ /* Make space in the original node. */
+ if (n->iskey) curlen += sizeof(void*);
+ newlen = curlen+sizeof(raxNode*)+1; /* Add 1 char and 1 pointer. */
+ raxNode *newn = rax_realloc(n,newlen);
+ if (newn == NULL) {
+ rax_free(child);
+ return NULL;
+ }
+ n = newn;
+
+ /* After the reallocation, we have 5 or 9 (depending on the system
+ * pointer size) extra bytes at the end, that is, the additional char
+ * in the 'data' section, plus one pointer to the new child:
+ *
+ * [numc][abx][ap][bp][xp]|auxp|.....
+ *
+ * Let's find where to insert the new child in order to make sure
+ * it is inserted in-place lexicographically. */
+ int pos;
+ for (pos = 0; pos < n->size; pos++) {
+ if (n->data[pos] > c) break;
+ }
+
+ /* Now, if present, move auxiliary data pointer at the end
+ * so that we can mess with the other data without overwriting it.
+ * We will obtain something like that:
+ *
+ * [numc][abx][ap][bp][xp].....|auxp| */
+ unsigned char *src;
+ if (n->iskey && !n->isnull) {
+ src = n->data+n->size+sizeof(raxNode*)*n->size;
+ memmove(src+1+sizeof(raxNode*),src,sizeof(void*));
+ }
+
+ /* Now imagine we are adding a node with edge 'c'. The insertion
+ * point is between 'b' and 'x', so the 'pos' variable value is 2.
+ *
+ * To start, move all the child pointers after the insertion point
+ * by 1+sizeof(pointer) bytes to the right, to obtain:
+ *
+ * [numc][abx][ap][bp].....[xp]|auxp| */
+ src = n->data+n->size+sizeof(raxNode*)*pos;
+ memmove(src+1+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
+
+ /* Now make the space for the additional char in the data section,
+ * but also move the pointers before the insertion point to the right
+ * by 1 byte, in order to obtain the following:
+ *
+ * [numc][ab.x][ap][bp]....[xp]|auxp| */
+ src = n->data+pos;
+ memmove(src+1,src,n->size-pos+sizeof(raxNode*)*pos);
+
+ /* We can now set the character and its child node pointer to get:
+ *
+ * [numc][abcx][ap][bp]....[xp]|auxp|
+ * [numc][abcx][ap][bp][cp][xp]|auxp| */
+ n->data[pos] = c;
+ n->size++;
+ raxNode **childfield = (raxNode**)(n->data+n->size+sizeof(raxNode*)*pos);
+ memcpy(childfield,&child,sizeof(child));
+ *childptr = child;
+ *parentlink = childfield;
+ return n;
+}
+
+/* Return the pointer to the last child pointer in a node. For the compressed
+ * nodes this is the only child pointer. */
+#define raxNodeLastChildPtr(n) ((raxNode**) ( \
+ ((char*)(n)) + \
+ raxNodeCurrentLength(n) - \
+ sizeof(raxNode*) - \
+ (((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
+))
+
+/* Return the pointer to the first child pointer. */
+#define raxNodeFirstChildPtr(n) ((raxNode**)((n)->data+(n)->size))
+
+/* Turn the node 'n', that must be a node without any children, into a
+ * compressed node representing a set of nodes linked one after the other
+ * and having exactly one child each. The node can be a key or not: this
+ * property and the associated value if any will be preserved.
+ *
+ * The function also returns a child node, since the last node of the
+ * compressed chain cannot be part of the chain: it has zero children while
+ * we can only compress inner nodes with exactly one child each. */
+raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
+ assert(n->size == 0 && n->iscompr == 0);
+ void *data = NULL; /* Initialized only to avoid warnings. */
+ size_t newsize;
+
+ debugf("Compress node: %.*s\n", (int)len,s);
+
+ /* Allocate the child to link to this node. */
+ *child = raxNewNode(0,0);
+ if (*child == NULL) return NULL;
+
+ /* Make space in the parent node. */
+ newsize = sizeof(raxNode)+len+sizeof(raxNode*);
+ if (n->iskey) {
+ data = raxGetData(n); /* To restore it later. */
+ if (!n->isnull) newsize += sizeof(void*);
+ }
+ raxNode *newn = rax_realloc(n,newsize);
+ if (newn == NULL) {
+ rax_free(*child);
+ return NULL;
+ }
+ n = newn;
+
+ n->iscompr = 1;
+ n->size = len;
+ memcpy(n->data,s,len);
+ if (n->iskey) raxSetData(n,data);
+ raxNode **childfield = raxNodeLastChildPtr(n);
+ memcpy(childfield,child,sizeof(*child));
+ return n;
+}
+
+/* Low level function that walks the tree looking for the string
+ * 's' of 'len' bytes. The function returns the number of characters
+ * of the key that it was possible to process: if the returned integer
+ * is the same as 'len', it means that the node corresponding to the
+ * string was found (however it may not be a key, either because
+ * node->iskey is zero or because we simply stopped in the middle of a
+ * compressed node, so that 'splitpos' is non zero).
+ *
+ * Otherwise if the returned integer is not the same as 'len', there was an
+ * early stop during the tree walk because of a character mismatch.
+ *
+ * The node where the search ended (because the full string was processed
+ * or because there was an early stop) is returned by reference as
+ * '*stopnode' if the passed pointer is not NULL. The link to this node
+ * inside its parent is returned as '*plink' if not NULL. Finally, if the
+ * search stopped in a compressed node, '*splitpos' returns the index
+ * inside the compressed node where the search ended. This is useful to
+ * know where to split the node for insertion.
+ *
+ * Note that when we stop in the middle of a compressed node with
+ * a perfect match, this function will return a length equal to the
+ * 'len' argument (all the key matched), and will return a *splitpos which is
+ * always positive (that will represent the index of the character immediately
+ * *after* the last match in the current compressed node).
+ *
+ * When instead we stop at a compressed node and *splitpos is zero, it
+ * means that the current node represents the key (that is, none of the
+ * compressed node characters are needed to represent the key, just all
+ * its parents nodes). */
+static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
+ raxNode *h = rax->head;
+ raxNode **parentlink = &rax->head;
+
+ size_t i = 0; /* Position in the string. */
+ size_t j = 0; /* Position in the node children (or bytes if compressed).*/
+ while(h->size && i < len) {
+ debugnode("Lookup current node",h);
+ unsigned char *v = h->data;
+
+ if (h->iscompr) {
+ for (j = 0; j < h->size && i < len; j++, i++) {
+ if (v[j] != s[i]) break;
+ }
+ if (j != h->size) break;
+ } else {
+ /* Even when h->size is large, a linear scan provides good
+ * performance compared to approaches that are in theory
+ * more sound, like binary search. */
+ for (j = 0; j < h->size; j++) {
+ if (v[j] == s[i]) break;
+ }
+ if (j == h->size) break;
+ i++;
+ }
+
+ if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
+ raxNode **children = raxNodeFirstChildPtr(h);
+ if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
+ memcpy(&h,children+j,sizeof(h));
+ parentlink = children+j;
+ j = 0; /* If the new node is compressed and we do not
iterate again (since i == len) set the split
+ position to 0 to signal this node represents
+ the searched key. */
+ }
+ debugnode("Lookup stop node is",h);
+ if (stopnode) *stopnode = h;
+ if (plink) *plink = parentlink;
+ if (splitpos && h->iscompr) *splitpos = j;
+ return i;
+}
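
A summary sketch of a typical raxLowWalk() call, mirroring how raxGenericInsert() below consumes the results (a reading aid, not code from the patch):

    /*
     *   int splitpos = 0;
     *   raxNode *h; raxNode **parentlink;
     *   size_t i = raxLowWalk(rax,s,len,&h,&parentlink,&splitpos,NULL);
     *
     *   i < len                  -> character mismatch: the string is absent.
     *   i == len, splitpos > 0   -> stopped inside a compressed run: the string
     *                               is absent and the node must be split to insert it.
     *   i == len, and !h->iscompr || splitpos == 0
     *                            -> 'h' represents the string; it is a key only if
     *                               h->iskey, and raxGetData(h) then returns its value.
     */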
+
+/* Insert the element 's' of size 'len', setting as auxiliary data
+ * the pointer 'data'. If the element is already present, the associated
+ * data is updated (only if 'overwrite' is set to 1), and 0 is returned,
+ * otherwise the element is inserted and 1 is returned. On out of memory the
+ * function returns 0 as well but sets errno to ENOMEM, otherwise errno will
+ * be set to 0.
+ */
+int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
+ size_t i;
+ int j = 0; /* Split position. If raxLowWalk() stops in a compressed
+ node, the index 'j' represents the char we stopped within the
+ compressed node, that is, the position where to split the
+ node for insertion. */
+ raxNode *h, **parentlink;
+
+ debugf("### Insert %.*s with value %p\n", (int)len, s, data);
+ i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
+
+ /* If i == len we walked following the whole string. If we are not
+ * in the middle of a compressed node, the string is either already
+ * inserted or the node we stopped at is currently not a key, but can
+ * represent our key. We just have to reallocate the node and make
+ * space for the data pointer. */
+ if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
+ debugf("### Insert: node representing key exists\n");
+ /* Make space for the value pointer if needed. */
+ if (!h->iskey || (h->isnull && overwrite)) {
+ h = raxReallocForData(h,data);
+ if (h) memcpy(parentlink,&h,sizeof(h));
+ }
+ if (h == NULL) {
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* Update the existing key if there is already one. */
+ if (h->iskey) {
+ if (old) *old = raxGetData(h);
+ if (overwrite) raxSetData(h,data);
+ errno = 0;
+ return 0; /* Element already exists. */
+ }
+
+ /* Otherwise set the node as a key. Note that raxSetData()
+ * will set h->iskey. */
+ raxSetData(h,data);
+ rax->numele++;
+ return 1; /* Element inserted. */
+ }
+
+ /* If the node we stopped at is a compressed node, we need to
+ * split it before continuing.
+ *
+ * Splitting a compressed node has a few possible cases.
+ * Imagine that the node 'h' we are currently at is a compressed
+ * node containing the string "ANNIBALE" (it means that it represents
+ * nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
+ * pointer of this node pointing at the 'E' node, because remember that
+ * we have characters at the edges of the graph, not inside the nodes
+ * themselves).
+ *
+ * In order to show a real case imagine our node to also point to
+ * another compressed node, that finally points at the node without
+ * children, representing 'O':
+ *
+ * "ANNIBALE" -> "SCO" -> []
+ *
+ * When inserting we may face the following cases. Note that all the cases
+ * require the insertion of a non compressed node with exactly two
+ * children, except for the last case which just requires splitting a
+ * compressed node.
+ *
+ * 1) Inserting "ANNIENTARE"
+ *
+ * |B| -> "ALE" -> "SCO" -> []
+ * "ANNI" -> |-|
+ * |E| -> (... continue algo ...) "NTARE" -> []
+ *
+ * 2) Inserting "ANNIBALI"
+ *
+ * |E| -> "SCO" -> []
+ * "ANNIBAL" -> |-|
+ * |I| -> (... continue algo ...) []
+ *
+ * 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
+ *
+ * |N| -> "NIBALE" -> "SCO" -> []
+ * |A| -> |-|
+ * |G| -> (... continue algo ...) |O| -> []
+ *
+ * 4) Inserting "CIAO"
+ *
+ * |A| -> "NNIBALE" -> "SCO" -> []
+ * |-|
+ * |C| -> (... continue algo ...) "IAO" -> []
+ *
+ * 5) Inserting "ANNI"
+ *
+ * "ANNI" -> "BALE" -> "SCO" -> []
+ *
+ * The final algorithm for insertion covering all the above cases is as
+ * follows.
+ *
+ * ============================= ALGO 1 =============================
+ *
+ * For the above cases 1 to 4, that is, all cases where we stopped in
+ * the middle of a compressed node for a character mismatch, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we found the mismatching
+ * character. For example if the node contains "ANNIBALE" and we add
+ * "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
+ * mismatching character is found.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create "split node" having as child the non common letter
+ * at the compressed node. The other non common letter (at the key)
+ * will be added later as we continue the normal insertion algorithm
+ * at step "6".
+ *
+ * 3a. IF $SPLITPOS == 0:
+ * Replace the old node with the split node, by copying the auxiliary
+ * data if any. Fix parent's reference. Free old node eventually
+ * (we still need its data for the next steps of the algorithm).
+ *
+ * 3b. IF $SPLITPOS != 0:
+ * Trim the compressed node (reallocating it as well) in order to
+ * contain $splitpos characters. Change chilid pointer in order to link
+ * to the split node. If new compressed node len is just 1, set
+ * iscompr to 0 (layout is the same). Fix parent's reference.
+ *
+ * 4a. IF the postfix len (the length of the remaining string of the
+ * original compressed node after the split character) is non zero,
+ * create a "postfix node". If the postfix node has just one character
+ * set iscompr to 0, otherwise set iscompr to 1. Set the postfix node
+ * child pointer to $NEXT.
+ *
+ * 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
+ *
+ * 5. Set child[0] of split node to postfix node.
+ *
+ * 6. Set the split node as the current node, set current index at child[1]
+ * and continue the insertion algorithm as usual.
+ *
+ * ============================= ALGO 2 =============================
+ *
+ * For case 5, that is, if we stopped in the middle of a compressed
+ * node but no mismatch was found, do:
+ *
+ * Let $SPLITPOS be the zero-based index at which, in the
+ * compressed node array of characters, we stopped iterating because
+ * there were no more key characters to match. So in the example of
+ * the node "ANNIBALE", adding the string "ANNI", $SPLITPOS is 4.
+ *
+ * 1. Save the current compressed node $NEXT pointer (the pointer to the
+ * child element, that is always present in compressed nodes).
+ *
+ * 2. Create a "postfix node" containing all the characters from $SPLITPOS
+ * to the end. Use $NEXT as the postfix node child pointer.
+ * If the postfix node length is 1, set iscompr to 0.
+ * Set the node as a key with the associated value of the new
+ * inserted key.
+ *
+ * 3. Trim the current node to contain the first $SPLITPOS characters.
+ * As usual, if the new node length is just 1, set iscompr to 0.
+ * Take the iskey / associated value as it was in the original node.
+ * Fix the parent's reference.
+ *
+ * 4. Set the postfix node as the only child pointer of the trimmed
+ * node created at step 3.
+ */
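+
+ /* As a concrete mapping of ALGO 1 onto the variables used below
+ * (an illustrative trace, using the same example strings as above):
+ * when inserting "ANNIENTARE" into a node containing "ANNIBALE",
+ * j == $SPLITPOS == 4 (node char 'B' vs key char 'E'), trimmedlen == 4
+ * (the "ANNI" prefix stays in the trimmed node), splitnode gets the
+ * single character 'B', and postfixlen == 8-4-1 == 3 (the "ALE"
+ * suffix goes into the postfix node). */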
+
+ /* ------------------------- ALGORITHM 1 --------------------------- */
+ if (h->iscompr && i != len) {
+ debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
+ h->size, h->data, (void*)h);
+ debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
+ debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
+ debugf("Other (key) letter is '%c'\n", s[i]);
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+ debugf("Next is %p\n", (void*)next);
+ debugf("iskey %d\n", h->iskey);
+ if (h->iskey) {
+ debugf("key value is %p\n", raxGetData(h));
+ }
+
+ /* Set the length of the additional nodes we will need. */
+ size_t trimmedlen = j;
+ size_t postfixlen = h->size - j - 1;
+ int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
+ size_t nodesize;
+
+ /* 2: Create the split node. Also allocate the other nodes we'll need
+ * ASAP, so that it will be simpler to handle OOM. */
+ raxNode *splitnode = raxNewNode(1, split_node_is_key);
+ raxNode *trimmed = NULL;
+ raxNode *postfix = NULL;
+
+ if (trimmedlen) {
+ nodesize = sizeof(raxNode)+trimmedlen+sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ trimmed = rax_malloc(nodesize);
+ }
+
+ if (postfixlen) {
+ nodesize = sizeof(raxNode)+postfixlen+
+ sizeof(raxNode*);
+ postfix = rax_malloc(nodesize);
+ }
+
+ /* OOM? Abort now that the tree is untouched. */
+ if (splitnode == NULL ||
+ (trimmedlen && trimmed == NULL) ||
+ (postfixlen && postfix == NULL))
+ {
+ rax_free(splitnode);
+ rax_free(trimmed);
+ rax_free(postfix);
+ errno = ENOMEM;
+ return 0;
+ }
+ splitnode->data[0] = h->data[j];
+
+ if (j == 0) {
+ /* 3a: Replace the old node with the split node. */
+ if (h->iskey) {
+ void *ndata = raxGetData(h);
+ raxSetData(splitnode,ndata);
+ }
+ memcpy(parentlink,&splitnode,sizeof(splitnode));
+ } else {
+ /* 3b: Trim the compressed node. */
+ trimmed->size = j;
+ memcpy(trimmed->data,h->data,j);
+ trimmed->iscompr = j > 1 ? 1 : 0;
+ trimmed->iskey = h->iskey;
+ trimmed->isnull = h->isnull;
+ if (h->iskey && !h->isnull) {
+ void *ndata = raxGetData(h);
+ raxSetData(trimmed,ndata);
+ }
+ raxNode **cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&splitnode,sizeof(splitnode));
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ parentlink = cp; /* Set parentlink to splitnode parent. */
+ rax->numnodes++;
+ }
+
+ /* 4: Create the postfix node: what remains of the original
+ * compressed node after the split. */
+ if (postfixlen) {
+ /* 4a: create a postfix node. */
+ postfix->iskey = 0;
+ postfix->isnull = 0;
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ memcpy(postfix->data,h->data+j+1,postfixlen);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+ } else {
+ /* 4b: just use next as postfix node. */
+ postfix = next;
+ }
+
+ /* 5: Set splitnode first child as the postfix node. */
+ raxNode **splitchild = raxNodeLastChildPtr(splitnode);
+ memcpy(splitchild,&postfix,sizeof(postfix));
+
+ /* 6. Continue the insertion: this will cause the splitnode to
+ * get a new child (the non-common character of the currently
+ * inserted key). */
+ rax_free(h);
+ h = splitnode;
+ } else if (h->iscompr && i == len) {
+ /* ------------------------- ALGORITHM 2 --------------------------- */
+ debugf("ALGO 2: Stopped at compressed node %.*s (%p) j = %d\n",
+ h->size, h->data, (void*)h, j);
+
+ /* Allocate the postfix & trimmed nodes ASAP to fail gracefully on OOM. */
+ size_t postfixlen = h->size - j;
+ size_t nodesize = sizeof(raxNode)+postfixlen+sizeof(raxNode*);
+ if (data != NULL) nodesize += sizeof(void*);
+ raxNode *postfix = rax_malloc(nodesize);
+
+ nodesize = sizeof(raxNode)+j+sizeof(raxNode*);
+ if (h->iskey && !h->isnull) nodesize += sizeof(void*);
+ raxNode *trimmed = rax_malloc(nodesize);
+
+ if (postfix == NULL || trimmed == NULL) {
+ rax_free(postfix);
+ rax_free(trimmed);
+ errno = ENOMEM;
+ return 0;
+ }
+
+ /* 1: Save next pointer. */
+ raxNode **childfield = raxNodeLastChildPtr(h);
+ raxNode *next;
+ memcpy(&next,childfield,sizeof(next));
+
+ /* 2: Create the postfix node. */
+ postfix->size = postfixlen;
+ postfix->iscompr = postfixlen > 1;
+ postfix->iskey = 1;
+ postfix->isnull = 0;
+ memcpy(postfix->data,h->data+j,postfixlen);
+ raxSetData(postfix,data);
+ raxNode **cp = raxNodeLastChildPtr(postfix);
+ memcpy(cp,&next,sizeof(next));
+ rax->numnodes++;
+
+ /* 3: Trim the compressed node. */
+ trimmed->size = j;
+ trimmed->iscompr = j > 1;
+ trimmed->iskey = 0;
+ trimmed->isnull = 0;
+ memcpy(trimmed->data,h->data,j);
+ memcpy(parentlink,&trimmed,sizeof(trimmed));
+ if (h->iskey) {
+ void *aux = raxGetData(h);
+ raxSetData(trimmed,aux);
+ }
+
+ /* Fix the trimmed node child pointer to point to
+ * the postfix node. */
+ cp = raxNodeLastChildPtr(trimmed);
+ memcpy(cp,&postfix,sizeof(postfix));
+
+ /* Finish! We don't need to continue with the insertion
+ * algorithm for ALGO 2. The key is already inserted. */
+ rax->numele++;
+ rax_free(h);
+ return 1; /* Key inserted. */
+ }
+
+ /* We walked the radix tree as far as we could, but there are still
+ * characters left in our string. We need to insert the missing nodes. */
+ while(i < len) {
+ raxNode *child;
+
+ /* If this node is going to have a single child, and there
+ * are more characters, so that it would result in a chain
+ * of single-childed nodes, turn it into a compressed node. */
+ if (h->size == 0 && len-i > 1) {
+ debugf("Inserting compressed node\n");
+ size_t comprsize = len-i;
+ if (comprsize > RAX_NODE_MAX_SIZE)
+ comprsize = RAX_NODE_MAX_SIZE;
+ raxNode *newh = raxCompressNode(h,s+i,comprsize,&child);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = raxNodeLastChildPtr(h);
+ i += comprsize;
+ } else {
+ debugf("Inserting normal node\n");
+ raxNode **new_parentlink;
+ raxNode *newh = raxAddChild(h,s[i],&child,&new_parentlink);
+ if (newh == NULL) goto oom;
+ h = newh;
+ memcpy(parentlink,&h,sizeof(h));
+ parentlink = new_parentlink;
+ i++;
+ }
+ rax->numnodes++;
+ h = child;
+ }
+ raxNode *newh = raxReallocForData(h,data);
+ if (newh == NULL) goto oom;
+ h = newh;
+ if (!h->iskey) rax->numele++;
+ raxSetData(h,data);
+ memcpy(parentlink,&h,sizeof(h));
+ return 1; /* Element inserted. */
+
+oom:
+ /* This code path handles out of memory after part of the sub-tree was
+ * already modified. Set the node as a key, and then remove it. However we
+ * do that only if the node is a terminal node; otherwise, if the OOM
+ * happened while reallocating a node in the middle, we don't need to free
+ * anything. */
+ if (h->size == 0) {
+ h->isnull = 1;
+ h->iskey = 1;
+ rax->numele++; /* Compensate the next remove. */
+ assert(raxRemove(rax,s,i,NULL) != 0);
+ }
+ errno = ENOMEM;
+ return 0;
+}
+
+/* Overwriting insert. Just a wrapper for raxGenericInsert() that will
+ * update the element if there is already one for the same key. */
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+ return raxGenericInsert(rax,s,len,data,old,1);
+}
+
+/* Non overwriting insert function: if an element with the same key
+ * exists, the value is not updated and the function returns 0.
+ * This is just a wrapper for raxGenericInsert(). */
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old) {
+ return raxGenericInsert(rax,s,len,data,old,0);
+}
+
+/* Find a key in the rax, returns raxNotFound special void pointer value
+ * if the item was not found, otherwise the value associated with the
+ * item is returned. */
+void *raxFind(rax *rax, unsigned char *s, size_t len) {
+ raxNode *h;
+
+ debugf("### Lookup: %.*s\n", (int)len, s);
+ int splitpos = 0;
+ size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,NULL);
+ if (i != len || (h->iscompr && splitpos != 0) || !h->iskey)
+ return raxNotFound;
+ return raxGetData(h);
+}
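+
+/* A minimal usage sketch of the insert/lookup API above (illustrative
+ * only; error handling omitted, "mykey"/"myval" are arbitrary examples):
+ *
+ * rax *rt = raxNew();
+ * raxInsert(rt,(unsigned char*)"mykey",5,(void*)"myval",NULL);
+ * void *data = raxFind(rt,(unsigned char*)"mykey",5);
+ * if (data != raxNotFound) printf("%s\n",(char*)data);
+ * raxFree(rt);
+ */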
+
+/* Return the memory address where the 'parent' node stores the specified
+ * 'child' pointer, so that the caller can update the pointer with another
+ * one if needed. The function assumes it will find a match, otherwise the
+ * operation is undefined behavior (it will continue scanning the
+ * memory without any bounds checking). */
+raxNode **raxFindParentLink(raxNode *parent, raxNode *child) {
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ raxNode *c;
+ while(1) {
+ memcpy(&c,cp,sizeof(c));
+ if (c == child) break;
+ cp++;
+ }
+ return cp;
+}
+
+/* Low level child removal from node. The new node pointer (after the child
+ * removal) is returned. Note that this function does not fix the pointer
+ * of the parent node in its parent, so this task is up to the caller.
+ * The function never fails for out of memory. */
+raxNode *raxRemoveChild(raxNode *parent, raxNode *child) {
+ debugnode("raxRemoveChild before", parent);
+ /* If parent is a compressed node (having a single child, as per the
+ * definition of the data structure), the removal of the child consists
+ * of turning it into a normal node without children. */
+ if (parent->iscompr) {
+ void *data = NULL;
+ if (parent->iskey) data = raxGetData(parent);
+ parent->isnull = 0;
+ parent->iscompr = 0;
+ parent->size = 0;
+ if (parent->iskey) raxSetData(parent,data);
+ debugnode("raxRemoveChild after", parent);
+ return parent;
+ }
+
+ /* Otherwise we need to scan for the child pointer and memmove()
+ * accordingly.
+ *
+ * 1. To start we seek the first element in both the children
+ * pointers and edge bytes in the node. */
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ raxNode **c = cp;
+ unsigned char *e = parent->data;
+
+ /* 2. Search the child pointer to remove inside the array of children
+ * pointers. */
+ while(1) {
+ raxNode *aux;
+ memcpy(&aux,c,sizeof(aux));
+ if (aux == child) break;
+ c++;
+ e++;
+ }
+
+ /* 3. Remove the edge and the pointer by memmoving the remaining children
+ * pointer and edge bytes one position before. */
+ int taillen = parent->size - (e - parent->data) - 1;
+ debugf("raxRemoveChild tail len: %d\n", taillen);
+ memmove(e,e+1,taillen);
+
+ /* Since we have one data byte less, the child pointers now start
+ * one byte earlier. */
+ memmove(((char*)cp)-1,cp,(parent->size-taillen-1)*sizeof(raxNode**));
+
+ /* Move the remaining "tail" pointer at the right position as well. */
+ size_t valuelen = (parent->iskey && !parent->isnull) ? sizeof(void*) : 0;
+ memmove(((char*)c)-1,c+1,taillen*sizeof(raxNode**)+valuelen);
+
+ /* 4. Update size. */
+ parent->size--;
+
+ /* Realloc the node according to the theoretical memory usage, to free
+ * memory if we are currently over-allocated. */
+ raxNode *newnode = rax_realloc(parent,raxNodeCurrentLength(parent));
+ if (newnode) {
+ debugnode("raxRemoveChild after", newnode);
+ }
+ /* Note: if rax_realloc() fails we just return the old address, which
+ * is valid. */
+ return newnode ? newnode : parent;
+}
+
+/* Remove the specified item. Returns 1 if the item was found and
+ * deleted, 0 otherwise. */
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) {
+ raxNode *h;
+ raxStack ts;
+
+ debugf("### Delete: %.*s\n", (int)len, s);
+ raxStackInit(&ts);
+ int splitpos = 0;
+ size_t i = raxLowWalk(rax,s,len,&h,NULL,&splitpos,&ts);
+ if (i != len || (h->iscompr && splitpos != 0) || !h->iskey) {
+ raxStackFree(&ts);
+ return 0;
+ }
+ if (old) *old = raxGetData(h);
+ h->iskey = 0;
+ rax->numele--;
+
+ /* If this node has no children, the deletion needs to reclaim the
+ * no longer used nodes. This is an iterative process that needs to
+ * walk the tree upward, deleting all the nodes with just one child
+ * that are not keys, until the head of the rax is reached or the first
+ * node with more than one child is found. */
+
+ int trycompress = 0; /* Will be set to 1 if we should try to optimize the
+ tree resulting from the deletion. */
+
+ if (h->size == 0) {
+ debugf("Key deleted in node without children. Cleanup needed.\n");
+ raxNode *child = NULL;
+ while(h != rax->head) {
+ child = h;
+ debugf("Freeing child %p [%.*s] key:%d\n", (void*)child,
+ (int)child->size, (char*)child->data, child->iskey);
+ rax_free(child);
+ rax->numnodes--;
+ h = raxStackPop(&ts);
+ /* If this node has more than one child, or actually holds
+ * a key, stop here. */
+ if (h->iskey || (!h->iscompr && h->size != 1)) break;
+ }
+ if (child) {
+ debugf("Unlinking child %p from parent %p\n",
+ (void*)child, (void*)h);
+ raxNode *new = raxRemoveChild(h,child);
+ if (new != h) {
+ raxNode *parent = raxStackPeek(&ts);
+ raxNode **parentlink;
+ if (parent == NULL) {
+ parentlink = &rax->head;
+ } else {
+ parentlink = raxFindParentLink(parent,h);
+ }
+ memcpy(parentlink,&new,sizeof(new));
+ }
+
+ /* If after the removal the node has just a single child
+ * and is not a key, we need to try to compress it. */
+ if (new->size == 1 && new->iskey == 0) {
+ trycompress = 1;
+ h = new;
+ }
+ }
+ } else if (h->size == 1) {
+ /* If the node had just one child, after the removal of the key
+ * further compression with adjacent nodes is potentially possible. */
+ trycompress = 1;
+ }
+
+ /* Don't try node compression if our node pointers stack is not
+ * complete because of OOM while executing raxLowWalk(). */
+ if (trycompress && ts.oom) trycompress = 0;
+
+ /* Recompression: if trycompress is true, 'h' points to a radix tree node
+ * that changed in a way that could allow to compress nodes in this
+ * sub-branch. Compressed nodes represent chains of nodes that are not
+ * keys and have a single child, so there are two deletion events that
+ * may alter the tree so that further compression is needed:
+ *
+ * 1) A node with a single child was a key and now no longer is a key.
+ * 2) A node with two children now has just one child.
+ *
+ * We try to navigate upward while there are other nodes that can be
+ * compressed. When we reach the uppermost node which is not a key and
+ * has a single child, we scan the chain of children to collect the
+ * compressible part of the tree, and replace the current node with the
+ * new one, fixing the child pointer to reference the first non
+ * compressible node.
+ *
+ * Example of case "1". A tree stores the keys "FOO" = 1 and
+ * "FOOBAR" = 2:
+ *
+ *
+ * "FOO" -> "BAR" -> [] (2)
+ * (1)
+ *
+ * After the removal of "FOO" the tree can be compressed as:
+ *
+ * "FOOBAR" -> [] (2)
+ *
+ *
+ * Example of case "2". A tree stores the keys "FOOBAR" = 1 and
+ * "FOOTER" = 2:
+ *
+ * |B| -> "AR" -> [] (1)
+ * "FOO" -> |-|
+ * |T| -> "ER" -> [] (2)
+ *
+ * After the removal of "FOOTER" the resulting tree is:
+ *
+ * "FOO" -> |B| -> "AR" -> [] (1)
+ *
+ * That can be compressed into:
+ *
+ * "FOOBAR" -> [] (1)
+ */
+ if (trycompress) {
+ debugf("After removing %.*s:\n", (int)len, s);
+ debugnode("Compression may be needed",h);
+ debugf("Seek start node\n");
+
+ /* Try to reach the upper node that is compressible.
+ * At the end of the loop 'h' will point to the first node we
+ * can try to compress and 'parent' to its parent. */
+ raxNode *parent;
+ while(1) {
+ parent = raxStackPop(&ts);
+ if (!parent || parent->iskey ||
+ (!parent->iscompr && parent->size != 1)) break;
+ h = parent;
+ debugnode("Going up to",h);
+ }
+ raxNode *start = h; /* Compression starting node. */
+
+ /* Scan chain of nodes we can compress. */
+ size_t comprsize = h->size;
+ int nodes = 1;
+ while(h->size != 0) {
+ raxNode **cp = raxNodeLastChildPtr(h);
+ memcpy(&h,cp,sizeof(h));
+ if (h->iskey || (!h->iscompr && h->size != 1)) break;
+ /* Stop here if going to the next node would result in
+ * a compressed node larger than h->size can hold. */
+ if (comprsize + h->size > RAX_NODE_MAX_SIZE) break;
+ nodes++;
+ comprsize += h->size;
+ }
+ if (nodes > 1) {
+ /* If we can compress, create the new node and populate it. */
+ size_t nodesize =
+ sizeof(raxNode)+comprsize+sizeof(raxNode*);
+ raxNode *new = rax_malloc(nodesize);
+ /* An out of memory here just means we cannot optimize this
+ * node, but the tree is left in a consistent state. */
+ if (new == NULL) {
+ raxStackFree(&ts);
+ return 1;
+ }
+ new->iskey = 0;
+ new->isnull = 0;
+ new->iscompr = 1;
+ new->size = comprsize;
+ rax->numnodes++;
+
+ /* Scan again, this time to populate the new node content and
+ * to fix the new node child pointer. At the same time we free
+ * all the nodes that we'll no longer use. */
+ comprsize = 0;
+ h = start;
+ while(h->size != 0) {
+ memcpy(new->data+comprsize,h->data,h->size);
+ comprsize += h->size;
+ raxNode **cp = raxNodeLastChildPtr(h);
+ raxNode *tofree = h;
+ memcpy(&h,cp,sizeof(h));
+ rax_free(tofree); rax->numnodes--;
+ if (h->iskey || (!h->iscompr && h->size != 1)) break;
+ }
+ debugnode("New node",new);
+
+ /* Now 'h' points to the first node that we still need to use,
+ * so our new node child pointer will point to it. */
+ raxNode **cp = raxNodeLastChildPtr(new);
+ memcpy(cp,&h,sizeof(h));
+
+ /* Fix parent link. */
+ if (parent) {
+ raxNode **parentlink = raxFindParentLink(parent,start);
+ memcpy(parentlink,&new,sizeof(new));
+ } else {
+ rax->head = new;
+ }
+
+ debugf("Compressed %d nodes, %d total bytes\n",
+ nodes, (int)comprsize);
+ }
+ }
+ raxStackFree(&ts);
+ return 1;
+}
+
+/* This is the core of raxFree(): performs a depth-first scan of the
+ * tree and releases all the nodes found. */
+void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) {
+ debugnode("free traversing",n);
+ int numchildren = n->iscompr ? 1 : n->size;
+ raxNode **cp = raxNodeLastChildPtr(n);
+ while(numchildren--) {
+ raxNode *child;
+ memcpy(&child,cp,sizeof(child));
+ raxRecursiveFree(rax,child,free_callback);
+ cp--;
+ }
+ debugnode("free depth-first",n);
+ if (free_callback && n->iskey && !n->isnull)
+ free_callback(raxGetData(n));
+ rax_free(n);
+ rax->numnodes--;
+}
+
+/* Free a whole radix tree, calling the specified callback in order to
+ * free the auxiliary data. */
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) {
+ raxRecursiveFree(rax,rax->head,free_callback);
+ assert(rax->numnodes == 0);
+ rax_free(rax);
+}
+
+/* Free a whole radix tree. */
+void raxFree(rax *rax) {
+ raxFreeWithCallback(rax,NULL);
+}
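+
+/* For example (an illustrative note, not additional API): if the values
+ * stored into the tree were allocated with the libc allocator, the whole
+ * tree and its values can be released at once with:
+ *
+ * raxFreeWithCallback(rt,free);
+ */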
+
+/* ------------------------------- Iterator --------------------------------- */
+
+/* Initialize a Rax iterator. This call should be performed a single time
+ * to initialize the iterator, and must be followed by a raxSeek() call,
+ * otherwise the raxPrev()/raxNext() functions will just return EOF. */
+void raxStart(raxIterator *it, rax *rt) {
+ it->flags = RAX_ITER_EOF; /* No crash if the iterator is not seeked. */
+ it->rt = rt;
+ it->key_len = 0;
+ it->key = it->key_static_string;
+ it->key_max = RAX_ITER_STATIC_LEN;
+ it->data = NULL;
+ it->node_cb = NULL;
+ raxStackInit(&it->stack);
+}
+
+/* Append characters to the current key string of the iterator 'it'. This
+ * is a low level function used to implement the iterator, not callable by
+ * the user. Returns 0 on out of memory, otherwise 1 is returned. */
+int raxIteratorAddChars(raxIterator *it, unsigned char *s, size_t len) {
+ if (it->key_max < it->key_len+len) {
+ unsigned char *old = (it->key == it->key_static_string) ? NULL :
+ it->key;
+ size_t new_max = (it->key_len+len)*2;
+ it->key = rax_realloc(old,new_max);
+ if (it->key == NULL) {
+ it->key = (!old) ? it->key_static_string : old;
+ errno = ENOMEM;
+ return 0;
+ }
+ if (old == NULL) memcpy(it->key,it->key_static_string,it->key_len);
+ it->key_max = new_max;
+ }
+ /* Use memmove since there could be an overlap between 's' and
+ * it->key when we use the current key in order to re-seek. */
+ memmove(it->key+it->key_len,s,len);
+ it->key_len += len;
+ return 1;
+}
+
+/* Remove the specified number of chars from the right of the current
+ * iterator key. */
+void raxIteratorDelChars(raxIterator *it, size_t count) {
+ it->key_len -= count;
+}
+
+/* Do an iteration step towards the next element. At the end of the step the
+ * iterator key will represent the (new) current key. If it is not possible
+ * to step in the specified direction since there are no more elements, the
+ * iterator is flagged with RAX_ITER_EOF.
+ *
+ * If 'noup' is true the function starts directly scanning for the next
+ * lexicographically greater child, and the current node is already assumed
+ * to be the parent of the last key node, so the first operation to go back
+ * to the parent will be skipped. This option is used by raxSeek() when
+ * implementing the seek of a non-existing element with the ">" or "<"
+ * options: the starting node is not a key in that particular case, so we
+ * start the scan from a node that does not represent the key set.
+ *
+ * The function returns 1 on success or 0 on out of memory. */
+int raxIteratorNextStep(raxIterator *it, int noup) {
+ if (it->flags & RAX_ITER_EOF) {
+ return 1;
+ } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+ it->flags &= ~RAX_ITER_JUST_SEEKED;
+ return 1;
+ }
+
+ /* Save key len, stack items and the node where we are currently
+ * so that on iterator EOF we can restore the current key and state. */
+ size_t orig_key_len = it->key_len;
+ size_t orig_stack_items = it->stack.items;
+ raxNode *orig_node = it->node;
+
+ while(1) {
+ int children = it->node->iscompr ? 1 : it->node->size;
+ if (!noup && children) {
+ debugf("GO DEEPER\n");
+ /* Seek the lexicographically smallest key in this subtree, which
+ * is the first one found by always going towards the first child
+ * of every successive node. */
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ raxNode **cp = raxNodeFirstChildPtr(it->node);
+ if (!raxIteratorAddChars(it,it->node->data,
+ it->node->iscompr ? it->node->size : 1)) return 0;
+ memcpy(&it->node,cp,sizeof(it->node));
+ /* Call the node callback if any, and replace the node pointer
+ * if the callback returns true. */
+ if (it->node_cb && it->node_cb(&it->node))
+ memcpy(cp,&it->node,sizeof(it->node));
+ /* For "next" step, stop every time we find a key along the
+ * way, since the key is lexicographically smaller compared to
+ * what follows in the sub-children. */
+ if (it->node->iskey) {
+ it->data = raxGetData(it->node);
+ return 1;
+ }
+ } else {
+ /* If we finished exploring the previous sub-tree, switch to the
+ * new one: go upward until a node is found where there are
+ * children representing keys lexicographically greater than the
+ * current key. */
+ while(1) {
+ int old_noup = noup;
+
+ /* Already on head? Can't go up, iteration finished. */
+ if (!noup && it->node == it->rt->head) {
+ it->flags |= RAX_ITER_EOF;
+ it->stack.items = orig_stack_items;
+ it->key_len = orig_key_len;
+ it->node = orig_node;
+ return 1;
+ }
+ /* If there are no children at the current node, try parent's
+ * next child. */
+ unsigned char prevchild = it->key[it->key_len-1];
+ if (!noup) {
+ it->node = raxStackPop(&it->stack);
+ } else {
+ noup = 0;
+ }
+ /* Adjust the current key to represent the node we are
+ * at. */
+ int todel = it->node->iscompr ? it->node->size : 1;
+ raxIteratorDelChars(it,todel);
+
+ /* Try visiting the next child if there was at least one
+ * additional child. */
+ if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+ raxNode **cp = raxNodeFirstChildPtr(it->node);
+ int i = 0;
+ while (i < it->node->size) {
+ debugf("SCAN NEXT %c\n", it->node->data[i]);
+ if (it->node->data[i] > prevchild) break;
+ i++;
+ cp++;
+ }
+ if (i != it->node->size) {
+ debugf("SCAN found a new node\n");
+ raxIteratorAddChars(it,it->node->data+i,1);
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ memcpy(&it->node,cp,sizeof(it->node));
+ /* Call the node callback if any, and replace the node
+ * pointer if the callback returns true. */
+ if (it->node_cb && it->node_cb(&it->node))
+ memcpy(cp,&it->node,sizeof(it->node));
+ if (it->node->iskey) {
+ it->data = raxGetData(it->node);
+ return 1;
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Seek the greatest key in the subtree at the current node. Return 0 on
+ * out of memory, otherwise 1. This is a helper function for different
+ * iteration functions below. */
+int raxSeekGreatest(raxIterator *it) {
+ while(it->node->size) {
+ if (it->node->iscompr) {
+ if (!raxIteratorAddChars(it,it->node->data,
+ it->node->size)) return 0;
+ } else {
+ if (!raxIteratorAddChars(it,it->node->data+it->node->size-1,1))
+ return 0;
+ }
+ raxNode **cp = raxNodeLastChildPtr(it->node);
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ memcpy(&it->node,cp,sizeof(it->node));
+ }
+ return 1;
+}
+
+/* Like raxIteratorNextStep() but implements an iteration step moving
+ * to the lexicographically previous element. The 'noup' option has a similar
+ * effect to the one of raxIteratorNextStep(). */
+int raxIteratorPrevStep(raxIterator *it, int noup) {
+ if (it->flags & RAX_ITER_EOF) {
+ return 1;
+ } else if (it->flags & RAX_ITER_JUST_SEEKED) {
+ it->flags &= ~RAX_ITER_JUST_SEEKED;
+ return 1;
+ }
+
+ /* Save key len, stack items and the node where we are currently
+ * so that on iterator EOF we can restore the current key and state. */
+ size_t orig_key_len = it->key_len;
+ size_t orig_stack_items = it->stack.items;
+ raxNode *orig_node = it->node;
+
+ while(1) {
+ int old_noup = noup;
+
+ /* Already on head? Can't go up, iteration finished. */
+ if (!noup && it->node == it->rt->head) {
+ it->flags |= RAX_ITER_EOF;
+ it->stack.items = orig_stack_items;
+ it->key_len = orig_key_len;
+ it->node = orig_node;
+ return 1;
+ }
+
+ unsigned char prevchild = it->key[it->key_len-1];
+ if (!noup) {
+ it->node = raxStackPop(&it->stack);
+ } else {
+ noup = 0;
+ }
+
+ /* Adjust the current key to represent the node we are
+ * at. */
+ int todel = it->node->iscompr ? it->node->size : 1;
+ raxIteratorDelChars(it,todel);
+
+ /* Try visiting the prev child if there is at least one
+ * child. */
+ if (!it->node->iscompr && it->node->size > (old_noup ? 0 : 1)) {
+ raxNode **cp = raxNodeLastChildPtr(it->node);
+ int i = it->node->size-1;
+ while (i >= 0) {
+ debugf("SCAN PREV %c\n", it->node->data[i]);
+ if (it->node->data[i] < prevchild) break;
+ i--;
+ cp--;
+ }
+ /* If we found a new subtree to explore in this node,
+ * go deeper, following all the last children, in order to
+ * find the lexicographically greatest key. */
+ if (i != -1) {
+ debugf("SCAN found a new node\n");
+ /* Enter the node we just found. */
+ if (!raxIteratorAddChars(it,it->node->data+i,1)) return 0;
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ memcpy(&it->node,cp,sizeof(it->node));
+ /* Seek sub-tree max. */
+ if (!raxSeekGreatest(it)) return 0;
+ }
+ }
+
+ /* Return the key: this could be the key we found scanning a new
+ * subtree, or, if we did not find a new subtree to explore here,
+ * before giving up on this node, check if it is a key itself. */
+ if (it->node->iskey) {
+ it->data = raxGetData(it->node);
+ return 1;
+ }
+ }
+}
+
+/* Seek an iterator at the specified element.
+ * Return 0 if the seek failed for syntax error or out of memory. Otherwise
+ * 1 is returned. When 0 is returned for out of memory, errno is set to
+ * the ENOMEM value. */
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) {
+ int eq = 0, lt = 0, gt = 0, first = 0, last = 0;
+
+ it->stack.items = 0; /* Just resetting. Initialized by raxStart(). */
+ it->flags |= RAX_ITER_JUST_SEEKED;
+ it->flags &= ~RAX_ITER_EOF;
+ it->key_len = 0;
+ it->node = NULL;
+
+ /* Set flags according to the operator used to perform the seek. */
+ if (op[0] == '>') {
+ gt = 1;
+ if (op[1] == '=') eq = 1;
+ } else if (op[0] == '<') {
+ lt = 1;
+ if (op[1] == '=') eq = 1;
+ } else if (op[0] == '=') {
+ eq = 1;
+ } else if (op[0] == '^') {
+ first = 1;
+ } else if (op[0] == '$') {
+ last = 1;
+ } else {
+ errno = 0;
+ return 0; /* Error. */
+ }
+
+ /* If there are no elements, set the EOF condition immediately and
+ * return. */
+ if (it->rt->numele == 0) {
+ it->flags |= RAX_ITER_EOF;
+ return 1;
+ }
+
+ if (first) {
+ /* Seeking the first key greater than or equal to the empty string
+ * is equivalent to seeking the smallest key available. */
+ return raxSeek(it,">=",NULL,0);
+ }
+
+ if (last) {
+ /* Find the greatest key by always taking the last child until a
+ * final node is found. */
+ it->node = it->rt->head;
+ if (!raxSeekGreatest(it)) return 0;
+ assert(it->node->iskey);
+ it->data = raxGetData(it->node);
+ return 1;
+ }
+
+ /* We need to seek the specified key. What we do here is to actually
+ * perform a lookup, and later invoke the prev/next key code that
+ * we already use for iteration. */
+ int splitpos = 0;
+ size_t i = raxLowWalk(it->rt,ele,len,&it->node,NULL,&splitpos,&it->stack);
+
+ /* Return OOM on incomplete stack info. */
+ if (it->stack.oom) return 0;
+
+ if (eq && i == len && (!it->node->iscompr || splitpos == 0) &&
+ it->node->iskey)
+ {
+ /* We found our node, since the key matches and we have an
+ * "equal" condition. */
+ if (!raxIteratorAddChars(it,ele,len)) return 0; /* OOM. */
+ it->data = raxGetData(it->node);
+ } else if (lt || gt) {
+ /* Exact key not found or eq flag not set. We have to set as current
+ * key the one represented by the node we stopped at, and perform
+ * a next/prev operation to seek. To reconstruct the key at this node
+ * we start from the parent and go to the current node, accumulating
+ * the characters found along the way. */
+ if (!raxStackPush(&it->stack,it->node)) return 0;
+ for (size_t j = 1; j < it->stack.items; j++) {
+ raxNode *parent = it->stack.stack[j-1];
+ raxNode *child = it->stack.stack[j];
+ if (parent->iscompr) {
+ if (!raxIteratorAddChars(it,parent->data,parent->size))
+ return 0;
+ } else {
+ raxNode **cp = raxNodeFirstChildPtr(parent);
+ unsigned char *p = parent->data;
+ while(1) {
+ raxNode *aux;
+ memcpy(&aux,cp,sizeof(aux));
+ if (aux == child) break;
+ cp++;
+ p++;
+ }
+ if (!raxIteratorAddChars(it,p,1)) return 0;
+ }
+ }
+ raxStackPop(&it->stack);
+
+ /* We need to set the iterator in the correct state to call next/prev
+ * step in order to seek the desired element. */
+ debugf("After initial seek: i=%d len=%d key=%.*s\n",
+ (int)i, (int)len, (int)it->key_len, it->key);
+ if (i != len && !it->node->iscompr) {
+ /* If we stopped in the middle of a normal node because of a
+ * mismatch, add the mismatching character to the current key
+ * and call the iterator with the 'noup' flag so that it will try
+ * to seek the next/prev child in the current node directly based
+ * on the mismatching character. */
+ if (!raxIteratorAddChars(it,ele+i,1)) return 0;
+ debugf("Seek normal node on mismatch: %.*s\n",
+ (int)it->key_len, (char*)it->key);
+
+ it->flags &= ~RAX_ITER_JUST_SEEKED;
+ if (lt && !raxIteratorPrevStep(it,1)) return 0;
+ if (gt && !raxIteratorNextStep(it,1)) return 0;
+ it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+ } else if (i != len && it->node->iscompr) {
+ debugf("Compressed mismatch: %.*s\n",
+ (int)it->key_len, (char*)it->key);
+ /* In case of a mismatch within a compressed node. */
+ int nodechar = it->node->data[splitpos];
+ int keychar = ele[i];
+ it->flags &= ~RAX_ITER_JUST_SEEKED;
+ if (gt) {
+ /* If the key the compressed node represents is greater
+ * than our seek element, continue forward, otherwise set the
+ * state in order to go back to the next sub-tree. */
+ if (nodechar > keychar) {
+ if (!raxIteratorNextStep(it,0)) return 0;
+ } else {
+ if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+ return 0;
+ if (!raxIteratorNextStep(it,1)) return 0;
+ }
+ }
+ if (lt) {
+ /* If the key the compressed node represents is smaller
+ * than our seek element, seek the greater key in this
+ * subtree, otherwise set the state in order to go back to
+ * the previous sub-tree. */
+ if (nodechar < keychar) {
+ if (!raxSeekGreatest(it)) return 0;
+ it->data = raxGetData(it->node);
+ } else {
+ if (!raxIteratorAddChars(it,it->node->data,it->node->size))
+ return 0;
+ if (!raxIteratorPrevStep(it,1)) return 0;
+ }
+ }
+ it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+ } else {
+ debugf("No mismatch: %.*s\n",
+ (int)it->key_len, (char*)it->key);
+ /* If there was no mismatch, we are in a node representing the
+ * key (but the node is not a key, or the seek operator does not
+ * include 'eq'), or we stopped in the middle of a compressed node
+ * after processing all of the key. Continue iterating as if this
+ * was a legitimate key we stopped at. */
+ it->flags &= ~RAX_ITER_JUST_SEEKED;
+ if (it->node->iscompr && it->node->iskey && splitpos && lt) {
+ /* If we stopped in the middle of a compressed node with a
+ * perfect match, and the condition is to seek a key "<" than
+ * the specified one, then if this node is a key it already
+ * represents our match. For instance we may have nodes:
+ *
+ * "f" -> "oobar" = 1 -> "" = 2
+ *
+ * Representing keys "f" = 1, "foobar" = 2. A seek for
+ * a key < "foo" will stop in the middle of the "oobar"
+ * node, but that node will be our match, representing
+ * the key "f".
+ *
+ * So in that case, we don't seek backward. */
+ } else {
+ if (gt && !raxIteratorNextStep(it,0)) return 0;
+ if (lt && !raxIteratorPrevStep(it,0)) return 0;
+ }
+ it->flags |= RAX_ITER_JUST_SEEKED; /* Ignore next call. */
+ }
+ } else {
+ /* If we are here just eq was set but no match was found. */
+ it->flags |= RAX_ITER_EOF;
+ return 1;
+ }
+ return 1;
+}
+
+/* Go to the next element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxNext(raxIterator *it) {
+ if (!raxIteratorNextStep(it,0)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (it->flags & RAX_ITER_EOF) {
+ errno = 0;
+ return 0;
+ }
+ return 1;
+}
+
+/* Go to the previous element in the scope of the iterator 'it'.
+ * If EOF (or out of memory) is reached, 0 is returned, otherwise 1 is
+ * returned. In case 0 is returned because of OOM, errno is set to ENOMEM. */
+int raxPrev(raxIterator *it) {
+ if (!raxIteratorPrevStep(it,0)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (it->flags & RAX_ITER_EOF) {
+ errno = 0;
+ return 0;
+ }
+ return 1;
+}
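+
+/* A minimal iteration sketch combining the calls above (illustrative only;
+ * error handling omitted). It prints every key in lexicographic order:
+ *
+ * raxIterator iter;
+ * raxStart(&iter,rt);
+ * raxSeek(&iter,"^",NULL,0);
+ * while(raxNext(&iter))
+ * printf("%.*s\n",(int)iter.key_len,(char*)iter.key);
+ * raxStop(&iter);
+ */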
+
+/* Perform a random walk starting in the current position of the iterator.
+ * Return 0 if the tree is empty or on out of memory. Otherwise 1 is returned
+ * and the iterator is set to the node reached after doing a random walk
+ * of 'steps' steps. If the 'steps' argument is 0, the random walk is performed
+ * using a random number of steps between 1 and two times the logarithm of
+ * the number of elements.
+ *
+ * NOTE: if you use this function to generate random elements from the radix
+ * tree, expect a disappointing distribution. A random walk produces good
+ * random elements if the tree is not sparse, however in the case of a radix
+ * tree certain keys will be reported much more often than others. At least
+ * this function should be able to explore every possible element eventually. */
+int raxRandomWalk(raxIterator *it, size_t steps) {
+ if (it->rt->numele == 0) {
+ it->flags |= RAX_ITER_EOF;
+ return 0;
+ }
+
+ if (steps == 0) {
+ /* Note: 1+floor(...) keeps 'fle' strictly positive, avoiding a
+ * division by zero in the modulo below when numele is small. */
+ size_t fle = 1+floor(log(it->rt->numele));
+ fle *= 2;
+ steps = 1 + rand() % fle;
+ }
+
+ raxNode *n = it->node;
+ while(steps > 0 || !n->iskey) {
+ int numchildren = n->iscompr ? 1 : n->size;
+ int r = rand() % (numchildren+(n != it->rt->head));
+
+ if (r == numchildren) {
+ /* Go up to parent. */
+ n = raxStackPop(&it->stack);
+ int todel = n->iscompr ? n->size : 1;
+ raxIteratorDelChars(it,todel);
+ } else {
+ /* Select a random child. */
+ if (n->iscompr) {
+ if (!raxIteratorAddChars(it,n->data,n->size)) return 0;
+ } else {
+ if (!raxIteratorAddChars(it,n->data+r,1)) return 0;
+ }
+ raxNode **cp = raxNodeFirstChildPtr(n)+r;
+ if (!raxStackPush(&it->stack,n)) return 0;
+ memcpy(&n,cp,sizeof(n));
+ }
+ if (n->iskey) steps--;
+ }
+ it->node = n;
+ return 1;
+}
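+
+/* For example (illustrative only): to fetch one pseudo-random key, seek
+ * anywhere and walk; the reached element is left in it->key / it->data:
+ *
+ * raxIterator it;
+ * raxStart(&it,rt);
+ * raxSeek(&it,"^",NULL,0);
+ * if (raxRandomWalk(&it,0))
+ * printf("%.*s\n",(int)it.key_len,(char*)it.key);
+ * raxStop(&it);
+ */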
+
+/* Compare the key currently pointed to by the iterator to the specified
+ * key according to the specified operator. Returns 1 if the comparison is
+ * true, otherwise 0 is returned. */
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len) {
+ int eq = 0, lt = 0, gt = 0;
+
+ if (op[0] == '=' || op[1] == '=') eq = 1;
+ if (op[0] == '>') gt = 1;
+ else if (op[0] == '<') lt = 1;
+ else if (op[1] != '=') return 0; /* Syntax error. */
+
+ size_t minlen = key_len < iter->key_len ? key_len : iter->key_len;
+ int cmp = memcmp(iter->key,key,minlen);
+
+ /* Handle == */
+ if (lt == 0 && gt == 0) return cmp == 0 && key_len == iter->key_len;
+
+ /* Handle >, >=, <, <= */
+ if (cmp == 0) {
+ /* Same prefix: longer wins. */
+ if (eq && key_len == iter->key_len) return 1;
+ else if (lt) return iter->key_len < key_len;
+ else if (gt) return iter->key_len > key_len;
+ return 0; /* Not reached: 'eq' alone is handled above. */
+ } else if (cmp > 0) {
+ return gt ? 1 : 0;
+ } else /* (cmp < 0) */ {
+ return lt ? 1 : 0;
+ }
+}
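+
+/* A sketch of a range scan built from raxSeek() and raxCompare()
+ * (illustrative only): visit every key k such that "go" <= k <= "gz":
+ *
+ * raxIterator it;
+ * raxStart(&it,rt);
+ * raxSeek(&it,">=",(unsigned char*)"go",2);
+ * while(raxNext(&it) && raxCompare(&it,"<=",(unsigned char*)"gz",2))
+ * printf("%.*s\n",(int)it.key_len,(char*)it.key);
+ * raxStop(&it);
+ */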
+
+/* Free the iterator. */
+void raxStop(raxIterator *it) {
+ if (it->key != it->key_static_string) rax_free(it->key);
+ raxStackFree(&it->stack);
+}
+
+/* Return if the iterator is in an EOF state. This happens when raxSeek()
+ * failed to seek an appropriate element, so that raxNext() or raxPrev()
+ * will return zero, or when an EOF condition was reached while iterating
+ * with raxNext() and raxPrev(). */
+int raxEOF(raxIterator *it) {
+ return it->flags & RAX_ITER_EOF;
+}
+
+/* Return the number of elements inside the radix tree. */
+uint64_t raxSize(rax *rax) {
+ return rax->numele;
+}
+
+/* ----------------------------- Introspection ------------------------------ */
+
+/* This function is mostly used for debugging and learning purposes.
+ * It shows an ASCII representation of a tree on standard output, outlining
+ * all the nodes and the contained keys.
+ *
+ * The representation is as follows:
+ *
+ * "foobar" (compressed node)
+ * [abc] (normal node with three children)
+ * [abc]=0x12345678 (node is a key, pointing to value 0x12345678)
+ * [] (a normal empty node)
+ *
+ * Children are represented on new indented lines, each child prefixed by
+ * the "`-(x)" string, where "x" is the edge byte.
+ *
+ * [abc]
+ * `-(a) "ladin"
+ * `-(b) [kj]
+ * `-(c) []
+ *
+ * However when a node has a single child the following representation
+ * is used instead:
+ *
+ * [abc] -> "ladin" -> []
+ */
+
+/* The actual implementation of raxShow(). */
+void raxRecursiveShow(int level, int lpad, raxNode *n) {
+ char s = n->iscompr ? '"' : '[';
+ char e = n->iscompr ? '"' : ']';
+
+ int numchars = printf("%c%.*s%c", s, n->size, n->data, e);
+ if (n->iskey) {
+ numchars += printf("=%p",raxGetData(n));
+ }
+
+ int numchildren = n->iscompr ? 1 : n->size;
+ /* Note that the 7 and 4 magic constants are the string lengths
+ * of " `-(x) " and " -> " respectively. */
+ if (level) {
+ lpad += (numchildren > 1) ? 7 : 4;
+ if (numchildren == 1) lpad += numchars;
+ }
+ raxNode **cp = raxNodeFirstChildPtr(n);
+ for (int i = 0; i < numchildren; i++) {
+ char *branch = " `-(%c) ";
+ if (numchildren > 1) {
+ printf("\n");
+ for (int j = 0; j < lpad; j++) putchar(' ');
+ printf(branch,n->data[i]);
+ } else {
+ printf(" -> ");
+ }
+ raxNode *child;
+ memcpy(&child,cp,sizeof(child));
+ raxRecursiveShow(level+1,lpad,child);
+ cp++;
+ }
+}
+
+/* Show a tree, as outlined in the comment above. */
+void raxShow(rax *rax) {
+ raxRecursiveShow(0,0,rax->head);
+ putchar('\n');
+}
+
+/* Used by debugnode() macro to show info about a given node. */
+void raxDebugShowNode(const char *msg, raxNode *n) {
+ printf("%s: %p [%.*s] key:%d size:%d children:",
+ msg, (void*)n, (int)n->size, (char*)n->data, n->iskey, n->size);
+ int numcld = n->iscompr ? 1 : n->size;
+ raxNode **cldptr = raxNodeLastChildPtr(n) - (numcld-1);
+ while(numcld--) {
+ raxNode *child;
+ memcpy(&child,cldptr,sizeof(child));
+ cldptr++;
+ printf("%p ", (void*)child);
+ }
+ printf("\n");
+ fflush(stdout);
+}
+
+
diff --git a/src/rax.h b/src/rax.h
new file mode 100644
index 000000000..43fceea35
--- /dev/null
+++ b/src/rax.h
@@ -0,0 +1,184 @@
+#ifndef RAX_H
+#define RAX_H
+
+#include <stdint.h>
+
+/* Representation of a radix tree as implemented in this file, containing
+ * the strings "foo", "foobar" and "footer" after the insertion of each
+ * word. When a node represents a key inside the radix tree, we write it
+ * between [], otherwise it is written between ().
+ *
+ * This is the vanilla representation:
+ *
+ * (f) ""
+ * \
+ * (o) "f"
+ * \
+ * (o) "fo"
+ * \
+ * [t b] "foo"
+ * / \
+ * "foot" (e) (a) "foob"
+ * / \
+ * "foote" (r) (r) "fooba"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However, this implementation uses a very common optimization where
+ * successive nodes having a single child are "compressed" into the node
+ * itself as a string of characters, each representing a next-level child,
+ * and only the link to the node representing the last character node is
+ * provided inside the representation. So the above representation is turned
+ * into:
+ *
+ * ["foo"] ""
+ * |
+ * [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * However this optimization makes the implementation a bit more complex.
+ * For instance if a key "first" is added to the above radix tree, a
+ * "node splitting" operation is needed, since the "foo" prefix is no longer
+ * composed of nodes having a single child one after the other. This is the
+ * above tree and the resulting node splitting after this event happens:
+ *
+ *
+ * (f) ""
+ * /
+ * (i o) "f"
+ * / \
+ * "firs" ("rst") (o) "fo"
+ * / \
+ * "first" [] [t b] "foo"
+ * / \
+ * "foot" ("er") ("ar") "foob"
+ * / \
+ * "footer" [] [] "foobar"
+ *
+ * Similarly after deletion, if a new chain of nodes having a single child
+ * is created (the chain must also not include nodes that represent keys),
+ * it must be compressed back into a single node.
+ *
+ */
+
+#define RAX_NODE_MAX_SIZE ((1<<29)-1)
+typedef struct raxNode {
+ uint32_t iskey:1; /* Does this node contain a key? */
+ uint32_t isnull:1; /* Associated value is NULL (don't store it). */
+ uint32_t iscompr:1; /* Node is compressed. */
+ uint32_t size:29; /* Number of children, or compressed string len. */
+ /* Data layout is as follows:
+ *
+ * If the node is not compressed we have 'size' bytes, one for each child
+ * character, and 'size' raxNode pointers, pointing to each child node.
+ * Note how the character is not stored in the children but in the
+ * edge of the parents:
+ *
+ * [header strlen=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
+ *
+ * If the node is compressed (strlen != 0) the node has 1 child.
+ * In that case the 'size' bytes of the string stored immediately at
+ * the start of the data section represent a sequence of successive
+ * nodes linked one after the other, for which only the last one in
+ * the sequence is actually represented as a node, and pointed to by
+ * the current compressed node.
+ *
+ * [header strlen=3][xyz][z-ptr](value-ptr?)
+ *
+ * Both compressed and non-compressed nodes can represent a key
+ * with associated data in the radix tree at any level (not just terminal
+ * nodes).
+ *
+ * If the node has an associated key (iskey=1) and is not NULL
+ * (isnull=0), then after the raxNode pointers pointing to the
+ * children, an additional value pointer is present (as you can see
+ * in the representation above as the "value-ptr" field).
+ */
+ unsigned char data[];
+} raxNode;
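+
+/* Given the layout described above, the bytes taken by a node can be
+ * sketched as follows (an illustrative helper, not part of this header;
+ * rax.c computes the same quantity with its private raxNodeCurrentLength()
+ * macro):
+ *
+ * size_t nodeBytes(raxNode *n) {
+ * return sizeof(raxNode) + n->size +
+ * sizeof(raxNode*) * (n->iscompr ? 1 : n->size) +
+ * ((n->iskey && !n->isnull) ? sizeof(void*) : 0);
+ * }
+ */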
+
+typedef struct rax {
+ raxNode *head;
+ uint64_t numele;
+ uint64_t numnodes;
+} rax;
+
+/* Stack data structure used by raxLowWalk() in order to, optionally, return
+ * a list of parent nodes to the caller. The nodes do not have a "parent"
+ * field for space concerns, so we use the auxiliary stack when needed. */
+#define RAX_STACK_STATIC_ITEMS 32
+typedef struct raxStack {
+ void **stack; /* Points to static_items or a heap-allocated array. */
+ size_t items, maxitems; /* Number of items contained and total space. */
+ /* Up to RAX_STACK_STATIC_ITEMS items we avoid allocating on the heap
+ * and use this static array of pointers instead. */
+ void *static_items[RAX_STACK_STATIC_ITEMS];
+ int oom; /* True if pushing into this stack failed for OOM at some point. */
+} raxStack;
+
+/* Optional callback used by iterators to be notified on each rax node,
+ * including nodes not representing keys. If the callback returns true,
+ * it has changed the node pointer in the iterator structure, and the
+ * iterator implementation will have to replace the pointer in the radix tree
+ * internals. This allows the callback to reallocate the node in order to
+ * perform very special operations, normally not needed by normal applications.
+ *
+ * This callback is used to perform very low level analysis of the radix tree
+ * structure, scanning each possible node (except the root node), or in order
+ * to reallocate the nodes to reduce the allocation fragmentation (this is the
+ * Redis application for this callback).
+ *
+ * This is currently only supported in forward iterations (raxNext). */
+typedef int (*raxNodeCallback)(raxNode **noderef);
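+
+/* A sketch of a conforming callback (illustrative only): it just observes
+ * each visited node and returns 0 to signal that *noderef was not replaced.
+ * A callback that reallocates the node must update *noderef and return 1,
+ * so that the iterator fixes the pointer inside the tree:
+ *
+ * static int observeNodeCb(raxNode **noderef) {
+ * observed_nodes++; // Assumed to be a counter defined elsewhere.
+ * (void)noderef;
+ * return 0;
+ * }
+ */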
+
+/* Radix tree iterator state is encapsulated into this data structure. */
+#define RAX_ITER_STATIC_LEN 128
+#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
+ element for the first iteration and
+ clear the flag. */
+#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
+#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
+ iterating. But it is slower. */
+typedef struct raxIterator {
+ int flags;
+ rax *rt; /* Radix tree we are iterating. */
+ unsigned char *key; /* The current string. */
+ void *data; /* Data associated to this key. */
+ size_t key_len; /* Current key length. */
+ size_t key_max; /* Max key len the current key buffer can hold. */
+ unsigned char key_static_string[RAX_ITER_STATIC_LEN];
+ raxNode *node; /* Current node. Only for unsafe iteration. */
+ raxStack stack; /* Stack used for unsafe iteration. */
+ raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
+} raxIterator;
+
+/* A special pointer returned for not found items. */
+extern void *raxNotFound;
+
+/* Exported API. */
+rax *raxNew(void);
+int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
+int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
+void *raxFind(rax *rax, unsigned char *s, size_t len);
+void raxFree(rax *rax);
+void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
+void raxStart(raxIterator *it, rax *rt);
+int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
+int raxNext(raxIterator *it);
+int raxPrev(raxIterator *it);
+int raxRandomWalk(raxIterator *it, size_t steps);
+int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
+void raxStop(raxIterator *it);
+int raxEOF(raxIterator *it);
+void raxShow(rax *rax);
+uint64_t raxSize(rax *rax);
+
+/* Internal API. May be used by the node callback in order to access rax nodes
+ * in a low level way, so this function is exported as well. */
+void raxSetData(raxNode *n, void *data);
+
+#endif
diff --git a/src/rax_malloc.h b/src/rax_malloc.h
new file mode 100644
index 000000000..9295985c6
--- /dev/null
+++ b/src/rax_malloc.h
@@ -0,0 +1,44 @@
+/* Rax -- A radix tree implementation.
+ *
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Allocator selection.
+ *
+ * This file is used in order to change the Rax allocator at compile time.
+ * Just set the following defines to what you want to use. Also add
+ * the include of your alternate allocator if needed (not needed in order
+ * to use the default libc allocator). */
+
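+/* For example (a sketch of the alternative this comment describes): to use
+ * the plain libc allocator instead, the block below could be replaced with:
+ *
+ * #include <stdlib.h>
+ * #define rax_malloc malloc
+ * #define rax_realloc realloc
+ * #define rax_free free
+ */
+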
+#ifndef RAX_ALLOC_H
+#define RAX_ALLOC_H
+#include "zmalloc.h"
+#define rax_malloc zmalloc
+#define rax_realloc zrealloc
+#define rax_free zfree
+#endif
diff --git a/src/rdb.c b/src/rdb.c
index 36ba151c7..3e43cb4e4 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -27,10 +27,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "lzf.h" /* LZF compression library */
#include "zipmap.h"
#include "endianconv.h"
+#include "stream.h"
#include <math.h>
#include <sys/types.h>
@@ -39,10 +40,34 @@
#include <sys/wait.h>
#include <arpa/inet.h>
#include <sys/stat.h>
+#include <sys/param.h>
-#define RDB_LOAD_NONE 0
-#define RDB_LOAD_ENC (1<<0)
-#define RDB_LOAD_PLAIN (1<<1)
+#define rdbExitReportCorruptRDB(...) rdbCheckThenExit(__LINE__,__VA_ARGS__)
+
+extern int rdbCheckMode;
+void rdbCheckError(const char *fmt, ...);
+void rdbCheckSetError(const char *fmt, ...);
+
+void rdbCheckThenExit(int linenum, char *reason, ...) {
+ va_list ap;
+ char msg[1024];
+ int len;
+
+ len = snprintf(msg,sizeof(msg),
+ "Internal error in RDB reading function at rdb.c:%d -> ", linenum);
+ va_start(ap,reason);
+ vsnprintf(msg+len,sizeof(msg)-len,reason,ap);
+ va_end(ap);
+
+ if (!rdbCheckMode) {
+ serverLog(LL_WARNING, "%s", msg);
+ char *argv[2] = {"",server.rdb_filename};
+ redis_check_rdb_main(2,argv,NULL);
+ } else {
+ rdbCheckError("%s",msg);
+ }
+ exit(1);
+}
static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
if (rdb && rioWrite(rdb,p,len) == 0)
@@ -50,6 +75,18 @@ static int rdbWriteRaw(rio *rdb, void *p, size_t len) {
return len;
}
+/* This is just a wrapper for the low level function rioRead() that will
+ * automatically abort if it is not possible to read the specified number
+ * of bytes. */
+void rdbLoadRaw(rio *rdb, void *buf, uint64_t len) {
+ if (rioRead(rdb,buf,len) == 0) {
+ rdbExitReportCorruptRDB(
+ "Impossible to read %llu bytes in rdbLoadRaw()",
+ (unsigned long long) len);
+ return; /* Not reached. */
+ }
+}
+
int rdbSaveType(rio *rdb, unsigned char type) {
return rdbWriteRaw(rdb,&type,1);
}
@@ -63,79 +100,131 @@ int rdbLoadType(rio *rdb) {
return type;
}
+/* This is only used to load old databases stored with the RDB_OPCODE_EXPIRETIME
+ * opcode. New versions of Redis store using the RDB_OPCODE_EXPIRETIME_MS
+ * opcode. */
time_t rdbLoadTime(rio *rdb) {
int32_t t32;
- if (rioRead(rdb,&t32,4) == 0) return -1;
+ rdbLoadRaw(rdb,&t32,4);
return (time_t)t32;
}
int rdbSaveMillisecondTime(rio *rdb, long long t) {
int64_t t64 = (int64_t) t;
+ memrev64ifbe(&t64); /* Store in little endian. */
return rdbWriteRaw(rdb,&t64,8);
}
-long long rdbLoadMillisecondTime(rio *rdb) {
+/* This function loads a time from the RDB file. It gets the version of the
+ * RDB because, unfortunately, before Redis 5 (RDB version 9), the function
+ * failed to convert data to/from little endian, so RDB files with keys having
+ * expires could not be shared between big endian and little endian systems
+ * (because the expire time would be totally wrong). The fix for this is just
+ * to call memrev64ifbe(), however if we fix this for all the RDB versions,
+ * this call will introduce an incompatibility for big endian systems:
+ * after upgrading to Redis version 5 they will no longer be able to load their
+ * own old RDB files. Because of that, we instead fix the function only for new
+ * RDB versions, and load older RDB versions as we used to do in the past,
+ * allowing big endian systems to load their own old RDB files. */
+long long rdbLoadMillisecondTime(rio *rdb, int rdbver) {
int64_t t64;
- if (rioRead(rdb,&t64,8) == 0) return -1;
+ rdbLoadRaw(rdb,&t64,8);
+ if (rdbver >= 9) /* Check the top comment of this function. */
+ memrev64ifbe(&t64); /* Convert in big endian if the system is BE. */
return (long long)t64;
}
/* Saves an encoded length. The first two bits in the first byte are used to
- * hold the encoding type. See the REDIS_RDB_* definitions for more information
+ * hold the encoding type. See the RDB_* definitions for more information
* on the types of encoding. */
-int rdbSaveLen(rio *rdb, uint32_t len) {
+int rdbSaveLen(rio *rdb, uint64_t len) {
unsigned char buf[2];
size_t nwritten;
if (len < (1<<6)) {
/* Save a 6 bit len */
- buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
+ buf[0] = (len&0xFF)|(RDB_6BITLEN<<6);
if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
nwritten = 1;
} else if (len < (1<<14)) {
/* Save a 14 bit len */
- buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
+ buf[0] = ((len>>8)&0xFF)|(RDB_14BITLEN<<6);
buf[1] = len&0xFF;
if (rdbWriteRaw(rdb,buf,2) == -1) return -1;
nwritten = 2;
- } else {
+ } else if (len <= UINT32_MAX) {
/* Save a 32 bit len */
- buf[0] = (REDIS_RDB_32BITLEN<<6);
+ buf[0] = RDB_32BITLEN;
if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
- len = htonl(len);
- if (rdbWriteRaw(rdb,&len,4) == -1) return -1;
+ uint32_t len32 = htonl(len);
+ if (rdbWriteRaw(rdb,&len32,4) == -1) return -1;
nwritten = 1+4;
+ } else {
+ /* Save a 64 bit len */
+ buf[0] = RDB_64BITLEN;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ len = htonu64(len);
+ if (rdbWriteRaw(rdb,&len,8) == -1) return -1;
+ nwritten = 1+8;
}
return nwritten;
}
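+
+/* For instance (an illustrative walk-through of the encoding above, not
+ * additional behavior): len = 63 fits in 6 bits and is saved as the single
+ * byte 0x3F, since RDB_6BITLEN is 0. len = 300 (hex 0x012C) needs the
+ * 14 bit form: the first byte is (RDB_14BITLEN<<6)|0x01 = 0x41 and the
+ * second byte is 0x2C. */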
-/* Load an encoded length. The "isencoded" argument is set to 1 if the length
- * is not actually a length but an "encoding type". See the REDIS_RDB_ENC_*
- * definitions in rdb.h for more information. */
-uint32_t rdbLoadLen(rio *rdb, int *isencoded) {
+
+/* Load an encoded length. If the loaded length is a normal length as stored
+ * with rdbSaveLen(), the read length is set to '*lenptr'. If instead the
+ * loaded length describes a special encoding that follows, then '*isencoded'
+ * is set to 1 and the encoding format is stored at '*lenptr'.
+ *
+ * See the RDB_ENC_* definitions in rdb.h for more information on special
+ * encodings.
+ *
+ * The function returns -1 on error, 0 on success. */
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr) {
unsigned char buf[2];
- uint32_t len;
int type;
if (isencoded) *isencoded = 0;
- if (rioRead(rdb,buf,1) == 0) return REDIS_RDB_LENERR;
+ if (rioRead(rdb,buf,1) == 0) return -1;
type = (buf[0]&0xC0)>>6;
- if (type == REDIS_RDB_ENCVAL) {
+ if (type == RDB_ENCVAL) {
/* Read a 6 bit encoding type. */
if (isencoded) *isencoded = 1;
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_6BITLEN) {
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_6BITLEN) {
/* Read a 6 bit len. */
- return buf[0]&0x3F;
- } else if (type == REDIS_RDB_14BITLEN) {
+ *lenptr = buf[0]&0x3F;
+ } else if (type == RDB_14BITLEN) {
/* Read a 14 bit len. */
- if (rioRead(rdb,buf+1,1) == 0) return REDIS_RDB_LENERR;
- return ((buf[0]&0x3F)<<8)|buf[1];
- } else {
+ if (rioRead(rdb,buf+1,1) == 0) return -1;
+ *lenptr = ((buf[0]&0x3F)<<8)|buf[1];
+ } else if (buf[0] == RDB_32BITLEN) {
/* Read a 32 bit len. */
- if (rioRead(rdb,&len,4) == 0) return REDIS_RDB_LENERR;
- return ntohl(len);
+ uint32_t len;
+ if (rioRead(rdb,&len,4) == 0) return -1;
+ *lenptr = ntohl(len);
+ } else if (buf[0] == RDB_64BITLEN) {
+ /* Read a 64 bit len. */
+ uint64_t len;
+ if (rioRead(rdb,&len,8) == 0) return -1;
+ *lenptr = ntohu64(len);
+ } else {
+ rdbExitReportCorruptRDB(
+ "Unknown length encoding %d in rdbLoadLen()",type);
+ return -1; /* Never reached. */
}
+ return 0;
+}
+
+/* This is like rdbLoadLenByRef() but directly returns the value read
+ * from the RDB stream, signaling an error by returning RDB_LENERR
+ * (since it is too large a count to be applicable to any Redis data
+ * structure). */
+uint64_t rdbLoadLen(rio *rdb, int *isencoded) {
+ uint64_t len;
+
+ if (rdbLoadLenByRef(rdb,isencoded,&len) == -1) return RDB_LENERR;
+ return len;
}
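+
+/* Editorial sketch (not part of the patch): the matching decoder, i.e.
+ * rdbLoadLenByRef() without the rio plumbing, reusing the SK_* constants
+ * from the encoder sketch above. Returns bytes consumed, or -1 on an
+ * unknown first byte. */
+static int sk_decode_len(const unsigned char *in, uint64_t *lenptr,
+                         int *isencoded) {
+    int type = (in[0] & 0xC0) >> 6;
+    if (isencoded) *isencoded = 0;
+    if (type == 3) {                            /* RDB_ENCVAL */
+        if (isencoded) *isencoded = 1;
+        *lenptr = in[0] & 0x3F;
+        return 1;
+    } else if (type == SK_6BITLEN) {
+        *lenptr = in[0] & 0x3F;
+        return 1;
+    } else if (type == SK_14BITLEN) {
+        *lenptr = ((uint64_t)(in[0] & 0x3F) << 8) | in[1];
+        return 2;
+    } else if (in[0] == SK_32BITLEN) {
+        uint32_t len;
+        memcpy(&len, in+1, 4);
+        *lenptr = ntohl(len);
+        return 5;
+    } else if (in[0] == SK_64BITLEN) {
+        *lenptr = 0;
+        for (int i = 0; i < 8; i++) *lenptr = (*lenptr << 8) | in[1+i];
+        return 9;
+    }
+    return -1;
+}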
/* Encodes the "value" argument as integer when it fits in the supported ranges
@@ -144,16 +233,16 @@ uint32_t rdbLoadLen(rio *rdb, int *isencoded) {
* length is returned. Otherwise 0 is returned. */
int rdbEncodeInteger(long long value, unsigned char *enc) {
if (value >= -(1<<7) && value <= (1<<7)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT8;
enc[1] = value&0xFF;
return 2;
} else if (value >= -(1<<15) && value <= (1<<15)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT16;
enc[1] = value&0xFF;
enc[2] = (value>>8)&0xFF;
return 3;
} else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
- enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
+ enc[0] = (RDB_ENCVAL<<6)|RDB_ENC_INT32;
enc[1] = value&0xFF;
enc[2] = (value>>8)&0xFF;
enc[3] = (value>>16)&0xFF;
@@ -167,39 +256,41 @@ int rdbEncodeInteger(long long value, unsigned char *enc) {
/* Loads an integer-encoded object with the specified encoding type "enctype".
 * The returned value changes according to the flags; see
 * rdbGenericLoadStringObject() for more info. */
-void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags) {
+void *rdbLoadIntegerObject(rio *rdb, int enctype, int flags, size_t *lenptr) {
int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
int encode = flags & RDB_LOAD_ENC;
unsigned char enc[4];
long long val;
- if (enctype == REDIS_RDB_ENC_INT8) {
+ if (enctype == RDB_ENC_INT8) {
if (rioRead(rdb,enc,1) == 0) return NULL;
val = (signed char)enc[0];
- } else if (enctype == REDIS_RDB_ENC_INT16) {
+ } else if (enctype == RDB_ENC_INT16) {
uint16_t v;
if (rioRead(rdb,enc,2) == 0) return NULL;
v = enc[0]|(enc[1]<<8);
val = (int16_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT32) {
+ } else if (enctype == RDB_ENC_INT32) {
uint32_t v;
if (rioRead(rdb,enc,4) == 0) return NULL;
v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
val = (int32_t)v;
} else {
val = 0; /* anti-warning */
- redisPanic("Unknown RDB integer encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB integer encoding type %d",enctype);
}
- if (plain) {
- char buf[REDIS_LONGSTR_SIZE], *p;
+ if (plain || sds) {
+ char buf[LONG_STR_SIZE], *p;
int len = ll2string(buf,sizeof(buf),val);
- p = zmalloc(len);
+ if (lenptr) *lenptr = len;
+ p = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len);
memcpy(p,buf,len);
return p;
} else if (encode) {
- return createStringObjectFromLongLong(val);
+ return createStringObjectFromLongLongForValue(val);
} else {
- return createObject(REDIS_STRING,sdsfromlonglong(val));
+ return createObject(OBJ_STRING,sdsfromlonglong(val));
}
}
@@ -222,13 +313,13 @@ int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
return rdbEncodeInteger(value,enc);
}
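+
+/* Editorial sketch (not part of the patch): what the integer special
+ * encoding looks like on the wire for a 16 bit value, assuming the rdb.h
+ * values RDB_ENCVAL == 3 and RDB_ENC_INT16 == 1. The payload is little
+ * endian regardless of the host. */
+#include <assert.h>
+static void sk_int16_encoding_example(void) {
+    long long value = -2;
+    unsigned char enc[3];
+    enc[0] = (3 << 6) | 1;           /* (RDB_ENCVAL<<6)|RDB_ENC_INT16 */
+    enc[1] = value & 0xFF;           /* low byte first: 0xFE */
+    enc[2] = (value >> 8) & 0xFF;    /* then the high byte: 0xFF */
+    assert(enc[0] == 0xC1 && enc[1] == 0xFE && enc[2] == 0xFF);
+}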
-int rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
- size_t original_len) {
+ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len,
+ size_t original_len) {
unsigned char byte;
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
/* Data compressed! Let's save it on disk */
- byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
+ byte = (RDB_ENCVAL<<6)|RDB_ENC_LZF;
if ((n = rdbWriteRaw(rdb,&byte,1)) == -1) goto writeerr;
nwritten += n;
@@ -247,7 +338,7 @@ writeerr:
return -1;
}
-int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
+ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
size_t comprlen, outlen;
void *out;
@@ -260,7 +351,7 @@ int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
zfree(out);
return 0;
}
- size_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
+ ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
zfree(out);
return nwritten;
}
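+
+/* Editorial sketch (not part of the patch): the framing rdbSaveLzfBlob()
+ * writes for a compressed string, reusing sk_encode_len() from the sketch
+ * above and assuming RDB_ENC_LZF == 3 as in rdb.h. The actual compression
+ * is out of scope here; 'comp' is an already LZF-compressed buffer. */
+static size_t sk_lzf_frame(unsigned char *out, const void *comp,
+                           size_t comp_len, size_t orig_len) {
+    size_t off = 0;
+    out[off++] = (3 << 6) | 3;                 /* (RDB_ENCVAL<<6)|ENC_LZF */
+    off += sk_encode_len(out + off, comp_len); /* compressed length */
+    off += sk_encode_len(out + off, orig_len); /* uncompressed length */
+    memcpy(out + off, comp, comp_len);         /* compressed payload */
+    return off + comp_len;
+}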
@@ -268,32 +359,38 @@ int rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) {
/* Load an LZF compressed string in RDB format. The returned value
* changes according to 'flags'. For more info check the
* rdbGenericLoadStringObject() function. */
-void *rdbLoadLzfStringObject(rio *rdb, int flags) {
+void *rdbLoadLzfStringObject(rio *rdb, int flags, size_t *lenptr) {
int plain = flags & RDB_LOAD_PLAIN;
- unsigned int len, clen;
+ int sds = flags & RDB_LOAD_SDS;
+ uint64_t len, clen;
unsigned char *c = NULL;
- sds val = NULL;
+ char *val = NULL;
- if ((clen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((clen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
if ((c = zmalloc(clen)) == NULL) goto err;
/* Allocate our target according to the uncompressed size. */
if (plain) {
val = zmalloc(len);
} else {
- if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
+ val = sdsnewlen(SDS_NOINIT,len);
}
+ if (lenptr) *lenptr = len;
/* Load the compressed representation and uncompress it to target. */
if (rioRead(rdb,c,clen) == 0) goto err;
- if (lzf_decompress(c,clen,val,len) == 0) goto err;
+ if (lzf_decompress(c,clen,val,len) == 0) {
+ if (rdbCheckMode) rdbCheckSetError("Invalid LZF compressed string");
+ goto err;
+ }
zfree(c);
- if (plain)
+ if (plain || sds) {
return val;
- else
- return createObject(REDIS_STRING,val);
+ } else {
+ return createObject(OBJ_STRING,val);
+ }
err:
zfree(c);
if (plain)
@@ -305,9 +402,9 @@ err:
/* Save a string object as [len][data] on disk. If the object is a string
 * representation of an integer value, we try to save it in a special form. */
-int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
int enclen;
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
/* Try integer encoding */
if (len <= 11) {
@@ -338,16 +435,16 @@ int rdbSaveRawString(rio *rdb, unsigned char *s, size_t len) {
}
/* Save a long long value as either an encoded string or a string. */
-int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
+ssize_t rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
unsigned char buf[32];
- int n, nwritten = 0;
+ ssize_t n, nwritten = 0;
int enclen = rdbEncodeInteger(value,buf);
if (enclen > 0) {
return rdbWriteRaw(rdb,buf,enclen);
} else {
/* Encode as string */
enclen = ll2string((char*)buf,32,value);
- redisAssert(enclen < 32);
+ serverAssert(enclen < 32);
if ((n = rdbSaveLen(rdb,enclen)) == -1) return -1;
nwritten += n;
if ((n = rdbWriteRaw(rdb,buf,enclen)) == -1) return -1;
@@ -356,14 +453,14 @@ int rdbSaveLongLongAsStringObject(rio *rdb, long long value) {
return nwritten;
}
-/* Like rdbSaveStringObjectRaw() but handle encoded objects */
-int rdbSaveStringObject(rio *rdb, robj *obj) {
+/* Like rdbSaveRawString() but gets a Redis object instead. */
+ssize_t rdbSaveStringObject(rio *rdb, robj *obj) {
 /* Avoid decoding the object and then encoding it again, if the
 * object is already integer encoded. */
- if (obj->encoding == REDIS_ENCODING_INT) {
+ if (obj->encoding == OBJ_ENCODING_INT) {
return rdbSaveLongLongAsStringObject(rdb,(long)obj->ptr);
} else {
- redisAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
+ serverAssertWithInfo(NULL,obj,sdsEncodedObject(obj));
return rdbSaveRawString(rdb,obj->ptr,sdslen(obj->ptr));
}
}
@@ -376,53 +473,61 @@ int rdbSaveStringObject(rio *rdb, robj *obj) {
* efficient. When this flag is passed the function
* no longer guarantees that obj->ptr is an SDS string.
* RDB_LOAD_PLAIN: Return a plain string allocated with zmalloc()
- * instead of a Redis object.
+ * instead of a Redis object with an sds in it.
+ * RDB_LOAD_SDS: Return an SDS string instead of a Redis object.
+ *
+ * On I/O error NULL is returned.
*/
-void *rdbGenericLoadStringObject(rio *rdb, int flags) {
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr) {
int encode = flags & RDB_LOAD_ENC;
int plain = flags & RDB_LOAD_PLAIN;
+ int sds = flags & RDB_LOAD_SDS;
int isencoded;
- uint32_t len;
+ uint64_t len;
len = rdbLoadLen(rdb,&isencoded);
if (isencoded) {
switch(len) {
- case REDIS_RDB_ENC_INT8:
- case REDIS_RDB_ENC_INT16:
- case REDIS_RDB_ENC_INT32:
- return rdbLoadIntegerObject(rdb,len,flags);
- case REDIS_RDB_ENC_LZF:
- return rdbLoadLzfStringObject(rdb,flags);
+ case RDB_ENC_INT8:
+ case RDB_ENC_INT16:
+ case RDB_ENC_INT32:
+ return rdbLoadIntegerObject(rdb,len,flags,lenptr);
+ case RDB_ENC_LZF:
+ return rdbLoadLzfStringObject(rdb,flags,lenptr);
default:
- redisPanic("Unknown RDB encoding type");
+ rdbExitReportCorruptRDB("Unknown RDB string encoding type %d",len);
}
}
- if (len == REDIS_RDB_LENERR) return NULL;
- if (!plain) {
- robj *o = encode ? createStringObject(NULL,len) :
- createRawStringObject(NULL,len);
- if (len && rioRead(rdb,o->ptr,len) == 0) {
- decrRefCount(o);
+ if (len == RDB_LENERR) return NULL;
+ if (plain || sds) {
+ void *buf = plain ? zmalloc(len) : sdsnewlen(SDS_NOINIT,len);
+ if (lenptr) *lenptr = len;
+ if (len && rioRead(rdb,buf,len) == 0) {
+ if (plain)
+ zfree(buf);
+ else
+ sdsfree(buf);
return NULL;
}
- return o;
+ return buf;
} else {
- void *buf = zmalloc(len);
- if (len && rioRead(rdb,buf,len) == 0) {
- zfree(buf);
+ robj *o = encode ? createStringObject(SDS_NOINIT,len) :
+ createRawStringObject(SDS_NOINIT,len);
+ if (len && rioRead(rdb,o->ptr,len) == 0) {
+ decrRefCount(o);
return NULL;
}
- return buf;
+ return o;
}
}
robj *rdbLoadStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
}
robj *rdbLoadEncodedStringObject(rio *rdb) {
- return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC);
+ return rdbGenericLoadStringObject(rdb,RDB_LOAD_ENC,NULL);
}
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
@@ -485,39 +590,74 @@ int rdbLoadDoubleValue(rio *rdb, double *val) {
}
}
+/* Saves a double for RDB 8 or greater, where IEEE 754 binary64 format is
+ * assumed. We just make sure the value is always stored in little endian;
+ * apart from that it is copied verbatim from memory to disk.
+ *
+ * Return -1 on error, the size of the serialized value on success. */
+int rdbSaveBinaryDoubleValue(rio *rdb, double val) {
+ memrev64ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Loads a double from RDB 8 or greater. See rdbSaveBinaryDoubleValue() for
+ * more info. On error -1 is returned, otherwise 0. */
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev64ifbe(val);
+ return 0;
+}
+
+/* Like rdbSaveBinaryDoubleValue() but single precision. */
+int rdbSaveBinaryFloatValue(rio *rdb, float val) {
+ memrev32ifbe(&val);
+ return rdbWriteRaw(rdb,&val,sizeof(val));
+}
+
+/* Like rdbLoadBinaryDoubleValue() but single precision. */
+int rdbLoadBinaryFloatValue(rio *rdb, float *val) {
+ if (rioRead(rdb,val,sizeof(*val)) == 0) return -1;
+ memrev32ifbe(val);
+ return 0;
+}
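+
+/* Editorial sketch (not part of the patch): a binary double is the host's
+ * IEEE 754 binary64 bytes normalized to little endian. A portable writer
+ * without memrev64ifbe() can emit the bytes from the bit pattern directly,
+ * since double and uint64_t share the host byte order: */
+static void sk_double_to_le(double d, unsigned char out[8]) {
+    uint64_t bits;
+    memcpy(&bits, &d, 8);                 /* IEEE 754 binary64 bits */
+    for (int i = 0; i < 8; i++)
+        out[i] = (bits >> (8*i)) & 0xFF;  /* little endian on any host */
+}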
+
/* Save the object type of object "o". */
int rdbSaveObjectType(rio *rdb, robj *o) {
switch (o->type) {
- case REDIS_STRING:
- return rdbSaveType(rdb,REDIS_RDB_TYPE_STRING);
- case REDIS_LIST:
- if (o->encoding == REDIS_ENCODING_QUICKLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_LIST_QUICKLIST);
+ case OBJ_STRING:
+ return rdbSaveType(rdb,RDB_TYPE_STRING);
+ case OBJ_LIST:
+ if (o->encoding == OBJ_ENCODING_QUICKLIST)
+ return rdbSaveType(rdb,RDB_TYPE_LIST_QUICKLIST);
else
- redisPanic("Unknown list encoding");
- case REDIS_SET:
- if (o->encoding == REDIS_ENCODING_INTSET)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_SET_INTSET);
- else if (o->encoding == REDIS_ENCODING_HT)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_SET);
+ serverPanic("Unknown list encoding");
+ case OBJ_SET:
+ if (o->encoding == OBJ_ENCODING_INTSET)
+ return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_SET);
else
- redisPanic("Unknown set encoding");
- case REDIS_ZSET:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_SKIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_ZSET);
+ serverPanic("Unknown set encoding");
+ case OBJ_ZSET:
+ if (o->encoding == OBJ_ENCODING_ZIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_ZIPLIST);
+ else if (o->encoding == OBJ_ENCODING_SKIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_ZSET_2);
else
- redisPanic("Unknown sorted set encoding");
- case REDIS_HASH:
- if (o->encoding == REDIS_ENCODING_ZIPLIST)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH_ZIPLIST);
- else if (o->encoding == REDIS_ENCODING_HT)
- return rdbSaveType(rdb,REDIS_RDB_TYPE_HASH);
+ serverPanic("Unknown sorted set encoding");
+ case OBJ_HASH:
+ if (o->encoding == OBJ_ENCODING_ZIPLIST)
+ return rdbSaveType(rdb,RDB_TYPE_HASH_ZIPLIST);
+ else if (o->encoding == OBJ_ENCODING_HT)
+ return rdbSaveType(rdb,RDB_TYPE_HASH);
else
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
+ case OBJ_STREAM:
+ return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS);
+ case OBJ_MODULE:
+ return rdbSaveType(rdb,RDB_TYPE_MODULE_2);
default:
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
return -1; /* avoid warning */
}
@@ -531,24 +671,103 @@ int rdbLoadObjectType(rio *rdb) {
return type;
}
-/* Save a Redis object. Returns -1 on error, number of bytes written on success. */
-int rdbSaveObject(rio *rdb, robj *o) {
- int n = 0, nwritten = 0;
+/* This helper function serializes a consumer group Pending Entries List (PEL)
+ * into the RDB file. The 'nacks' argument tells the function whether to also
+ * persist the information about the not acknowledged messages, or to persist
+ * just the IDs: this is useful because for the global consumer group PEL
+ * we serialize the NACKs as well, but when serializing the local consumer
+ * PELs we just add the ID, which will be resolved inside the global PEL to
+ * put a reference to the same structure. */
+ssize_t rdbSaveStreamPEL(rio *rdb, rax *pel, int nacks) {
+ ssize_t n, nwritten = 0;
+
+ /* Number of entries in the PEL. */
+ if ((n = rdbSaveLen(rdb,raxSize(pel))) == -1) return -1;
+ nwritten += n;
+
+ /* Save each entry. */
+ raxIterator ri;
+ raxStart(&ri,pel);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+        /* We store IDs in raw form as 128 bit big endian numbers, like
+         * they are inside the radix tree key. */
+ if ((n = rdbWriteRaw(rdb,ri.key,sizeof(streamID))) == -1) return -1;
+ nwritten += n;
+
+ if (nacks) {
+ streamNACK *nack = ri.data;
+ if ((n = rdbSaveMillisecondTime(rdb,nack->delivery_time)) == -1)
+ return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,nack->delivery_count)) == -1) return -1;
+ nwritten += n;
+ /* We don't save the consumer name: we'll save the pending IDs
+ * for each consumer in the consumer PEL, and resolve the consumer
+ * at loading time. */
+ }
+ }
+ raxStop(&ri);
+ return nwritten;
+}
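+
+/* Editorial sketch (not part of the patch): why raw radix tree keys can be
+ * written verbatim. A streamID is packed as 16 big endian bytes (ms then
+ * seq), so memcmp() order over the keys equals numeric ID order. The type
+ * below is a stand-in for the one in stream.h. */
+typedef struct { uint64_t ms, seq; } sk_streamID;
+static void sk_stream_id_to_key(const sk_streamID *id, unsigned char key[16]) {
+    for (int i = 0; i < 8; i++) {
+        key[i]   = (id->ms  >> (8*(7-i))) & 0xFF;  /* ms, big endian */
+        key[8+i] = (id->seq >> (8*(7-i))) & 0xFF;  /* seq, big endian */
+    }
+}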
+
+/* Serialize the consumers of a stream consumer group into the RDB. Helper
+ * function for the stream data type serialization. What we do here is to
+ * persist each consumer's metadata and its PEL. */
+ssize_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) {
+ ssize_t n, nwritten = 0;
+
+ /* Number of consumers in this consumer group. */
+ if ((n = rdbSaveLen(rdb,raxSize(cg->consumers))) == -1) return -1;
+ nwritten += n;
+
+ /* Save each consumer. */
+ raxIterator ri;
+ raxStart(&ri,cg->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+
+ /* Consumer name. */
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) return -1;
+ nwritten += n;
+
+ /* Last seen time. */
+ if ((n = rdbSaveMillisecondTime(rdb,consumer->seen_time)) == -1)
+ return -1;
+ nwritten += n;
+
+ /* Consumer PEL, without the ACKs (see the last parameter of the
+ * function, passed with a value of 0); at loading time we'll look up
+ * the ID in the consumer group global PEL and put a reference in the
+ * consumer local PEL. */
+ if ((n = rdbSaveStreamPEL(rdb,consumer->pel,0)) == -1)
+ return -1;
+ nwritten += n;
+ }
+ raxStop(&ri);
+ return nwritten;
+}
+
+/* Save a Redis object.
+ * Returns -1 on error, number of bytes written on success. */
+ssize_t rdbSaveObject(rio *rdb, robj *o) {
+ ssize_t n = 0, nwritten = 0;
- if (o->type == REDIS_STRING) {
+ if (o->type == OBJ_STRING) {
/* Save a string value */
if ((n = rdbSaveStringObject(rdb,o)) == -1) return -1;
nwritten += n;
- } else if (o->type == REDIS_LIST) {
+ } else if (o->type == OBJ_LIST) {
/* Save a list value */
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklist *ql = o->ptr;
quicklistNode *node = ql->head;
if ((n = rdbSaveLen(rdb,ql->len)) == -1) return -1;
nwritten += n;
- do {
+ while(node) {
if (quicklistNodeIsCompressed(node)) {
void *data;
size_t compress_len = quicklistGetLzf(node, &data);
@@ -558,94 +777,216 @@ int rdbSaveObject(rio *rdb, robj *o) {
if ((n = rdbSaveRawString(rdb,node->zl,node->sz)) == -1) return -1;
nwritten += n;
}
- } while ((node = node->next));
+ node = node->next;
+ }
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
- } else if (o->type == REDIS_SET) {
+ } else if (o->type == OBJ_SET) {
/* Save a set value */
- if (o->encoding == REDIS_ENCODING_HT) {
+ if (o->encoding == OBJ_ENCODING_HT) {
dict *set = o->ptr;
dictIterator *di = dictGetIterator(set);
dictEntry *de;
- if ((n = rdbSaveLen(rdb,dictSize(set))) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,dictSize(set))) == -1) {
+ dictReleaseIterator(di);
+ return -1;
+ }
nwritten += n;
while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
+ sds ele = dictGetKey(de);
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)ele,sdslen(ele)))
+ == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
nwritten += n;
}
dictReleaseIterator(di);
- } else if (o->encoding == REDIS_ENCODING_INTSET) {
+ } else if (o->encoding == OBJ_ENCODING_INTSET) {
size_t l = intsetBlobLen((intset*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (o->type == REDIS_ZSET) {
+ } else if (o->type == OBJ_ZSET) {
/* Save a sorted set value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
size_t l = ziplistBlobLen((unsigned char*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = o->ptr;
- dictIterator *di = dictGetIterator(zs->dict);
- dictEntry *de;
+ zskiplist *zsl = zs->zsl;
- if ((n = rdbSaveLen(rdb,dictSize(zs->dict))) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,zsl->length)) == -1) return -1;
nwritten += n;
- while((de = dictNext(di)) != NULL) {
- robj *eleobj = dictGetKey(de);
- double *score = dictGetVal(de);
-
- if ((n = rdbSaveStringObject(rdb,eleobj)) == -1) return -1;
+ /* We save the skiplist elements from the greatest to the smallest
+ * (that's trivial since the elements are already ordered in the
+ * skiplist): this improves the load process, since the next loaded
+ * element will always be the smallest, so adding to the skiplist
+ * will always immediately stop at the head, making the insertion
+ * O(1) instead of O(log(N)). */
+ zskiplistNode *zn = zsl->tail;
+ while (zn != NULL) {
+ if ((n = rdbSaveRawString(rdb,
+ (unsigned char*)zn->ele,sdslen(zn->ele))) == -1)
+ {
+ return -1;
+ }
nwritten += n;
- if ((n = rdbSaveDoubleValue(rdb,*score)) == -1) return -1;
+ if ((n = rdbSaveBinaryDoubleValue(rdb,zn->score)) == -1)
+ return -1;
nwritten += n;
+ zn = zn->backward;
}
- dictReleaseIterator(di);
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
- } else if (o->type == REDIS_HASH) {
+ } else if (o->type == OBJ_HASH) {
/* Save a hash value */
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
size_t l = ziplistBlobLen((unsigned char*)o->ptr);
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
- } else if (o->encoding == REDIS_ENCODING_HT) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
dictIterator *di = dictGetIterator(o->ptr);
dictEntry *de;
- if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) return -1;
+ if ((n = rdbSaveLen(rdb,dictSize((dict*)o->ptr))) == -1) {
+ dictReleaseIterator(di);
+ return -1;
+ }
nwritten += n;
while((de = dictNext(di)) != NULL) {
- robj *key = dictGetKey(de);
- robj *val = dictGetVal(de);
+ sds field = dictGetKey(de);
+ sds value = dictGetVal(de);
- if ((n = rdbSaveStringObject(rdb,key)) == -1) return -1;
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)field,
+ sdslen(field))) == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
nwritten += n;
- if ((n = rdbSaveStringObject(rdb,val)) == -1) return -1;
+ if ((n = rdbSaveRawString(rdb,(unsigned char*)value,
+ sdslen(value))) == -1)
+ {
+ dictReleaseIterator(di);
+ return -1;
+ }
nwritten += n;
}
dictReleaseIterator(di);
-
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
+ }
+ } else if (o->type == OBJ_STREAM) {
+ /* Store how many listpacks we have inside the radix tree. */
+ stream *s = o->ptr;
+ rax *rax = s->rax;
+ if ((n = rdbSaveLen(rdb,raxSize(rax))) == -1) return -1;
+ nwritten += n;
+
+ /* Serialize all the listpacks inside the radix tree as they are;
+ * when loading back, we'll use the first entry of each listpack
+ * to insert it back into the radix tree. */
+ raxIterator ri;
+ raxStart(&ri,rax);
+ raxSeek(&ri,"^",NULL,0);
+ while (raxNext(&ri)) {
+ unsigned char *lp = ri.data;
+ size_t lp_bytes = lpBytes(lp);
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveRawString(rdb,lp,lp_bytes)) == -1) return -1;
+ nwritten += n;
}
+ raxStop(&ri);
+
+ /* Save the number of elements inside the stream. We cannot obtain
+ * this easily later, since getting it would require scanning every
+ * macro node and counting the items: not a great CPU / space tradeoff. */
+ if ((n = rdbSaveLen(rdb,s->length)) == -1) return -1;
+ nwritten += n;
+ /* Save the last entry ID. */
+ if ((n = rdbSaveLen(rdb,s->last_id.ms)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,s->last_id.seq)) == -1) return -1;
+ nwritten += n;
+
+ /* The consumer groups and their clients are part of the stream
+ * type, so serialize every consumer group. */
+
+ /* Save the number of groups. */
+ size_t num_cgroups = s->cgroups ? raxSize(s->cgroups) : 0;
+ if ((n = rdbSaveLen(rdb,num_cgroups)) == -1) return -1;
+ nwritten += n;
+
+ if (num_cgroups) {
+ /* Serialize each consumer group. */
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *cg = ri.data;
+
+ /* Save the group name. */
+ if ((n = rdbSaveRawString(rdb,ri.key,ri.key_len)) == -1)
+ return -1;
+ nwritten += n;
+ /* Last ID. */
+ if ((n = rdbSaveLen(rdb,cg->last_id.ms)) == -1) return -1;
+ nwritten += n;
+ if ((n = rdbSaveLen(rdb,cg->last_id.seq)) == -1) return -1;
+ nwritten += n;
+
+ /* Save the global PEL. */
+ if ((n = rdbSaveStreamPEL(rdb,cg->pel,1)) == -1) return -1;
+ nwritten += n;
+
+ /* Save the consumers of this group. */
+ if ((n = rdbSaveStreamConsumers(rdb,cg)) == -1) return -1;
+ nwritten += n;
+ }
+ raxStop(&ri);
+ }
+ } else if (o->type == OBJ_MODULE) {
+ /* Save a module-specific value. */
+ RedisModuleIO io;
+ moduleValue *mv = o->ptr;
+ moduleType *mt = mv->type;
+ moduleInitIOContext(io,mt,rdb);
+
+ /* Write the "module" identifier as prefix, so that we'll be able
+ * to call the right module during loading. */
+ int retval = rdbSaveLen(rdb,mt->id);
+ if (retval == -1) return -1;
+ io.bytes += retval;
+
+ /* Then write the module-specific representation + EOF marker. */
+ mt->rdb_save(&io,mv->value);
+ retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
+ if (retval == -1) return -1;
+ io.bytes += retval;
+
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+ return io.error ? -1 : (ssize_t)io.bytes;
} else {
- redisPanic("Unknown object type");
+ serverPanic("Unknown object type");
}
return nwritten;
}
@@ -654,9 +995,9 @@ int rdbSaveObject(rio *rdb, robj *o) {
 * the rdbSaveObject() function. Currently we use a trick to get
 * this length with very few changes to the code. In the future
* we could switch to a faster solution. */
-off_t rdbSavedObjectLen(robj *o) {
- int len = rdbSaveObject(NULL,o);
- redisAssertWithInfo(NULL,o,len != -1);
+size_t rdbSavedObjectLen(robj *o) {
+ ssize_t len = rdbSaveObject(NULL,o);
+ serverAssertWithInfo(NULL,o,len != -1);
return len;
}
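+
+/* Editorial sketch (not part of the patch): the "trick" is that the write
+ * helpers treat a NULL rio as a pure byte counter, so rdbSaveObject(NULL,o)
+ * performs no I/O and just sums lengths. A stand-in showing the pattern
+ * against a FILE* sink: */
+#include <stdio.h>
+#include <sys/types.h>
+static ssize_t sk_write_or_count(FILE *sink, const void *buf, size_t len) {
+    if (sink == NULL) return (ssize_t)len;  /* counting mode, no I/O */
+    return fwrite(buf, 1, len, sink) == len ? (ssize_t)len : -1;
+}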
@@ -664,17 +1005,36 @@ off_t rdbSavedObjectLen(robj *o) {
* On error -1 is returned.
* On success if the key was actually saved 1 is returned, otherwise 0
* is returned (the key was already expired). */
-int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
- long long expiretime, long long now)
-{
+int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) {
+ int savelru = server.maxmemory_policy & MAXMEMORY_FLAG_LRU;
+ int savelfu = server.maxmemory_policy & MAXMEMORY_FLAG_LFU;
+
/* Save the expire time */
if (expiretime != -1) {
- /* If this key is already expired skip it */
- if (expiretime < now) return 0;
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
+ if (rdbSaveType(rdb,RDB_OPCODE_EXPIRETIME_MS) == -1) return -1;
if (rdbSaveMillisecondTime(rdb,expiretime) == -1) return -1;
}
+ /* Save the LRU info. */
+ if (savelru) {
+ uint64_t idletime = estimateObjectIdleTime(val);
+ idletime /= 1000; /* Using seconds is enough and requires less space.*/
+ if (rdbSaveType(rdb,RDB_OPCODE_IDLE) == -1) return -1;
+ if (rdbSaveLen(rdb,idletime) == -1) return -1;
+ }
+
+ /* Save the LFU info. */
+ if (savelfu) {
+ uint8_t buf[1];
+ buf[0] = LFUDecrAndReturn(val);
+ /* We can encode this in exactly two bytes: the opcode and an 8
+ * bit counter, since the frequency is logarithmic with a 0-255 range.
+ * Note that we do not store the halving time because to reset it
+ * a single time when loading does not affect the frequency much. */
+ if (rdbSaveType(rdb,RDB_OPCODE_FREQ) == -1) return -1;
+ if (rdbWriteRaw(rdb,buf,1) == -1) return -1;
+ }
+
/* Save type, key, value */
if (rdbSaveObjectType(rdb,val) == -1) return -1;
if (rdbSaveStringObject(rdb,key) == -1) return -1;
@@ -683,83 +1043,94 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val,
}
/* Save an AUX field. */
-int rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_AUX) == -1) return -1;
- if (rdbSaveRawString(rdb,key,keylen) == -1) return -1;
- if (rdbSaveRawString(rdb,val,vallen) == -1) return -1;
- return 1;
+ssize_t rdbSaveAuxField(rio *rdb, void *key, size_t keylen, void *val, size_t vallen) {
+ ssize_t ret, len = 0;
+ if ((ret = rdbSaveType(rdb,RDB_OPCODE_AUX)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,key,keylen)) == -1) return -1;
+ len += ret;
+ if ((ret = rdbSaveRawString(rdb,val,vallen)) == -1) return -1;
+ len += ret;
+ return len;
}
/* Wrapper for rdbSaveAuxField() used when key/val length can be obtained
* with strlen(). */
-int rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
+ssize_t rdbSaveAuxFieldStrStr(rio *rdb, char *key, char *val) {
return rdbSaveAuxField(rdb,key,strlen(key),val,strlen(val));
}
/* Wrapper for strlen(key) + integer type (up to long long range). */
-int rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
- char buf[REDIS_LONGSTR_SIZE];
+ssize_t rdbSaveAuxFieldStrInt(rio *rdb, char *key, long long val) {
+ char buf[LONG_STR_SIZE];
int vlen = ll2string(buf,sizeof(buf),val);
return rdbSaveAuxField(rdb,key,strlen(key),buf,vlen);
}
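+
+/* Editorial sketch (not part of the patch): the bytes an AUX record boils
+ * down to for short ASCII strings, assuming RDB_OPCODE_AUX == 250 as in
+ * rdb.h and ignoring the integer/LZF special string encodings that
+ * rdbSaveRawString() may pick for other inputs. */
+static size_t sk_build_aux_record(unsigned char *out,
+                                  const char *key, const char *val) {
+    size_t off = 0, klen = strlen(key), vlen = strlen(val);
+    out[off++] = 250;                  /* RDB_OPCODE_AUX (assumed value) */
+    out[off++] = (unsigned char)klen;  /* 6 bit length, requires klen < 64 */
+    memcpy(out + off, key, klen); off += klen;
+    out[off++] = (unsigned char)vlen;  /* 6 bit length, requires vlen < 64 */
+    memcpy(out + off, val, vlen); off += vlen;
+    return off;
+}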
/* Save a few default AUX fields with information about the RDB generated. */
-int rdbSaveInfoAuxFields(rio *rdb) {
+int rdbSaveInfoAuxFields(rio *rdb, int flags, rdbSaveInfo *rsi) {
int redis_bits = (sizeof(void*) == 8) ? 64 : 32;
+ int aof_preamble = (flags & RDB_SAVE_AOF_PREAMBLE) != 0;
/* Add a few fields about the state when the RDB was created. */
if (rdbSaveAuxFieldStrStr(rdb,"redis-ver",REDIS_VERSION) == -1) return -1;
if (rdbSaveAuxFieldStrInt(rdb,"redis-bits",redis_bits) == -1) return -1;
if (rdbSaveAuxFieldStrInt(rdb,"ctime",time(NULL)) == -1) return -1;
if (rdbSaveAuxFieldStrInt(rdb,"used-mem",zmalloc_used_memory()) == -1) return -1;
+
+ /* Handle saving options that generate aux fields. */
+ if (rsi) {
+ if (rdbSaveAuxFieldStrInt(rdb,"repl-stream-db",rsi->repl_stream_db)
+ == -1) return -1;
+ if (rdbSaveAuxFieldStrStr(rdb,"repl-id",server.replid)
+ == -1) return -1;
+ if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",server.master_repl_offset)
+ == -1) return -1;
+ }
+ if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1;
return 1;
}
/* Produces a dump of the database in RDB format sending it to the specified
- * Redis I/O channel. On success REDIS_OK is returned, otherwise REDIS_ERR
+ * Redis I/O channel. On success C_OK is returned, otherwise C_ERR
* is returned and part of the output, or all the output, can be
* missing because of I/O errors.
*
- * When the function returns REDIS_ERR and if 'error' is not NULL, the
+ * When the function returns C_ERR and if 'error' is not NULL, the
* integer pointed by 'error' is set to the value of errno just after the I/O
* error. */
-int rdbSaveRio(rio *rdb, int *error) {
+int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi) {
dictIterator *di = NULL;
dictEntry *de;
char magic[10];
int j;
- long long now = mstime();
uint64_t cksum;
+ size_t processed = 0;
if (server.rdb_checksum)
rdb->update_cksum = rioGenericUpdateChecksum;
- snprintf(magic,sizeof(magic),"REDIS%04d",REDIS_RDB_VERSION);
+ snprintf(magic,sizeof(magic),"REDIS%04d",RDB_VERSION);
if (rdbWriteRaw(rdb,magic,9) == -1) goto werr;
- if (rdbSaveInfoAuxFields(rdb) == -1) goto werr;
+ if (rdbSaveInfoAuxFields(rdb,flags,rsi) == -1) goto werr;
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
dict *d = db->dict;
if (dictSize(d) == 0) continue;
di = dictGetSafeIterator(d);
- if (!di) return REDIS_ERR;
/* Write the SELECT DB opcode */
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_SELECTDB) == -1) goto werr;
+ if (rdbSaveType(rdb,RDB_OPCODE_SELECTDB) == -1) goto werr;
if (rdbSaveLen(rdb,j) == -1) goto werr;
 /* Write the RESIZE DB opcode. These sizes are just hints to resize
 * the hash tables at load time; they do not limit the actual size of
 * the DB being loaded. */
- uint32_t db_size, expires_size;
- db_size = (dictSize(db->dict) <= UINT32_MAX) ?
- dictSize(db->dict) :
- UINT32_MAX;
- expires_size = (dictSize(db->dict) <= UINT32_MAX) ?
- dictSize(db->expires) :
- UINT32_MAX;
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_RESIZEDB) == -1) goto werr;
+ uint64_t db_size, expires_size;
+ db_size = dictSize(db->dict);
+ expires_size = dictSize(db->expires);
+ if (rdbSaveType(rdb,RDB_OPCODE_RESIZEDB) == -1) goto werr;
if (rdbSaveLen(rdb,db_size) == -1) goto werr;
if (rdbSaveLen(rdb,expires_size) == -1) goto werr;
@@ -771,26 +1142,51 @@ int rdbSaveRio(rio *rdb, int *error) {
initStaticStringObject(key,keystr);
expire = getExpire(db,&key);
- if (rdbSaveKeyValuePair(rdb,&key,o,expire,now) == -1) goto werr;
+ if (rdbSaveKeyValuePair(rdb,&key,o,expire) == -1) goto werr;
+
+ /* When this RDB is produced as part of an AOF rewrite, move
+ * accumulated diff from parent to child while rewriting in
+ * order to have a smaller final write. */
+ if (flags & RDB_SAVE_AOF_PREAMBLE &&
+ rdb->processed_bytes > processed+AOF_READ_DIFF_INTERVAL_BYTES)
+ {
+ processed = rdb->processed_bytes;
+ aofReadDiffFromParent();
+ }
+ }
+ dictReleaseIterator(di);
+ di = NULL; /* So that we don't release it again on error. */
+ }
+
+ /* If we are storing the replication information on disk, persist
+ * the script cache as well: on successful PSYNC after a restart, we need
+ * to be able to process any EVALSHA inside the replication backlog the
+ * master will send us. */
+ if (rsi && dictSize(server.lua_scripts)) {
+ di = dictGetIterator(server.lua_scripts);
+ while((de = dictNext(di)) != NULL) {
+ robj *body = dictGetVal(de);
+ if (rdbSaveAuxField(rdb,"lua",3,body->ptr,sdslen(body->ptr)) == -1)
+ goto werr;
}
dictReleaseIterator(di);
+ di = NULL; /* So that we don't release it again on error. */
}
- di = NULL; /* So that we don't release it again on error. */
/* EOF opcode */
- if (rdbSaveType(rdb,REDIS_RDB_OPCODE_EOF) == -1) goto werr;
+ if (rdbSaveType(rdb,RDB_OPCODE_EOF) == -1) goto werr;
 /* CRC64 checksum. It will be zero if checksum computation is disabled;
 * the loading code skips the check in this case. */
cksum = rdb->cksum;
memrev64ifbe(&cksum);
if (rioWrite(rdb,&cksum,8) == 0) goto werr;
- return REDIS_OK;
+ return C_OK;
werr:
if (error) *error = errno;
if (di) dictReleaseIterator(di);
- return REDIS_ERR;
+ return C_ERR;
}
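+
+/* Editorial sketch (not part of the patch): the smallest well-formed output
+ * of the logic above is a dump with no databases: magic + version, the EOF
+ * opcode, and the 8 byte checksum, which may be all zeroes since the loader
+ * skips verification for a zero CRC (see the comment near the CRC write
+ * above). RDB_OPCODE_EOF == 255 and version 9 are assumed from rdb.h. */
+static int sk_write_empty_rdb(FILE *out) {
+    static const unsigned char zerocksum[8] = {0};
+    if (fwrite("REDIS0009", 1, 9, out) != 9) return -1; /* magic + version */
+    if (fputc(255, out) == EOF) return -1;              /* RDB_OPCODE_EOF */
+    if (fwrite(zerocksum, 1, 8, out) != 8) return -1;   /* checksum off */
+    return 0;
+}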
/* This is just a wrapper to rdbSaveRio() that additionally adds a prefix
@@ -801,27 +1197,28 @@ werr:
 * The suffix is the 40 byte hex string we announced in the prefix.
* This way processes receiving the payload can understand when it ends
* without doing any processing of the content. */
-int rdbSaveRioWithEOFMark(rio *rdb, int *error) {
- char eofmark[REDIS_EOF_MARK_SIZE];
+int rdbSaveRioWithEOFMark(rio *rdb, int *error, rdbSaveInfo *rsi) {
+ char eofmark[RDB_EOF_MARK_SIZE];
- getRandomHexChars(eofmark,REDIS_EOF_MARK_SIZE);
+ getRandomHexChars(eofmark,RDB_EOF_MARK_SIZE);
if (error) *error = 0;
if (rioWrite(rdb,"$EOF:",5) == 0) goto werr;
- if (rioWrite(rdb,eofmark,REDIS_EOF_MARK_SIZE) == 0) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
if (rioWrite(rdb,"\r\n",2) == 0) goto werr;
- if (rdbSaveRio(rdb,error) == REDIS_ERR) goto werr;
- if (rioWrite(rdb,eofmark,REDIS_EOF_MARK_SIZE) == 0) goto werr;
- return REDIS_OK;
+ if (rdbSaveRio(rdb,error,RDB_SAVE_NONE,rsi) == C_ERR) goto werr;
+ if (rioWrite(rdb,eofmark,RDB_EOF_MARK_SIZE) == 0) goto werr;
+ return C_OK;
werr: /* Write error. */
/* Set 'error' only if not already set by rdbSaveRio() call. */
if (error && *error == 0) *error = errno;
- return REDIS_ERR;
+ return C_ERR;
}
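+
+/* Editorial sketch (not part of the patch): the diskless framing in
+ * isolation. The receiver knows the payload ended when the 40 byte mark
+ * announced after "$EOF:" reappears verbatim; RDB_EOF_MARK_SIZE == 40 is
+ * an assumption. */
+static int sk_write_eof_framed(FILE *out, const char mark[40],
+                               const void *payload, size_t len) {
+    if (fwrite("$EOF:", 1, 5, out) != 5) return -1;
+    if (fwrite(mark, 1, 40, out) != 40) return -1;      /* announce mark */
+    if (fwrite("\r\n", 1, 2, out) != 2) return -1;
+    if (fwrite(payload, 1, len, out) != len) return -1; /* RDB body */
+    if (fwrite(mark, 1, 40, out) != 40) return -1;      /* suffix = mark */
+    return 0;
+}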
-/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success. */
-int rdbSave(char *filename) {
+/* Save the DB on disk. Return C_ERR on error, C_OK on success. */
+int rdbSave(char *filename, rdbSaveInfo *rsi) {
char tmpfile[256];
+ char cwd[MAXPATHLEN]; /* Current working dir path for error messages. */
FILE *fp;
rio rdb;
int error = 0;
@@ -829,13 +1226,22 @@ int rdbSave(char *filename) {
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
if (!fp) {
- redisLog(REDIS_WARNING, "Failed opening .rdb for saving: %s",
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Failed opening the RDB file %s (in server root dir %s) "
+ "for saving: %s",
+ filename,
+ cwdp ? cwdp : "unknown",
strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
rioInitWithFile(&rdb,fp);
- if (rdbSaveRio(&rdb,&error) == REDIS_ERR) {
+
+ if (server.rdb_save_incremental_fsync)
+ rioSetAutoSync(&rdb,REDIS_AUTOSYNC_BYTES);
+
+ if (rdbSaveRio(&rdb,&error,RDB_SAVE_NONE,rsi) == C_ERR) {
errno = error;
goto werr;
}
@@ -848,31 +1254,40 @@ int rdbSave(char *filename) {
 /* Use RENAME to make sure the DB file is changed atomically only
 * if the generated DB file is ok. */
if (rename(tmpfile,filename) == -1) {
- redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
+ char *cwdp = getcwd(cwd,MAXPATHLEN);
+ serverLog(LL_WARNING,
+ "Error moving temp DB file %s on the final "
+ "destination %s (in server root dir %s): %s",
+ tmpfile,
+ filename,
+ cwdp ? cwdp : "unknown",
+ strerror(errno));
unlink(tmpfile);
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"DB saved on disk");
+
+ serverLog(LL_NOTICE,"DB saved on disk");
server.dirty = 0;
server.lastsave = time(NULL);
- server.lastbgsave_status = REDIS_OK;
- return REDIS_OK;
+ server.lastbgsave_status = C_OK;
+ return C_OK;
werr:
+ serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno));
fclose(fp);
unlink(tmpfile);
- redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
-int rdbSaveBackground(char *filename) {
+int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
pid_t childpid;
long long start;
- if (server.rdb_child_pid != -1) return REDIS_ERR;
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
server.dirty_before_bgsave = server.dirty;
server.lastbgsave_try = time(NULL);
+ openChildInfoPipe();
start = ustime();
if ((childpid = fork()) == 0) {
@@ -881,36 +1296,40 @@ int rdbSaveBackground(char *filename) {
/* Child */
closeListeningSockets(0);
redisSetProcTitle("redis-rdb-bgsave");
- retval = rdbSave(filename);
- if (retval == REDIS_OK) {
- size_t private_dirty = zmalloc_get_private_dirty();
+ retval = rdbSave(filename,rsi);
+ if (retval == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"RDB: %zu MB of memory used by copy-on-write",
private_dirty/(1024*1024));
}
+
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_RDB);
}
- exitFromChild((retval == REDIS_OK) ? 0 : 1);
+ exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
server.stat_fork_time = ustime()-start;
server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
- server.lastbgsave_status = REDIS_ERR;
- redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
+ closeChildInfoPipe();
+ server.lastbgsave_status = C_ERR;
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
- redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
+ serverLog(LL_NOTICE,"Background saving started by pid %d",childpid);
server.rdb_save_time_start = time(NULL);
server.rdb_child_pid = childpid;
- server.rdb_child_type = REDIS_RDB_CHILD_TYPE_DISK;
+ server.rdb_child_type = RDB_CHILD_TYPE_DISK;
updateDictResizePolicy();
- return REDIS_OK;
+ return C_OK;
}
- return REDIS_OK; /* unreached */
+ return C_OK; /* unreached */
}
void rdbRemoveTempFile(pid_t childpid) {
@@ -920,20 +1339,59 @@ void rdbRemoveTempFile(pid_t childpid) {
unlink(tmpfile);
}
+/* This function is called by rdbLoadObject() when the code is in RDB-check
+ * mode and we find a module value of type 2 that can be parsed without
+ * needing the actual module. The value is parsed for errors; finally
+ * a dummy Redis object is returned just to conform to the API. */
+robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) {
+ uint64_t opcode;
+ while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) {
+ if (opcode == RDB_MODULE_OPCODE_SINT ||
+ opcode == RDB_MODULE_OPCODE_UINT)
+ {
+ uint64_t len;
+ if (rdbLoadLenByRef(rdb,NULL,&len) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading integer from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_STRING) {
+ robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
+ if (o == NULL) {
+ rdbExitReportCorruptRDB(
+ "Error reading string from module %s value", modulename);
+ }
+ decrRefCount(o);
+ } else if (opcode == RDB_MODULE_OPCODE_FLOAT) {
+ float val;
+ if (rdbLoadBinaryFloatValue(rdb,&val) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading float from module %s value", modulename);
+ }
+ } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) {
+ double val;
+ if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) {
+ rdbExitReportCorruptRDB(
+ "Error reading double from module %s value", modulename);
+ }
+ }
+ }
+ return createStringObject("module-dummy-value",18);
+}
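+
+/* Editorial sketch (not part of the patch): the module v2 value the checker
+ * above walks is a flat sequence of (opcode, payload) records terminated by
+ * an EOF opcode; opcodes are themselves stored with the length encoding.
+ * The opcode values (EOF=0, SINT=1, UINT=2, FLOAT=3, DOUBLE=4, STRING=5)
+ * are assumptions taken from rdb.h. A writer for one unsigned integer
+ * record, reusing sk_encode_len() from the sketch further above: */
+static size_t sk_module_uint_record(unsigned char *out, uint64_t v) {
+    size_t off = 0;
+    off += sk_encode_len(out + off, 2);  /* RDB_MODULE_OPCODE_UINT */
+    off += sk_encode_len(out + off, v);  /* the integer, length-encoded */
+    return off;
+}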
+
/* Load a Redis object of the specified type from the specified file.
* On success a newly allocated object is returned, otherwise NULL. */
robj *rdbLoadObject(int rdbtype, rio *rdb) {
- robj *o, *ele, *dec;
- size_t len;
+ robj *o = NULL, *ele, *dec;
+ uint64_t len;
unsigned int i;
- if (rdbtype == REDIS_RDB_TYPE_STRING) {
+ if (rdbtype == RDB_TYPE_STRING) {
/* Read string value */
if ((o = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
o = tryObjectEncoding(o);
- } else if (rdbtype == REDIS_RDB_TYPE_LIST) {
+ } else if (rdbtype == RDB_TYPE_LIST) {
/* Read list value */
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
o = createQuicklistObject();
quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
@@ -948,9 +1406,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
decrRefCount(dec);
decrRefCount(ele);
}
- } else if (rdbtype == REDIS_RDB_TYPE_SET) {
- /* Read list/set value */
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ } else if (rdbtype == RDB_TYPE_SET) {
+ /* Read Set value */
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
/* Use a regular set when there are too many entries. */
if (len > server.set_max_intset_entries) {
@@ -963,146 +1421,156 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
o = createIntsetObject();
}
- /* Load every single element of the list/set */
+ /* Load every single element of the set */
for (i = 0; i < len; i++) {
long long llval;
- if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
+ sds sdsele;
- if (o->encoding == REDIS_ENCODING_INTSET) {
- /* Fetch integer value from element */
- if (isObjectRepresentableAsLongLong(ele,&llval) == REDIS_OK) {
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+
+ if (o->encoding == OBJ_ENCODING_INTSET) {
+ /* Fetch integer value from element. */
+ if (isSdsRepresentableAsLongLong(sdsele,&llval) == C_OK) {
o->ptr = intsetAdd(o->ptr,llval,NULL);
} else {
- setTypeConvert(o,REDIS_ENCODING_HT);
+ setTypeConvert(o,OBJ_ENCODING_HT);
dictExpand(o->ptr,len);
}
}
/* This will also be called when the set was just converted
- * to a regular hash table encoded set */
- if (o->encoding == REDIS_ENCODING_HT) {
- dictAdd((dict*)o->ptr,ele,NULL);
+ * to a regular hash table encoded set. */
+ if (o->encoding == OBJ_ENCODING_HT) {
+ dictAdd((dict*)o->ptr,sdsele,NULL);
} else {
- decrRefCount(ele);
+ sdsfree(sdsele);
}
}
- } else if (rdbtype == REDIS_RDB_TYPE_ZSET) {
- /* Read list/set value */
- size_t zsetlen;
+ } else if (rdbtype == RDB_TYPE_ZSET_2 || rdbtype == RDB_TYPE_ZSET) {
+ /* Read list/set value. */
+ uint64_t zsetlen;
size_t maxelelen = 0;
zset *zs;
- if ((zsetlen = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ if ((zsetlen = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
o = createZsetObject();
zs = o->ptr;
- /* Load every single element of the list/set */
+ if (zsetlen > DICT_HT_INITIAL_SIZE)
+ dictExpand(zs->dict,zsetlen);
+
+ /* Load every single element of the sorted set. */
while(zsetlen--) {
- robj *ele;
+ sds sdsele;
double score;
zskiplistNode *znode;
- if ((ele = rdbLoadEncodedStringObject(rdb)) == NULL) return NULL;
- ele = tryObjectEncoding(ele);
- if (rdbLoadDoubleValue(rdb,&score) == -1) return NULL;
+ if ((sdsele = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+
+ if (rdbtype == RDB_TYPE_ZSET_2) {
+ if (rdbLoadBinaryDoubleValue(rdb,&score) == -1) return NULL;
+ } else {
+ if (rdbLoadDoubleValue(rdb,&score) == -1) return NULL;
+ }
/* Don't care about integer-encoded strings. */
- if (sdsEncodedObject(ele) && sdslen(ele->ptr) > maxelelen)
- maxelelen = sdslen(ele->ptr);
+ if (sdslen(sdsele) > maxelelen) maxelelen = sdslen(sdsele);
- znode = zslInsert(zs->zsl,score,ele);
- dictAdd(zs->dict,ele,&znode->score);
- incrRefCount(ele); /* added to skiplist */
+ znode = zslInsert(zs->zsl,score,sdsele);
+ dictAdd(zs->dict,sdsele,&znode->score);
}
/* Convert *after* loading, since sorted sets are not stored ordered. */
if (zsetLength(o) <= server.zset_max_ziplist_entries &&
maxelelen <= server.zset_max_ziplist_value)
- zsetConvert(o,REDIS_ENCODING_ZIPLIST);
- } else if (rdbtype == REDIS_RDB_TYPE_HASH) {
- size_t len;
+ zsetConvert(o,OBJ_ENCODING_ZIPLIST);
+ } else if (rdbtype == RDB_TYPE_HASH) {
+ uint64_t len;
int ret;
+ sds field, value;
len = rdbLoadLen(rdb, NULL);
- if (len == REDIS_RDB_LENERR) return NULL;
+ if (len == RDB_LENERR) return NULL;
o = createHashObject();
/* Too many entries? Use a hash table. */
if (len > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
/* Load every field and value into the ziplist */
- while (o->encoding == REDIS_ENCODING_ZIPLIST && len > 0) {
- robj *field, *value;
-
+ while (o->encoding == OBJ_ENCODING_ZIPLIST && len > 0) {
len--;
/* Load raw strings */
- field = rdbLoadStringObject(rdb);
- if (field == NULL) return NULL;
- redisAssert(sdsEncodedObject(field));
- value = rdbLoadStringObject(rdb);
- if (value == NULL) return NULL;
- redisAssert(sdsEncodedObject(value));
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
/* Add pair to ziplist */
- o->ptr = ziplistPush(o->ptr, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL);
- o->ptr = ziplistPush(o->ptr, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL);
+ o->ptr = ziplistPush(o->ptr, (unsigned char*)field,
+ sdslen(field), ZIPLIST_TAIL);
+ o->ptr = ziplistPush(o->ptr, (unsigned char*)value,
+ sdslen(value), ZIPLIST_TAIL);
+
/* Convert to hash table if size threshold is exceeded */
- if (sdslen(field->ptr) > server.hash_max_ziplist_value ||
- sdslen(value->ptr) > server.hash_max_ziplist_value)
+ if (sdslen(field) > server.hash_max_ziplist_value ||
+ sdslen(value) > server.hash_max_ziplist_value)
{
- decrRefCount(field);
- decrRefCount(value);
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ sdsfree(field);
+ sdsfree(value);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
}
- decrRefCount(field);
- decrRefCount(value);
+ sdsfree(field);
+ sdsfree(value);
}
- /* Load remaining fields and values into the hash table */
- while (o->encoding == REDIS_ENCODING_HT && len > 0) {
- robj *field, *value;
+ if (o->encoding == OBJ_ENCODING_HT && len > DICT_HT_INITIAL_SIZE)
+ dictExpand(o->ptr,len);
+ /* Load remaining fields and values into the hash table */
+ while (o->encoding == OBJ_ENCODING_HT && len > 0) {
len--;
/* Load encoded strings */
- field = rdbLoadEncodedStringObject(rdb);
- if (field == NULL) return NULL;
- value = rdbLoadEncodedStringObject(rdb);
- if (value == NULL) return NULL;
-
- field = tryObjectEncoding(field);
- value = tryObjectEncoding(value);
+ if ((field = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
+ if ((value = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL))
+ == NULL) return NULL;
/* Add pair to hash table */
ret = dictAdd((dict*)o->ptr, field, value);
- redisAssert(ret == DICT_OK);
+ if (ret == DICT_ERR) {
+ rdbExitReportCorruptRDB("Duplicate keys detected");
+ }
}
/* All pairs should be read by now */
- redisAssert(len == 0);
- } else if (rdbtype == REDIS_RDB_TYPE_LIST_QUICKLIST) {
- if ((len = rdbLoadLen(rdb,NULL)) == REDIS_RDB_LENERR) return NULL;
+ serverAssert(len == 0);
+ } else if (rdbtype == RDB_TYPE_LIST_QUICKLIST) {
+ if ((len = rdbLoadLen(rdb,NULL)) == RDB_LENERR) return NULL;
o = createQuicklistObject();
quicklistSetOptions(o->ptr, server.list_max_ziplist_size,
server.list_compress_depth);
while (len--) {
- unsigned char *zl = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ unsigned char *zl =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL);
if (zl == NULL) return NULL;
quicklistAppendZiplist(o->ptr, zl);
}
- } else if (rdbtype == REDIS_RDB_TYPE_HASH_ZIPMAP ||
- rdbtype == REDIS_RDB_TYPE_LIST_ZIPLIST ||
- rdbtype == REDIS_RDB_TYPE_SET_INTSET ||
- rdbtype == REDIS_RDB_TYPE_ZSET_ZIPLIST ||
- rdbtype == REDIS_RDB_TYPE_HASH_ZIPLIST)
+ } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
+ rdbtype == RDB_TYPE_LIST_ZIPLIST ||
+ rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
+ rdbtype == RDB_TYPE_HASH_ZIPLIST)
{
- unsigned char *encoded = rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN);
+ unsigned char *encoded =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL);
if (encoded == NULL) return NULL;
- o = createObject(REDIS_STRING,encoded); /* Obj type fixed below. */
+ o = createObject(OBJ_STRING,encoded); /* Obj type fixed below. */
/* Fix the object encoding, and make sure to convert the encoded
* data type into the base type if accordingly to the current
@@ -1111,7 +1579,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
* size as this is an O(N) scan. Eventually everything will get
* converted. */
switch(rdbtype) {
- case REDIS_RDB_TYPE_HASH_ZIPMAP:
+ case RDB_TYPE_HASH_ZIPMAP:
 /* Convert to ziplist encoded hash. This can be removed once
 * support for loading dumps created by Redis 2.4 is dropped. */
{
@@ -1130,45 +1598,199 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
zfree(o->ptr);
o->ptr = zl;
- o->type = REDIS_HASH;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (hashTypeLength(o) > server.hash_max_ziplist_entries ||
maxlen > server.hash_max_ziplist_value)
{
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
}
}
break;
- case REDIS_RDB_TYPE_LIST_ZIPLIST:
- o->type = REDIS_LIST;
- o->encoding = REDIS_ENCODING_ZIPLIST;
- listTypeConvert(o,REDIS_ENCODING_QUICKLIST);
+ case RDB_TYPE_LIST_ZIPLIST:
+ o->type = OBJ_LIST;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
+ listTypeConvert(o,OBJ_ENCODING_QUICKLIST);
break;
- case REDIS_RDB_TYPE_SET_INTSET:
- o->type = REDIS_SET;
- o->encoding = REDIS_ENCODING_INTSET;
+ case RDB_TYPE_SET_INTSET:
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_INTSET;
if (intsetLen(o->ptr) > server.set_max_intset_entries)
- setTypeConvert(o,REDIS_ENCODING_HT);
+ setTypeConvert(o,OBJ_ENCODING_HT);
break;
- case REDIS_RDB_TYPE_ZSET_ZIPLIST:
- o->type = REDIS_ZSET;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ case RDB_TYPE_ZSET_ZIPLIST:
+ o->type = OBJ_ZSET;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (zsetLength(o) > server.zset_max_ziplist_entries)
- zsetConvert(o,REDIS_ENCODING_SKIPLIST);
+ zsetConvert(o,OBJ_ENCODING_SKIPLIST);
break;
- case REDIS_RDB_TYPE_HASH_ZIPLIST:
- o->type = REDIS_HASH;
- o->encoding = REDIS_ENCODING_ZIPLIST;
+ case RDB_TYPE_HASH_ZIPLIST:
+ o->type = OBJ_HASH;
+ o->encoding = OBJ_ENCODING_ZIPLIST;
if (hashTypeLength(o) > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
default:
- redisPanic("Unknown encoding");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
break;
}
+ } else if (rdbtype == RDB_TYPE_STREAM_LISTPACKS) {
+ o = createStreamObject();
+ stream *s = o->ptr;
+ uint64_t listpacks = rdbLoadLen(rdb,NULL);
+
+ while(listpacks--) {
+ /* Get the master ID, the one we'll use as key of the radix tree
+ * node: the entries inside the listpack itself are delta-encoded
+ * relative to this ID. */
+ sds nodekey = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (sdslen(nodekey) != sizeof(streamID)) {
+ rdbExitReportCorruptRDB("Stream node key entry is not the "
+ "size of a stream ID");
+ }
+
+ /* Load the listpack. */
+ unsigned char *lp =
+ rdbGenericLoadStringObject(rdb,RDB_LOAD_PLAIN,NULL);
+ if (lp == NULL) return NULL;
+ unsigned char *first = lpFirst(lp);
+ if (first == NULL) {
+ /* Serialized listpacks should never be empty, since on
+ * deletion we should remove the radix tree key if the
+ * resulting listpack is empty. */
+ rdbExitReportCorruptRDB("Empty listpack inside stream");
+ }
+
+ /* Insert the key in the radix tree. */
+ int retval = raxInsert(s->rax,
+ (unsigned char*)nodekey,sizeof(streamID),lp,NULL);
+ sdsfree(nodekey);
+ if (!retval)
+ rdbExitReportCorruptRDB("Listpack re-added with existing key");
+ }
+ /* Load total number of items inside the stream. */
+ s->length = rdbLoadLen(rdb,NULL);
+ /* Load the last entry ID. */
+ s->last_id.ms = rdbLoadLen(rdb,NULL);
+ s->last_id.seq = rdbLoadLen(rdb,NULL);
+
+ /* Consumer groups loading */
+ size_t cgroups_count = rdbLoadLen(rdb,NULL);
+ while(cgroups_count--) {
+ /* Get the consumer group name and ID. We can then create the
+ * consumer group ASAP and populate its structure as
+ * we read more data. */
+ streamID cg_id;
+ sds cgname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cgname == NULL) {
+ rdbExitReportCorruptRDB(
+ "Error reading the consumer group name from Stream");
+ }
+ cg_id.ms = rdbLoadLen(rdb,NULL);
+ cg_id.seq = rdbLoadLen(rdb,NULL);
+ streamCG *cgroup = streamCreateCG(s,cgname,sdslen(cgname),&cg_id);
+ if (cgroup == NULL)
+ rdbExitReportCorruptRDB("Duplicated consumer group name %s",
+ cgname);
+ sdsfree(cgname);
+
+ /* Load the global PEL for this consumer group; however, we'll
+ * not yet populate the NACK structures with the message
+ * owner, since consumers for this group and their messages will
+ * be read as a next step. So for now we leave them unresolved
+ * and populate them later. */
+ size_t pel_size = rdbLoadLen(rdb,NULL);
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ rdbLoadRaw(rdb,rawid,sizeof(rawid));
+ streamNACK *nack = streamCreateNACK(NULL);
+ nack->delivery_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+ nack->delivery_count = rdbLoadLen(rdb,NULL);
+ if (!raxInsert(cgroup->pel,rawid,sizeof(rawid),nack,NULL))
+ rdbExitReportCorruptRDB("Duplicated gobal PEL entry "
+ "loading stream consumer group");
+ }
+
+ /* Now that we loaded our global PEL, we need to load the
+ * consumers and their local PELs. */
+ size_t consumers_num = rdbLoadLen(rdb,NULL);
+ while(consumers_num--) {
+ sds cname = rdbGenericLoadStringObject(rdb,RDB_LOAD_SDS,NULL);
+ if (cname == NULL) {
+ rdbExitReportCorruptRDB(
+ "Error reading the consumer name from Stream group");
+ }
+ streamConsumer *consumer = streamLookupConsumer(cgroup,cname,
+ 1);
+ sdsfree(cname);
+ consumer->seen_time = rdbLoadMillisecondTime(rdb,RDB_VERSION);
+
+ /* Load the PEL about entries owned by this specific
+ * consumer. */
+ pel_size = rdbLoadLen(rdb,NULL);
+ while(pel_size--) {
+ unsigned char rawid[sizeof(streamID)];
+ rdbLoadRaw(rdb,rawid,sizeof(rawid));
+ streamNACK *nack = raxFind(cgroup->pel,rawid,sizeof(rawid));
+ if (nack == raxNotFound)
+ rdbExitReportCorruptRDB("Consumer entry not found in "
+ "group global PEL");
+
+ /* Set the NACK consumer, which was left NULL when
+ * loading the global PEL. Then set the same shared
+ * NACK structure also in the consumer-specific PEL. */
+ nack->consumer = consumer;
+ if (!raxInsert(consumer->pel,rawid,sizeof(rawid),nack,NULL))
+ rdbExitReportCorruptRDB("Duplicated consumer PEL entry "
+ " loading a stream consumer "
+ "group");
+ }
+ }
+ }
+ } else if (rdbtype == RDB_TYPE_MODULE || rdbtype == RDB_TYPE_MODULE_2) {
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+ char name[10];
+
+ if (rdbCheckMode && rdbtype == RDB_TYPE_MODULE_2) {
+ moduleTypeNameByID(name,moduleid);
+ return rdbLoadCheckModuleValue(rdb,name);
+ }
+
+ if (mt == NULL) {
+ moduleTypeNameByID(name,moduleid);
+ serverLog(LL_WARNING,"The RDB file contains module data I can't load: no matching module '%s'", name);
+ exit(1);
+ }
+ RedisModuleIO io;
+ moduleInitIOContext(io,mt,rdb);
+ io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2;
+ /* Call the rdb_load method of the module providing the 10 bit
+ * encoding version in the lower 10 bits of the module ID. */
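+        /* (Illustrative note: 1023 is 0x3FF, a 10 bit mask, so a module
+         * value serialized with encoding version 2 gets 2 back from
+         * 'moduleid&1023' below.) */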
+ void *ptr = mt->rdb_load(&io,moduleid&1023);
+ if (io.ctx) {
+ moduleFreeContext(io.ctx);
+ zfree(io.ctx);
+ }
+
+ /* Module v2 serialization has an EOF mark at the end. */
+ if (io.ver == 2) {
+ uint64_t eof = rdbLoadLen(rdb,NULL);
+ if (eof != RDB_MODULE_OPCODE_EOF) {
+ serverLog(LL_WARNING,"The RDB file contains module data for the module '%s' that is not terminated by the proper module value EOF marker", name);
+ exit(1);
+ }
+ }
+
+ if (ptr == NULL) {
+ moduleTypeNameByID(name,moduleid);
+            serverLog(LL_WARNING,"The RDB file contains module data for the module type '%s' that the responsible module is not able to load. Check the module log above for additional clues.", name);
+ exit(1);
+ }
+ o = createModuleObject(mt,ptr);
} else {
- redisPanic("Unknown object type");
+ rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
}
return o;
}
@@ -1213,201 +1835,281 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
* our cached time since it is used to create and update the last
* interaction time with clients and for other important things. */
updateCachedTime();
- if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER)
+ if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER)
replicationSendNewlineToMaster();
loadingProgress(r->processed_bytes);
processEventsWhileBlocked();
}
}
-int rdbLoad(char *filename) {
- uint32_t dbid;
+/* Load an RDB file from the rio stream 'rdb'. On success C_OK is returned,
+ * otherwise C_ERR is returned and 'errno' is set accordingly. */
+int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
+ uint64_t dbid;
int type, rdbver;
redisDb *db = server.db+0;
char buf[1024];
- long long expiretime, now = mstime();
- FILE *fp;
- rio rdb;
- if ((fp = fopen(filename,"r")) == NULL) return REDIS_ERR;
-
- rioInitWithFile(&rdb,fp);
- rdb.update_cksum = rdbLoadProgressCallback;
- rdb.max_processing_chunk = server.loading_process_events_interval_bytes;
- if (rioRead(&rdb,buf,9) == 0) goto eoferr;
+ rdb->update_cksum = rdbLoadProgressCallback;
+ rdb->max_processing_chunk = server.loading_process_events_interval_bytes;
+ if (rioRead(rdb,buf,9) == 0) goto eoferr;
buf[9] = '\0';
if (memcmp(buf,"REDIS",5) != 0) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
+ serverLog(LL_WARNING,"Wrong signature trying to load DB from file");
errno = EINVAL;
- return REDIS_ERR;
+ return C_ERR;
}
rdbver = atoi(buf+5);
- if (rdbver < 1 || rdbver > REDIS_RDB_VERSION) {
- fclose(fp);
- redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
+ if (rdbver < 1 || rdbver > RDB_VERSION) {
+ serverLog(LL_WARNING,"Can't handle RDB format version %d",rdbver);
errno = EINVAL;
- return REDIS_ERR;
+ return C_ERR;
}
- startLoading(fp);
+ /* Key-specific attributes, set by opcodes before the key type. */
+ long long lru_idle = -1, lfu_freq = -1, expiretime = -1, now = mstime();
+ long long lru_clock = LRU_CLOCK();
+
while(1) {
robj *key, *val;
- expiretime = -1;
/* Read type. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+ if ((type = rdbLoadType(rdb)) == -1) goto eoferr;
/* Handle special types. */
- if (type == REDIS_RDB_OPCODE_EXPIRETIME) {
+ if (type == RDB_OPCODE_EXPIRETIME) {
/* EXPIRETIME: load an expire associated with the next key
* to load. Note that after loading an expire we need to
* load the actual type, and continue. */
- if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
- /* We read the time so we need to read the object type again. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- /* the EXPIRETIME opcode specifies time in seconds, so convert
- * into milliseconds. */
+ expiretime = rdbLoadTime(rdb);
expiretime *= 1000;
- } else if (type == REDIS_RDB_OPCODE_EXPIRETIME_MS) {
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
/* EXPIRETIME_MS: milliseconds precision expire times introduced
             * with RDB v3. Like EXPIRETIME but with more precision. */
- if ((expiretime = rdbLoadMillisecondTime(&rdb)) == -1) goto eoferr;
- /* We read the time so we need to read the object type again. */
- if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
- } else if (type == REDIS_RDB_OPCODE_EOF) {
+ expiretime = rdbLoadMillisecondTime(rdb,rdbver);
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_FREQ) {
+ /* FREQ: LFU frequency. */
+ uint8_t byte;
+ if (rioRead(rdb,&byte,1) == 0) goto eoferr;
+ lfu_freq = byte;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_IDLE) {
+ /* IDLE: LRU idle time. */
+ uint64_t qword;
+ if ((qword = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
+ lru_idle = qword;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EOF) {
/* EOF: End of file, exit the main loop. */
break;
- } else if (type == REDIS_RDB_OPCODE_SELECTDB) {
+ } else if (type == RDB_OPCODE_SELECTDB) {
/* SELECTDB: Select the specified database. */
- if ((dbid = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
- goto eoferr;
+ if ((dbid = rdbLoadLen(rdb,NULL)) == RDB_LENERR) goto eoferr;
if (dbid >= (unsigned)server.dbnum) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"FATAL: Data file was created with a Redis "
"server configured to handle more than %d "
"databases. Exiting\n", server.dbnum);
exit(1);
}
db = server.db+dbid;
- continue; /* Read type again. */
- } else if (type == REDIS_RDB_OPCODE_RESIZEDB) {
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_RESIZEDB) {
/* RESIZEDB: Hint about the size of the keys in the currently
* selected data base, in order to avoid useless rehashing. */
- uint32_t db_size, expires_size;
- if ((db_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ uint64_t db_size, expires_size;
+ if ((db_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
goto eoferr;
- if ((expires_size = rdbLoadLen(&rdb,NULL)) == REDIS_RDB_LENERR)
+ if ((expires_size = rdbLoadLen(rdb,NULL)) == RDB_LENERR)
goto eoferr;
dictExpand(db->dict,db_size);
dictExpand(db->expires,expires_size);
- continue; /* Read type again. */
- } else if (type == REDIS_RDB_OPCODE_AUX) {
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_AUX) {
            /* AUX: generic string-string fields. Used to add state to the RDB
* which is backward compatible. Implementations of RDB loading
             * are required to skip AUX fields they don't understand.
*
* An AUX field is composed of two strings: key and value. */
robj *auxkey, *auxval;
- if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
- if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((auxkey = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
if (((char*)auxkey->ptr)[0] == '%') {
                /* All the fields with a name starting with '%' are considered
* information fields and are logged at startup with a log
* level of NOTICE. */
- redisLog(REDIS_NOTICE,"RDB '%s': %s", auxkey->ptr, auxval->ptr);
+ serverLog(LL_NOTICE,"RDB '%s': %s",
+ (char*)auxkey->ptr,
+ (char*)auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-stream-db")) {
+ if (rsi) rsi->repl_stream_db = atoi(auxval->ptr);
+ } else if (!strcasecmp(auxkey->ptr,"repl-id")) {
+ if (rsi && sdslen(auxval->ptr) == CONFIG_RUN_ID_SIZE) {
+ memcpy(rsi->repl_id,auxval->ptr,CONFIG_RUN_ID_SIZE+1);
+ rsi->repl_id_is_set = 1;
+ }
+ } else if (!strcasecmp(auxkey->ptr,"repl-offset")) {
+ if (rsi) rsi->repl_offset = strtoll(auxval->ptr,NULL,10);
+ } else if (!strcasecmp(auxkey->ptr,"lua")) {
+ /* Load the script back in memory. */
+ if (luaCreateFunction(NULL,server.lua,auxval) == NULL) {
+ rdbExitReportCorruptRDB(
+ "Can't load Lua script from RDB file! "
+ "BODY: %s", auxval->ptr);
+ }
} else {
/* We ignore fields we don't understand, as by AUX field
* contract. */
- redisLog(REDIS_DEBUG,"Unrecognized RDB AUX field: '%s'",
- auxkey->ptr);
+ serverLog(LL_DEBUG,"Unrecognized RDB AUX field: '%s'",
+ (char*)auxkey->ptr);
}
decrRefCount(auxkey);
decrRefCount(auxval);
continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_MODULE_AUX) {
+ /* This is just for compatibility with the future: we have plans
+ * to add the ability for modules to store anything in the RDB
+ * file, like data that is not related to the Redis key space.
+ * Such data will potentially be stored both before and after the
+ * RDB keys-values section. For this reason since RDB version 9,
+ * we have the ability to read a MODULE_AUX opcode followed by an
+ * identifier of the module, and a serialized value in "MODULE V2"
+ * format. */
+ uint64_t moduleid = rdbLoadLen(rdb,NULL);
+ moduleType *mt = moduleTypeLookupModuleByID(moduleid);
+ char name[10];
+ moduleTypeNameByID(name,moduleid);
+
+ if (!rdbCheckMode && mt == NULL) {
+ /* Unknown module. */
+ serverLog(LL_WARNING,"The RDB file contains AUX module data I can't load: no matching module '%s'", name);
+ exit(1);
+ } else if (!rdbCheckMode && mt != NULL) {
+ /* This version of Redis actually does not know what to do
+             * with module AUX data... */
+ serverLog(LL_WARNING,"The RDB file contains AUX module data I can't load for the module '%s'. Probably you want to use a newer version of Redis which implements aux data callbacks", name);
+ exit(1);
+ } else {
+ /* RDB check mode. */
+ robj *aux = rdbLoadCheckModuleValue(rdb,name);
+ decrRefCount(aux);
+ }
}
/* Read key */
- if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr;
/* Read value */
- if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
+ if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr;
/* Check if the key already expired. This function is used when loading
* an RDB file from disk, either at startup, or when an RDB was
* received from the master. In the latter case, the master is
         * responsible for key expiry. If we expired keys here, the
* snapshot taken by the master may not be reflected on the slave. */
- if (server.masterhost == NULL && expiretime != -1 && expiretime < now) {
+ if (server.masterhost == NULL && !loading_aof && expiretime != -1 && expiretime < now) {
decrRefCount(key);
decrRefCount(val);
- continue;
- }
- /* Add the new object in the hash table */
- dbAdd(db,key,val);
+ } else {
+ /* Add the new object in the hash table */
+ dbAdd(db,key,val);
- /* Set the expire time if needed */
- if (expiretime != -1) setExpire(db,key,expiretime);
+ /* Set the expire time if needed */
+ if (expiretime != -1) setExpire(NULL,db,key,expiretime);
+
+ /* Set usage information (for eviction). */
+ objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock);
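+        /* (lfu_freq and lru_idle remain -1 when no FREQ/IDLE opcode
+         * preceded this key; see their initialization before the loop.) */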
- decrRefCount(key);
+ /* Decrement the key refcount since dbAdd() will take its
+ * own reference. */
+ decrRefCount(key);
+ }
+
+        /* Reset the state that is key-specific and is populated by
+ * opcodes before the key, so that we start from scratch again. */
+ expiretime = -1;
+ lfu_freq = -1;
+ lru_idle = -1;
}
/* Verify the checksum if RDB version is >= 5 */
- if (rdbver >= 5 && server.rdb_checksum) {
- uint64_t cksum, expected = rdb.cksum;
-
- if (rioRead(&rdb,&cksum,8) == 0) goto eoferr;
- memrev64ifbe(&cksum);
- if (cksum == 0) {
- redisLog(REDIS_WARNING,"RDB file was saved with checksum disabled: no check performed.");
- } else if (cksum != expected) {
- redisLog(REDIS_WARNING,"Wrong RDB checksum. Aborting now.");
- exit(1);
+ if (rdbver >= 5) {
+ uint64_t cksum, expected = rdb->cksum;
+
+ if (rioRead(rdb,&cksum,8) == 0) goto eoferr;
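+        /* Note that the 8 checksum bytes are consumed from the stream even
+         * when rdb_checksum is off, so the read position stays correct. */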
+ if (server.rdb_checksum) {
+ memrev64ifbe(&cksum);
+ if (cksum == 0) {
+ serverLog(LL_WARNING,"RDB file was saved with checksum disabled: no check performed.");
+ } else if (cksum != expected) {
+ serverLog(LL_WARNING,"Wrong RDB checksum. Aborting now.");
+ rdbExitReportCorruptRDB("RDB CRC error");
+ }
}
}
+ return C_OK;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+ serverLog(LL_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
+ rdbExitReportCorruptRDB("Unexpected EOF reading RDB file");
+ return C_ERR; /* Just to avoid warning */
+}
+/* Like rdbLoadRio() but takes a filename instead of a rio stream. The
+ * filename is open for reading and a rio stream object created in order
+ * to do the actual loading. Moreover the ETA displayed in the INFO
+ * output is initialized and finalized.
+ *
+ * If you pass an 'rsi' structure initialized with RDB_SAVE_INFO_INIT, the
+ * loading code will fill the information fields in the structure. */
+int rdbLoad(char *filename, rdbSaveInfo *rsi) {
+ FILE *fp;
+ rio rdb;
+ int retval;
+
+ if ((fp = fopen(filename,"r")) == NULL) return C_ERR;
+ startLoading(fp);
+ rioInitWithFile(&rdb,fp);
+ retval = rdbLoadRio(&rdb,rsi,0);
fclose(fp);
stopLoading();
- return REDIS_OK;
-
-eoferr: /* unexpected end of file is handled here with a fatal exit */
- redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
- exit(1);
- return REDIS_ERR; /* Just to avoid warning */
+ return retval;
}
/* A background saving child (BGSAVE) terminated its work. Handle this.
* This function covers the case of actual BGSAVEs. */
void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
if (!bysignal && exitcode == 0) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background saving terminated with success");
server.dirty = server.dirty - server.dirty_before_bgsave;
server.lastsave = time(NULL);
- server.lastbgsave_status = REDIS_OK;
+ server.lastbgsave_status = C_OK;
} else if (!bysignal && exitcode != 0) {
- redisLog(REDIS_WARNING, "Background saving error");
- server.lastbgsave_status = REDIS_ERR;
+ serverLog(LL_WARNING, "Background saving error");
+ server.lastbgsave_status = C_ERR;
} else {
mstime_t latency;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Background saving terminated by signal %d", bysignal);
latencyStartMonitor(latency);
rdbRemoveTempFile(server.rdb_child_pid);
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency);
/* SIGUSR1 is whitelisted, so we have a way to kill a child without
- * tirggering an error conditon. */
+         * triggering an error condition. */
if (bysignal != SIGUSR1)
- server.lastbgsave_status = REDIS_ERR;
+ server.lastbgsave_status = C_ERR;
}
server.rdb_child_pid = -1;
- server.rdb_child_type = REDIS_RDB_CHILD_TYPE_NONE;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start;
server.rdb_save_time_start = -1;
/* Possibly there are slaves waiting for a BGSAVE in order to be served
* (the first stage of SYNC is a bulk transfer of dump.rdb) */
- updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? REDIS_OK : REDIS_ERR, REDIS_RDB_CHILD_TYPE_DISK);
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, RDB_CHILD_TYPE_DISK);
}
/* A background saving child (BGSAVE) terminated its work. Handle this.
@@ -1417,16 +2119,16 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
uint64_t *ok_slaves;
if (!bysignal && exitcode == 0) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Background RDB transfer terminated with success");
} else if (!bysignal && exitcode != 0) {
- redisLog(REDIS_WARNING, "Background transfer error");
+ serverLog(LL_WARNING, "Background transfer error");
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Background transfer terminated by signal %d", bysignal);
}
server.rdb_child_pid = -1;
- server.rdb_child_type = REDIS_RDB_CHILD_TYPE_NONE;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_save_time_start = -1;
/* If the child returns an OK exit code, read the set of slave client
@@ -1434,7 +2136,7 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
* in error state.
*
* If the process returned an error, consider the list of slaves that
- * can continue to be emtpy, so that it's just a special case of the
+ * can continue to be empty, so that it's just a special case of the
* normal code path. */
ok_slaves = zmalloc(sizeof(uint64_t)); /* Make space for the count. */
ok_slaves[0] = 0;
@@ -1468,9 +2170,9 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
uint64_t j;
int errorcode = 0;
@@ -1484,14 +2186,14 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
}
}
if (j == ok_slaves[0] || errorcode != 0) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Closing slave %s: child->slave RDB transfer failed: %s",
replicationGetSlaveName(slave),
(errorcode == 0) ? "RDB transfer child aborted"
: strerror(errorcode));
freeClient(slave);
} else {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Slave %s correctly received the streamed RDB file.",
replicationGetSlaveName(slave));
/* Restore the socket as non-blocking. */
@@ -1502,27 +2204,27 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
}
zfree(ok_slaves);
- updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? REDIS_OK : REDIS_ERR, REDIS_RDB_CHILD_TYPE_SOCKET);
+ updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, RDB_CHILD_TYPE_SOCKET);
}
/* When a background RDB saving/transfer terminates, call the right handler. */
void backgroundSaveDoneHandler(int exitcode, int bysignal) {
switch(server.rdb_child_type) {
- case REDIS_RDB_CHILD_TYPE_DISK:
+ case RDB_CHILD_TYPE_DISK:
backgroundSaveDoneHandlerDisk(exitcode,bysignal);
break;
- case REDIS_RDB_CHILD_TYPE_SOCKET:
+ case RDB_CHILD_TYPE_SOCKET:
backgroundSaveDoneHandlerSocket(exitcode,bysignal);
break;
default:
- redisPanic("Unknown RDB child type.");
+ serverPanic("Unknown RDB child type.");
break;
}
}
/* Spawn an RDB child that writes the RDB to the sockets of the slaves
- * that are currently in REDIS_REPL_WAIT_BGSAVE_START state. */
-int rdbSaveToSlavesSockets(void) {
+ * that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
+int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
int *fds;
uint64_t *clientids;
int numfds;
@@ -1532,12 +2234,12 @@ int rdbSaveToSlavesSockets(void) {
long long start;
int pipefds[2];
- if (server.rdb_child_pid != -1) return REDIS_ERR;
+ if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
    /* Before forking, create a pipe that will be used in order to
* send back to the parent the IDs of the slaves that successfully
* received all the writes. */
- if (pipe(pipefds) == -1) return REDIS_ERR;
+ if (pipe(pipefds) == -1) return C_ERR;
server.rdb_pipe_read_result_from_child = pipefds[0];
server.rdb_pipe_write_result_to_parent = pipefds[1];
@@ -1552,13 +2254,13 @@ int rdbSaveToSlavesSockets(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
clientids[numfds] = slave->id;
fds[numfds++] = slave->fd;
- slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- /* Put the socket in non-blocking mode to simplify RDB transfer.
+ replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
+ /* Put the socket in blocking mode to simplify RDB transfer.
     * We'll restore it when the child returns (since the duped socket
* will share the O_NONBLOCK attribute with the parent). */
anetBlock(NULL,slave->fd);
@@ -1567,6 +2269,7 @@ int rdbSaveToSlavesSockets(void) {
}
/* Create the child process. */
+ openChildInfoPipe();
start = ustime();
if ((childpid = fork()) == 0) {
/* Child */
@@ -1579,19 +2282,22 @@ int rdbSaveToSlavesSockets(void) {
closeListeningSockets(0);
redisSetProcTitle("redis-rdb-to-slaves");
- retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL);
- if (retval == REDIS_OK && rioFlush(&slave_sockets) == 0)
- retval = REDIS_ERR;
+ retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL,rsi);
+ if (retval == C_OK && rioFlush(&slave_sockets) == 0)
+ retval = C_ERR;
- if (retval == REDIS_OK) {
- size_t private_dirty = zmalloc_get_private_dirty();
+ if (retval == C_OK) {
+ size_t private_dirty = zmalloc_get_private_dirty(-1);
if (private_dirty) {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"RDB: %zu MB of memory used by copy-on-write",
private_dirty/(1024*1024));
}
+ server.child_info_data.cow_size = private_dirty;
+ sendChildInfo(CHILD_INFO_TYPE_RDB);
+
/* If we are returning OK, at least one slave was served
* with the RDB file as expected, so we need to send a report
* to the parent via the pipe. The format of the message is:
@@ -1627,57 +2333,152 @@ int rdbSaveToSlavesSockets(void) {
write(server.rdb_pipe_write_result_to_parent,msg,msglen)
!= msglen)
{
- retval = REDIS_ERR;
+ retval = C_ERR;
}
zfree(msg);
}
zfree(clientids);
- exitFromChild((retval == REDIS_OK) ? 0 : 1);
+ rioFreeFdset(&slave_sockets);
+ exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
- zfree(clientids); /* Not used by parent. Free ASAP. */
- server.stat_fork_time = ustime()-start;
- server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
- latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
if (childpid == -1) {
- redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
+ serverLog(LL_WARNING,"Can't save in background: fork: %s",
strerror(errno));
- zfree(fds);
+
+ /* Undo the state change. The caller will perform cleanup on
+ * all the slaves in BGSAVE_START state, but an early call to
+ * replicationSetupSlaveForFullResync() turned it into BGSAVE_END */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+ int j;
+
+ for (j = 0; j < numfds; j++) {
+ if (slave->id == clientids[j]) {
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ break;
+ }
+ }
+ }
close(pipefds[0]);
close(pipefds[1]);
- return REDIS_ERR;
+ closeChildInfoPipe();
+ } else {
+ server.stat_fork_time = ustime()-start;
+ server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
+ latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
+
+ serverLog(LL_NOTICE,"Background RDB transfer started by pid %d",
+ childpid);
+ server.rdb_save_time_start = time(NULL);
+ server.rdb_child_pid = childpid;
+ server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
+ updateDictResizePolicy();
}
- redisLog(REDIS_NOTICE,"Background RDB transfer started by pid %d",childpid);
- server.rdb_save_time_start = time(NULL);
- server.rdb_child_pid = childpid;
- server.rdb_child_type = REDIS_RDB_CHILD_TYPE_SOCKET;
- updateDictResizePolicy();
+ zfree(clientids);
zfree(fds);
- return REDIS_OK;
+ return (childpid == -1) ? C_ERR : C_OK;
}
- return REDIS_OK; /* unreached */
+ return C_OK; /* Unreached. */
}
-void saveCommand(redisClient *c) {
+void saveCommand(client *c) {
if (server.rdb_child_pid != -1) {
addReplyError(c,"Background save already in progress");
return;
}
- if (rdbSave(server.rdb_filename) == REDIS_OK) {
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(server.rdb_filename,rsiptr) == C_OK) {
addReply(c,shared.ok);
} else {
addReply(c,shared.err);
}
}
-void bgsaveCommand(redisClient *c) {
+/* BGSAVE [SCHEDULE] */
+void bgsaveCommand(client *c) {
+ int schedule = 0;
+
+ /* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite
+     * is in progress. Instead of returning an error, the BGSAVE gets scheduled. */
+ if (c->argc > 1) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
+ schedule = 1;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+
if (server.rdb_child_pid != -1) {
addReplyError(c,"Background save already in progress");
} else if (server.aof_child_pid != -1) {
- addReplyError(c,"Can't BGSAVE while AOF log rewriting is in progress");
- } else if (rdbSaveBackground(server.rdb_filename) == REDIS_OK) {
+ if (schedule) {
+ server.rdb_bgsave_scheduled = 1;
+ addReplyStatus(c,"Background saving scheduled");
+ } else {
+ addReplyError(c,
+                "An AOF log rewrite is in progress: can't BGSAVE right now. "
+ "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
+ "possible.");
+ }
+ } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) {
addReplyStatus(c,"Background saving started");
} else {
addReply(c,shared.err);
}
}
+
+/* Populate the rdbSaveInfo structure used to persist the replication
+ * information inside the RDB file. Currently the structure explicitly
+ * contains just the currently selected DB from the master stream. Moreover,
+ * if the rdbSave*() family functions receive a NULL rsi structure, the
+ * replication ID/offset is not saved either. The function populates 'rsi',
+ * which is normally stack-allocated in the caller, and returns the populated
+ * pointer if the instance has a valid master client. Otherwise NULL is
+ * returned, and the RDB saving will not persist any replication related
+ * information. */
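+/* A usage sketch, mirroring saveCommand() above (illustrative only):
+ *
+ *     rdbSaveInfo rsi, *rsiptr;
+ *     rsiptr = rdbPopulateSaveInfo(&rsi);
+ *     rdbSave(server.rdb_filename,rsiptr);
+ *
+ * A NULL 'rsiptr' is fine: in that case the saving functions simply do
+ * not persist any replication information. */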
+rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
+ rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT;
+ *rsi = rsi_init;
+
+ /* If the instance is a master, we can populate the replication info
+ * only when repl_backlog is not NULL. If the repl_backlog is NULL,
+ * it means that the instance isn't in any replication chains. In this
+ * scenario the replication info is useless, because when a slave
+ * connects to us, the NULL repl_backlog will trigger a full
+ * synchronization, at the same time we will use a new replid and clear
+ * replid2. */
+ if (!server.masterhost && server.repl_backlog) {
+ /* Note that when server.slaveseldb is -1, it means that this master
+ * didn't apply any write commands after a full synchronization.
+         * So we can let repl_stream_db be 0: this allows a restarted slave
+         * to reload the replication ID/offset, and it is safe because the
+         * next write command must generate a SELECT statement. */
+ rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
+ return rsi;
+ }
+
+ /* If the instance is a slave we need a connected master
+ * in order to fetch the currently selected DB. */
+ if (server.master) {
+ rsi->repl_stream_db = server.master->db->id;
+ return rsi;
+ }
+
+ /* If we have a cached master we can use it in order to populate the
+ * replication selected DB info inside the RDB file: the slave can
+ * increment the master_repl_offset only from data arriving from the
+ * master, so if we are disconnected the offset in the cached master
+ * is valid. */
+ if (server.cached_master) {
+ rsi->repl_stream_db = server.cached_master->db->id;
+ return rsi;
+ }
+ return NULL;
+}
diff --git a/src/rdb.h b/src/rdb.h
index 6319f5d02..7b9486169 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -27,94 +27,133 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __REDIS_RDB_H
-#define __REDIS_RDB_H
+#ifndef __RDB_H
+#define __RDB_H
#include <stdio.h>
#include "rio.h"
/* TBD: include only necessary headers. */
-#include "redis.h"
+#include "server.h"
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define REDIS_RDB_VERSION 7
+#define RDB_VERSION 9
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
* the first byte to interpreter the length:
*
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
+ * 00|XXXXXX => if the two MSB are 00 the len is the 6 bits of this byte
+ * 01|XXXXXX XXXXXXXX => 01, the len is 14 bits, 6 bits + 8 bits of next byte
+ * 10|000000 [32 bit integer] => A full 32 bit len in net byte order will follow
+ * 10|000001 [64 bit integer] => A full 64 bit len in net byte order will follow
+ * 11|OBKIND this means: specially encoded object will follow. The six bit
 * number specifies the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
+ * See the RDB_ENC_* defines.
*
 * Lengths up to 63 are stored using a single byte, most DB keys, and many
* values, will fit inside. */
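+
+/* Worked examples of the encodings above (illustrative only): length 5
+ * fits in 6 bits and is stored as the single byte 0x05; length 300 uses
+ * the 14 bit form 0x41 0x2C (01|000001 00101100, i.e. 1*256+44); and
+ * length 70000 is the byte 0x80 followed by the 32 bit big endian value
+ * 0x00011170. */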
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
+#define RDB_6BITLEN 0
+#define RDB_14BITLEN 1
+#define RDB_32BITLEN 0x80
+#define RDB_64BITLEN 0x81
+#define RDB_ENCVAL 3
+#define RDB_LENERR UINT64_MAX
/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
+ * set, the remaining six bits specify a special encoding for the object
* accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
+#define RDB_ENC_INT8 0 /* 8 bit signed integer */
+#define RDB_ENC_INT16 1 /* 16 bit signed integer */
+#define RDB_ENC_INT32 2 /* 32 bit signed integer */
+#define RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-/* Dup object types to RDB object types. Only reason is readability (are we
- * dealing with RDB types or with in-memory object types?). */
-#define REDIS_RDB_TYPE_STRING 0
-#define REDIS_RDB_TYPE_LIST 1
-#define REDIS_RDB_TYPE_SET 2
-#define REDIS_RDB_TYPE_ZSET 3
-#define REDIS_RDB_TYPE_HASH 4
+/* Map object types to RDB object types. Macros starting with OBJ_ are for
+ * memory storage and may change. Instead RDB types must be fixed because
+ * we store them on disk. */
+#define RDB_TYPE_STRING 0
+#define RDB_TYPE_LIST 1
+#define RDB_TYPE_SET 2
+#define RDB_TYPE_ZSET 3
+#define RDB_TYPE_HASH 4
+#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */
+#define RDB_TYPE_MODULE 6
+#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without
+ the generating module being loaded. */
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Object types for encoded objects. */
-#define REDIS_RDB_TYPE_HASH_ZIPMAP 9
-#define REDIS_RDB_TYPE_LIST_ZIPLIST 10
-#define REDIS_RDB_TYPE_SET_INTSET 11
-#define REDIS_RDB_TYPE_ZSET_ZIPLIST 12
-#define REDIS_RDB_TYPE_HASH_ZIPLIST 13
-#define REDIS_RDB_TYPE_LIST_QUICKLIST 14
+#define RDB_TYPE_HASH_ZIPMAP 9
+#define RDB_TYPE_LIST_ZIPLIST 10
+#define RDB_TYPE_SET_INTSET 11
+#define RDB_TYPE_ZSET_ZIPLIST 12
+#define RDB_TYPE_HASH_ZIPLIST 13
+#define RDB_TYPE_LIST_QUICKLIST 14
+#define RDB_TYPE_STREAM_LISTPACKS 15
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) ((t >= 0 && t <= 4) || (t >= 9 && t <= 14))
+#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 15))
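+/* (Note the hole at 8: it is not a valid object type, which is why the
+ * macro checks two ranges.) */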
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
-#define REDIS_RDB_OPCODE_AUX 250
-#define REDIS_RDB_OPCODE_RESIZEDB 251
-#define REDIS_RDB_OPCODE_EXPIRETIME_MS 252
-#define REDIS_RDB_OPCODE_EXPIRETIME 253
-#define REDIS_RDB_OPCODE_SELECTDB 254
-#define REDIS_RDB_OPCODE_EOF 255
+#define RDB_OPCODE_MODULE_AUX 247 /* Module auxiliary data. */
+#define RDB_OPCODE_IDLE 248 /* LRU idle time. */
+#define RDB_OPCODE_FREQ 249 /* LFU frequency. */
+#define RDB_OPCODE_AUX 250 /* RDB aux field. */
+#define RDB_OPCODE_RESIZEDB 251 /* Hash table resize hint. */
+#define RDB_OPCODE_EXPIRETIME_MS 252 /* Expire time in milliseconds. */
+#define RDB_OPCODE_EXPIRETIME 253 /* Old expire time in seconds. */
+#define RDB_OPCODE_SELECTDB 254 /* DB number of the following keys. */
+#define RDB_OPCODE_EOF 255 /* End of the RDB file. */
+
+/* Module serialized values sub opcodes */
+#define RDB_MODULE_OPCODE_EOF 0 /* End of module value. */
+#define RDB_MODULE_OPCODE_SINT 1 /* Signed integer. */
+#define RDB_MODULE_OPCODE_UINT 2 /* Unsigned integer. */
+#define RDB_MODULE_OPCODE_FLOAT 3 /* Float. */
+#define RDB_MODULE_OPCODE_DOUBLE 4 /* Double. */
+#define RDB_MODULE_OPCODE_STRING 5 /* String. */
+
+/* rdbLoad...() functions flags. */
+#define RDB_LOAD_NONE 0
+#define RDB_LOAD_ENC (1<<0)
+#define RDB_LOAD_PLAIN (1<<1)
+#define RDB_LOAD_SDS (1<<2)
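+/* (These flags control what the rdbLoad...() string functions return; for
+ * instance the stream loading code in rdb.c above passes RDB_LOAD_SDS to
+ * rdbGenericLoadStringObject() to get back plain sds strings instead of
+ * robj objects.) */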
+
+#define RDB_SAVE_NONE 0
+#define RDB_SAVE_AOF_PREAMBLE (1<<0)
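+/* (RDB_SAVE_AOF_PREAMBLE is set when the RDB content is emitted as the
+ * preamble of a rewritten AOF file; see the preamble handling added to
+ * redis-check-aof.c below.) */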
int rdbSaveType(rio *rdb, unsigned char type);
int rdbLoadType(rio *rdb);
int rdbSaveTime(rio *rdb, time_t t);
time_t rdbLoadTime(rio *rdb);
-int rdbSaveLen(rio *rdb, uint32_t len);
-uint32_t rdbLoadLen(rio *rdb, int *isencoded);
+int rdbSaveLen(rio *rdb, uint64_t len);
+int rdbSaveMillisecondTime(rio *rdb, long long t);
+long long rdbLoadMillisecondTime(rio *rdb, int rdbver);
+uint64_t rdbLoadLen(rio *rdb, int *isencoded);
+int rdbLoadLenByRef(rio *rdb, int *isencoded, uint64_t *lenptr);
int rdbSaveObjectType(rio *rdb, robj *o);
int rdbLoadObjectType(rio *rdb);
-int rdbLoad(char *filename);
-int rdbSaveBackground(char *filename);
-int rdbSaveToSlavesSockets(void);
+int rdbLoad(char *filename, rdbSaveInfo *rsi);
+int rdbSaveBackground(char *filename, rdbSaveInfo *rsi);
+int rdbSaveToSlavesSockets(rdbSaveInfo *rsi);
void rdbRemoveTempFile(pid_t childpid);
-int rdbSave(char *filename);
-int rdbSaveObject(rio *rdb, robj *o);
-off_t rdbSavedObjectLen(robj *o);
-off_t rdbSavedObjectPages(robj *o);
+int rdbSave(char *filename, rdbSaveInfo *rsi);
+ssize_t rdbSaveObject(rio *rdb, robj *o);
+size_t rdbSavedObjectLen(robj *o);
robj *rdbLoadObject(int type, rio *rdb);
void backgroundSaveDoneHandler(int exitcode, int bysignal);
-int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime, long long now);
+int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime);
robj *rdbLoadStringObject(rio *rdb);
+ssize_t rdbSaveStringObject(rio *rdb, robj *obj);
+ssize_t rdbSaveRawString(rio *rdb, unsigned char *s, size_t len);
+void *rdbGenericLoadStringObject(rio *rdb, int flags, size_t *lenptr);
+int rdbSaveBinaryDoubleValue(rio *rdb, double val);
+int rdbLoadBinaryDoubleValue(rio *rdb, double *val);
+int rdbSaveBinaryFloatValue(rio *rdb, float val);
+int rdbLoadBinaryFloatValue(rio *rdb, float *val);
+int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof);
+rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi);
#endif
diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c
index 7567e0181..d30879dc4 100644
--- a/src/redis-benchmark.c
+++ b/src/redis-benchmark.c
@@ -40,13 +40,13 @@
#include <signal.h>
#include <assert.h>
+#include <sds.h> /* Use hiredis sds. */
#include "ae.h"
#include "hiredis.h"
-#include "sds.h"
#include "adlist.h"
#include "zmalloc.h"
-#define REDIS_NOTUSED(V) ((void) V)
+#define UNUSED(V) ((void) V)
#define RANDPTR_INITIAL_SIZE 8
static struct config {
@@ -65,6 +65,7 @@ static struct config {
int randomkeys_keyspacelen;
int keepalive;
int pipeline;
+ int showerrors;
long long start;
long long totlatency;
long long *latency;
@@ -86,7 +87,7 @@ typedef struct _client {
char **randptr; /* Pointers to :rand: strings inside the command buf */
size_t randlen; /* Number of pointers in client->randptr */
size_t randfree; /* Number of unused pointers in client->randptr */
- unsigned int written; /* Bytes of 'obuf' already written */
+ size_t written; /* Bytes of 'obuf' already written */
long long start; /* Start time of a request */
long long latency; /* Request latency */
int pending; /* Number of pending requests (replies to consume) */
@@ -188,9 +189,9 @@ static void clientDone(client c) {
static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
client c = privdata;
void *reply = NULL;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(fd);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
/* Calculate latency only for the first read event. This means that the
* server already sent the reply and we need to parse it. Parsing overhead
@@ -212,6 +213,16 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
exit(1);
}
+ if (config.showerrors) {
+ static time_t lasterr_time = 0;
+ time_t now = time(NULL);
+ redisReply *r = reply;
+ if (r->type == REDIS_REPLY_ERROR && lasterr_time != now) {
+ lasterr_time = now;
+ printf("Error from server: %s\n", r->str);
+ }
+ }
+
freeReplyObject(reply);
        /* This is an OK reply for prefix commands such as AUTH and SELECT. */
if (c->prefix_pending > 0) {
@@ -227,7 +238,7 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
c->randptr[j] -= c->prefixlen;
c->prefixlen = 0;
}
- continue;
+ continue;
}
if (config.requests_finished < config.requests)
@@ -246,9 +257,9 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
client c = privdata;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(fd);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(fd);
+ UNUSED(mask);
/* Initialize request when nothing was written. */
if (c->written == 0) {
@@ -266,7 +277,7 @@ static void writeHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (sdslen(c->obuf) > c->written) {
void *ptr = c->obuf+c->written;
- int nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
+ ssize_t nwritten = write(c->context->fd,ptr,sdslen(c->obuf)-c->written);
if (nwritten == -1) {
if (errno != EPIPE)
fprintf(stderr, "Writing to socket: %s\n", strerror(errno));
@@ -518,6 +529,8 @@ int parseOptions(int argc, const char **argv) {
config.loop = 1;
} else if (!strcmp(argv[i],"-I")) {
config.idlemode = 1;
+ } else if (!strcmp(argv[i],"-e")) {
+ config.showerrors = 1;
} else if (!strcmp(argv[i],"-t")) {
if (lastarg) goto invalid;
/* We get the list of tests to run as a string in the form
@@ -552,15 +565,15 @@ invalid:
usage:
printf(
-"Usage: redis-benchmark [-h <host>] [-p <port>] [-c <clients>] [-n <requests]> [-k <boolean>]\n\n"
+"Usage: redis-benchmark [-h <host>] [-p <port>] [-c <clients>] [-n <requests>] [-k <boolean>]\n\n"
" -h <hostname> Server hostname (default 127.0.0.1)\n"
" -p <port> Server port (default 6379)\n"
" -s <socket> Server socket (overrides host and port)\n"
" -a <password> Password for Redis Auth\n"
" -c <clients> Number of parallel connections (default 50)\n"
" -n <requests> Total number of requests (default 100000)\n"
-" -d <size> Data size of SET/GET value in bytes (default 2)\n"
-" -dbnum <db> SELECT the specified db number (default 0)\n"
+" -d <size> Data size of SET/GET value in bytes (default 3)\n"
+" --dbnum <db> SELECT the specified db number (default 0)\n"
" -k <boolean> 1=keep alive 0=reconnect (default 1)\n"
" -r <keyspacelen> Use random keys for SET/GET/INCR, random values for SADD\n"
" Using this option the benchmark will expand the string __rand_int__\n"
@@ -569,6 +582,8 @@ usage:
" is executed. Default tests use this to hit random keys in the\n"
" specified range.\n"
" -P <numreq> Pipeline <numreq> requests. Default 1 (no pipeline).\n"
+" -e If server replies with errors, show them on stdout.\n"
+" (no more than 1 error per second is displayed)\n"
" -q Quiet. Just show query/sec values\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
@@ -595,11 +610,11 @@ usage:
}
int showThroughput(struct aeEventLoop *eventLoop, long long id, void *clientData) {
- REDIS_NOTUSED(eventLoop);
- REDIS_NOTUSED(id);
- REDIS_NOTUSED(clientData);
+ UNUSED(eventLoop);
+ UNUSED(id);
+ UNUSED(clientData);
- if (config.liveclients == 0) {
+ if (config.liveclients == 0 && config.requests_finished != config.requests) {
fprintf(stderr,"All clients disconnected... aborting.\n");
exit(1);
}
@@ -649,6 +664,7 @@ int main(int argc, const char **argv) {
config.keepalive = 1;
config.datasize = 3;
config.pipeline = 1;
+ config.showerrors = 0;
config.randomkeys = 0;
config.randomkeys_keyspacelen = 0;
config.quiet = 0;
@@ -763,6 +779,13 @@ int main(int argc, const char **argv) {
free(cmd);
}
+ if (test_is_selected("hset")) {
+ len = redisFormatCommand(&cmd,
+ "HSET myset:__rand_int__ element:__rand_int__ %s",data);
+ benchmark("HSET",cmd,len);
+ free(cmd);
+ }
+
if (test_is_selected("spop")) {
len = redisFormatCommand(&cmd,"SPOP myset");
benchmark("SPOP",cmd,len);
diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c
index 6c8f55279..c4d5a225e 100644
--- a/src/redis-check-aof.c
+++ b/src/redis-check-aof.c
@@ -28,13 +28,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "fmacros.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
+#include "server.h"
#include <sys/stat.h>
-#include "config.h"
#define ERROR(...) { \
char __buf[1024]; \
@@ -60,7 +55,7 @@ int readLong(FILE *fp, char prefix, long *target) {
return 0;
}
if (buf[0] != prefix) {
- ERROR("Expected prefix '%c', got: '%c'",buf[0],prefix);
+ ERROR("Expected prefix '%c', got: '%c'",prefix,buf[0]);
return 0;
}
*target = strtol(buf+1,&eptr,10);
@@ -87,7 +82,7 @@ int readString(FILE *fp, char** target) {
/* Increase length to also consume \r\n */
len += 2;
- *target = (char*)malloc(len);
+ *target = (char*)zmalloc(len);
if (!readBytes(fp,*target,len)) {
return 0;
}
@@ -127,12 +122,12 @@ off_t process(FILE *fp) {
}
}
}
- free(str);
+ zfree(str);
}
/* Stop if the loop did not finish */
if (i < argc) {
- if (str) free(str);
+ if (str) zfree(str);
break;
}
}
@@ -146,7 +141,7 @@ off_t process(FILE *fp) {
return pos;
}
-int main(int argc, char **argv) {
+int redis_check_aof_main(int argc, char **argv) {
char *filename;
int fix = 0;
@@ -185,6 +180,25 @@ int main(int argc, char **argv) {
exit(1);
}
+    /* This AOF file may have an RDB preamble. Check for this first, and
+     * if so, start by processing the RDB part. */
+ if (size >= 8) { /* There must be at least room for the RDB header. */
+ char sig[5];
+ int has_preamble = fread(sig,sizeof(sig),1,fp) == 1 &&
+ memcmp(sig,"REDIS",sizeof(sig)) == 0;
+ rewind(fp);
+ if (has_preamble) {
+ printf("The AOF appears to start with an RDB preamble.\n"
+ "Checking the RDB preamble to start:\n");
+ if (redis_check_rdb_main(argc,argv,fp) == C_ERR) {
+ printf("RDB preamble of AOF file is not sane, aborting.\n");
+ exit(1);
+ } else {
+ printf("RDB preamble is OK, proceeding with AOF tail...\n");
+ }
+ }
+ }
+
off_t pos = process(fp);
off_t diff = size-pos;
printf("AOF analyzed: size=%lld, ok_up_to=%lld, diff=%lld\n",
@@ -206,7 +220,8 @@ int main(int argc, char **argv) {
printf("Successfully truncated AOF\n");
}
} else {
- printf("AOF is not valid\n");
+ printf("AOF is not valid. "
+ "Use the --fix option to try fixing it.\n");
exit(1);
}
} else {
@@ -214,5 +229,5 @@ int main(int argc, char **argv) {
}
fclose(fp);
- return 0;
+ exit(0);
}
diff --git a/src/redis-check-dump.c b/src/redis-check-dump.c
deleted file mode 100644
index 546462001..000000000
--- a/src/redis-check-dump.c
+++ /dev/null
@@ -1,771 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <stdint.h>
-#include <limits.h>
-#include "lzf.h"
-#include "crc64.h"
-
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-#define REDIS_HASH_ZIPMAP 9
-#define REDIS_LIST_ZIPLIST 10
-#define REDIS_SET_INTSET 11
-#define REDIS_ZSET_ZIPLIST 12
-#define REDIS_HASH_ZIPLIST 13
-
-/* Objects encoding. Some kind of objects like Strings and Hashes can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of this fields for this object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
-#define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
-
-/* Object types only used for dumping to disk */
-#define REDIS_EXPIRETIME_MS 252
-#define REDIS_EXPIRETIME 253
-#define REDIS_SELECTDB 254
-#define REDIS_EOF 255
-
-/* Defines related to the dump file format. To store 32 bits lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpreter the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
- * 11|000000 this means: specially encoded object will follow. The six bits
- * number specify the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte, most DB keys, and may
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object
- * accordingly to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
-#define ERROR(...) { \
- printf(__VA_ARGS__); \
- exit(1); \
-}
-
-/* data type to hold offset in file and size */
-typedef struct {
- void *data;
- size_t size;
- size_t offset;
-} pos;
-
-static unsigned char level = 0;
-static pos positions[16];
-
-#define CURR_OFFSET (positions[level].offset)
-
-/* Hold a stack of errors */
-typedef struct {
- char error[16][1024];
- size_t offset[16];
- size_t level;
-} errors_t;
-static errors_t errors;
-
-#define SHIFT_ERROR(provided_offset, ...) { \
- sprintf(errors.error[errors.level], __VA_ARGS__); \
- errors.offset[errors.level] = provided_offset; \
- errors.level++; \
-}
-
-/* Data type to hold opcode with optional key name an success status */
-typedef struct {
- char* key;
- int type;
- char success;
-} entry;
-
-/* Global vars that are actually used as constants. The following double
- * values are used for double on-disk serialization, and are initialized
- * at runtime to avoid strange compiler optimizations. */
-static double R_Zero, R_PosInf, R_NegInf, R_Nan;
-
-#define MAX_TYPES_NUM 256
-#define MAX_TYPE_NAME_LEN 16
-/* store string types for output */
-static char types[MAX_TYPES_NUM][MAX_TYPE_NAME_LEN];
-
-/* Return true if 't' is a valid object type. */
-int checkType(unsigned char t) {
- /* In case a new object type is added, update the following
- * condition as necessary. */
- return
- (t >= REDIS_HASH_ZIPMAP && t <= REDIS_HASH_ZIPLIST) ||
- t <= REDIS_HASH ||
- t >= REDIS_EXPIRETIME_MS;
-}
-
-/* when number of bytes to read is negative, do a peek */
-int readBytes(void *target, long num) {
- char peek = (num < 0) ? 1 : 0;
- num = (num < 0) ? -num : num;
-
- pos p = positions[level];
- if (p.offset + num > p.size) {
- return 0;
- } else {
- memcpy(target, (void*)((size_t)p.data + p.offset), num);
- if (!peek) positions[level].offset += num;
- }
- return 1;
-}
-
-int processHeader(void) {
- char buf[10] = "_________";
- int dump_version;
-
- if (!readBytes(buf, 9)) {
- ERROR("Cannot read header\n");
- }
-
- /* expect the first 5 bytes to equal REDIS */
- if (memcmp(buf,"REDIS",5) != 0) {
- ERROR("Wrong signature in header\n");
- }
-
- dump_version = (int)strtol(buf + 5, NULL, 10);
- if (dump_version < 1 || dump_version > 6) {
- ERROR("Unknown RDB format version: %d\n", dump_version);
- }
- return dump_version;
-}
-
-int loadType(entry *e) {
- uint32_t offset = CURR_OFFSET;
-
- /* this byte needs to qualify as type */
- unsigned char t;
- if (readBytes(&t, 1)) {
- if (checkType(t)) {
- e->type = t;
- return 1;
- } else {
- SHIFT_ERROR(offset, "Unknown type (0x%02x)", t);
- }
- } else {
- SHIFT_ERROR(offset, "Could not read type");
- }
-
- /* failure */
- return 0;
-}
-
-int peekType() {
- unsigned char t;
- if (readBytes(&t, -1) && (checkType(t)))
- return t;
- return -1;
-}
-
-/* discard time, just consume the bytes */
-int processTime(int type) {
- uint32_t offset = CURR_OFFSET;
- unsigned char t[8];
- int timelen = (type == REDIS_EXPIRETIME_MS) ? 8 : 4;
-
- if (readBytes(t,timelen)) {
- return 1;
- } else {
- SHIFT_ERROR(offset, "Could not read time");
- }
-
- /* failure */
- return 0;
-}
-
-uint32_t loadLength(int *isencoded) {
- unsigned char buf[2];
- uint32_t len;
- int type;
-
- if (isencoded) *isencoded = 0;
- if (!readBytes(buf, 1)) return REDIS_RDB_LENERR;
- type = (buf[0] & 0xC0) >> 6;
- if (type == REDIS_RDB_6BITLEN) {
- /* Read a 6 bit len */
- return buf[0] & 0x3F;
- } else if (type == REDIS_RDB_ENCVAL) {
- /* Read a 6 bit len encoding type */
- if (isencoded) *isencoded = 1;
- return buf[0] & 0x3F;
- } else if (type == REDIS_RDB_14BITLEN) {
- /* Read a 14 bit len */
- if (!readBytes(buf+1,1)) return REDIS_RDB_LENERR;
- return ((buf[0] & 0x3F) << 8) | buf[1];
- } else {
- /* Read a 32 bit len */
- if (!readBytes(&len, 4)) return REDIS_RDB_LENERR;
- return (unsigned int)ntohl(len);
- }
-}
-
-char *loadIntegerObject(int enctype) {
- uint32_t offset = CURR_OFFSET;
- unsigned char enc[4];
- long long val;
-
- if (enctype == REDIS_RDB_ENC_INT8) {
- uint8_t v;
- if (!readBytes(enc, 1)) return NULL;
- v = enc[0];
- val = (int8_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT16) {
- uint16_t v;
- if (!readBytes(enc, 2)) return NULL;
- v = enc[0]|(enc[1]<<8);
- val = (int16_t)v;
- } else if (enctype == REDIS_RDB_ENC_INT32) {
- uint32_t v;
- if (!readBytes(enc, 4)) return NULL;
- v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
- val = (int32_t)v;
- } else {
- SHIFT_ERROR(offset, "Unknown integer encoding (0x%02x)", enctype);
- return NULL;
- }
-
- /* convert val into string */
- char *buf;
- buf = malloc(sizeof(char) * 128);
- sprintf(buf, "%lld", val);
- return buf;
-}
-
-char* loadLzfStringObject() {
- unsigned int slen, clen;
- char *c, *s;
-
- if ((clen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
- if ((slen = loadLength(NULL)) == REDIS_RDB_LENERR) return NULL;
-
- c = malloc(clen);
- if (!readBytes(c, clen)) {
- free(c);
- return NULL;
- }
-
- s = malloc(slen+1);
- if (lzf_decompress(c,clen,s,slen) == 0) {
- free(c); free(s);
- return NULL;
- }
-
- free(c);
- return s;
-}
-
-/* returns NULL when not processable, char* when valid */
-char* loadStringObject() {
- uint32_t offset = CURR_OFFSET;
- int isencoded;
- uint32_t len;
-
- len = loadLength(&isencoded);
- if (isencoded) {
- switch(len) {
- case REDIS_RDB_ENC_INT8:
- case REDIS_RDB_ENC_INT16:
- case REDIS_RDB_ENC_INT32:
- return loadIntegerObject(len);
- case REDIS_RDB_ENC_LZF:
- return loadLzfStringObject();
- default:
- /* unknown encoding */
- SHIFT_ERROR(offset, "Unknown string encoding (0x%02x)", len);
- return NULL;
- }
- }
-
- if (len == REDIS_RDB_LENERR) return NULL;
-
- char *buf = malloc(sizeof(char) * (len+1));
- if (buf == NULL) return NULL;
- buf[len] = '\0';
- if (!readBytes(buf, len)) {
- free(buf);
- return NULL;
- }
- return buf;
-}
-
-int processStringObject(char** store) {
- unsigned long offset = CURR_OFFSET;
- char *key = loadStringObject();
- if (key == NULL) {
- SHIFT_ERROR(offset, "Error reading string object");
- free(key);
- return 0;
- }
-
- if (store != NULL) {
- *store = key;
- } else {
- free(key);
- }
- return 1;
-}
-
-double* loadDoubleValue() {
- char buf[256];
- unsigned char len;
- double* val;
-
- if (!readBytes(&len,1)) return NULL;
-
- val = malloc(sizeof(double));
- switch(len) {
- case 255: *val = R_NegInf; return val;
- case 254: *val = R_PosInf; return val;
- case 253: *val = R_Nan; return val;
- default:
- if (!readBytes(buf, len)) {
- free(val);
- return NULL;
- }
- buf[len] = '\0';
- sscanf(buf, "%lg", val);
- return val;
- }
-}
-
-int processDoubleValue(double** store) {
- unsigned long offset = CURR_OFFSET;
- double *val = loadDoubleValue();
- if (val == NULL) {
- SHIFT_ERROR(offset, "Error reading double value");
- free(val);
- return 0;
- }
-
- if (store != NULL) {
- *store = val;
- } else {
- free(val);
- }
- return 1;
-}
-
-int loadPair(entry *e) {
- uint32_t offset = CURR_OFFSET;
- uint32_t i;
-
- /* read key first */
- char *key;
- if (processStringObject(&key)) {
- e->key = key;
- } else {
- SHIFT_ERROR(offset, "Error reading entry key");
- return 0;
- }
-
- uint32_t length = 0;
- if (e->type == REDIS_LIST ||
- e->type == REDIS_SET ||
- e->type == REDIS_ZSET ||
- e->type == REDIS_HASH) {
- if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
- SHIFT_ERROR(offset, "Error reading %s length", types[e->type]);
- return 0;
- }
- }
-
- switch(e->type) {
- case REDIS_STRING:
- case REDIS_HASH_ZIPMAP:
- case REDIS_LIST_ZIPLIST:
- case REDIS_SET_INTSET:
- case REDIS_ZSET_ZIPLIST:
- case REDIS_HASH_ZIPLIST:
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading entry value");
- return 0;
- }
- break;
- case REDIS_LIST:
- case REDIS_SET:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- case REDIS_ZSET:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
- return 0;
- }
- offset = CURR_OFFSET;
- if (!processDoubleValue(NULL)) {
- SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- case REDIS_HASH:
- for (i = 0; i < length; i++) {
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element key at index %d (length: %d)", i, length);
- return 0;
- }
- offset = CURR_OFFSET;
- if (!processStringObject(NULL)) {
- SHIFT_ERROR(offset, "Error reading element value at index %d (length: %d)", i, length);
- return 0;
- }
- }
- break;
- default:
- SHIFT_ERROR(offset, "Type not implemented");
- return 0;
- }
- /* because we're done, we assume success */
- e->success = 1;
- return 1;
-}
-
-entry loadEntry() {
- entry e = { NULL, -1, 0 };
- uint32_t length, offset[4];
-
- /* reset error container */
- errors.level = 0;
-
- offset[0] = CURR_OFFSET;
- if (!loadType(&e)) {
- return e;
- }
-
- offset[1] = CURR_OFFSET;
- if (e.type == REDIS_SELECTDB) {
- if ((length = loadLength(NULL)) == REDIS_RDB_LENERR) {
- SHIFT_ERROR(offset[1], "Error reading database number");
- return e;
- }
- if (length > 63) {
- SHIFT_ERROR(offset[1], "Database number out of range (%d)", length);
- return e;
- }
- } else if (e.type == REDIS_EOF) {
- if (positions[level].offset < positions[level].size) {
- SHIFT_ERROR(offset[0], "Unexpected EOF");
- } else {
- e.success = 1;
- }
- return e;
- } else {
- /* optionally consume expire */
- if (e.type == REDIS_EXPIRETIME ||
- e.type == REDIS_EXPIRETIME_MS) {
- if (!processTime(e.type)) return e;
- if (!loadType(&e)) return e;
- }
-
- offset[1] = CURR_OFFSET;
- if (!loadPair(&e)) {
- SHIFT_ERROR(offset[1], "Error for type %s", types[e.type]);
- return e;
- }
- }
-
- /* all entries are followed by a valid type:
- * e.g. a new entry, SELECTDB, EXPIRE, EOF */
- offset[2] = CURR_OFFSET;
- if (peekType() == -1) {
- SHIFT_ERROR(offset[2], "Followed by invalid type");
- SHIFT_ERROR(offset[0], "Error for type %s", types[e.type]);
- e.success = 0;
- } else {
- e.success = 1;
- }
-
- return e;
-}
-
-void printCentered(int indent, int width, char* body) {
- char head[256], tail[256];
- memset(head, '\0', 256);
- memset(tail, '\0', 256);
-
- memset(head, '=', indent);
- memset(tail, '=', width - 2 - indent - strlen(body));
- printf("%s %s %s\n", head, body, tail);
-}
-
-void printValid(uint64_t ops, uint64_t bytes) {
- char body[80];
- sprintf(body, "Processed %llu valid opcodes (in %llu bytes)",
- (unsigned long long) ops, (unsigned long long) bytes);
- printCentered(4, 80, body);
-}
-
-void printSkipped(uint64_t bytes, uint64_t offset) {
- char body[80];
- sprintf(body, "Skipped %llu bytes (resuming at 0x%08llx)",
- (unsigned long long) bytes, (unsigned long long) offset);
- printCentered(4, 80, body);
-}
-
-void printErrorStack(entry *e) {
- unsigned int i;
- char body[64];
-
- if (e->type == -1) {
- sprintf(body, "Error trace");
- } else if (e->type >= 253) {
- sprintf(body, "Error trace (%s)", types[e->type]);
- } else if (!e->key) {
- sprintf(body, "Error trace (%s: (unknown))", types[e->type]);
- } else {
- char tmp[41];
- strncpy(tmp, e->key, 40);
-
- /* display truncation at the last 3 chars */
- if (strlen(e->key) > 40) {
- memset(&tmp[37], '.', 3);
- }
-
- /* display unprintable characters as ? */
- for (i = 0; i < strlen(tmp); i++) {
- if (tmp[i] <= 32) tmp[i] = '?';
- }
- sprintf(body, "Error trace (%s: %s)", types[e->type], tmp);
- }
-
- printCentered(4, 80, body);
-
- /* display error stack */
- for (i = 0; i < errors.level; i++) {
- printf("0x%08lx - %s\n",
- (unsigned long) errors.offset[i], errors.error[i]);
- }
-}
-
-void process(void) {
- uint64_t num_errors = 0, num_valid_ops = 0, num_valid_bytes = 0;
- entry entry;
- int dump_version = processHeader();
-
- /* Exclude the final checksum for RDB >= 5. Will be checked at the end. */
- if (dump_version >= 5) {
- if (positions[0].size < 8) {
- printf("RDB version >= 5 but no room for checksum.\n");
- exit(1);
- }
- positions[0].size -= 8;
- }
-
- level = 1;
- while(positions[0].offset < positions[0].size) {
- positions[1] = positions[0];
-
- entry = loadEntry();
- if (!entry.success) {
- printValid(num_valid_ops, num_valid_bytes);
- printErrorStack(&entry);
- num_errors++;
- num_valid_ops = 0;
- num_valid_bytes = 0;
-
- /* search for next valid entry */
- uint64_t offset = positions[0].offset + 1;
- int i = 0;
-
- while (!entry.success && offset < positions[0].size) {
- positions[1].offset = offset;
-
- /* find 3 consecutive valid entries */
- for (i = 0; i < 3; i++) {
- entry = loadEntry();
- if (!entry.success) break;
- }
- /* check if we found 3 consecutive valid entries */
- if (i < 3) {
- offset++;
- }
- }
-
- /* print how many bytes we have skipped to find a new valid opcode */
- if (offset < positions[0].size) {
- printSkipped(offset - positions[0].offset, offset);
- }
-
- positions[0].offset = offset;
- } else {
- num_valid_ops++;
- num_valid_bytes += positions[1].offset - positions[0].offset;
-
- /* advance position */
- positions[0] = positions[1];
- }
- free(entry.key);
- }
-
- /* because there is another potential error,
- * print how many valid ops we have processed */
- printValid(num_valid_ops, num_valid_bytes);
-
- /* expect an eof */
- if (entry.type != REDIS_EOF) {
- /* last byte should be EOF, add error */
- errors.level = 0;
- SHIFT_ERROR(positions[0].offset, "Expected EOF, got %s", types[entry.type]);
-
- /* this is an EOF error so reset type */
- entry.type = -1;
- printErrorStack(&entry);
-
- num_errors++;
- }
-
- /* Verify checksum */
- if (dump_version >= 5) {
- uint64_t crc = crc64(0,positions[0].data,positions[0].size);
- uint64_t crc2;
- unsigned char *p = (unsigned char*)positions[0].data+positions[0].size;
- crc2 = ((uint64_t)p[0] << 0) |
- ((uint64_t)p[1] << 8) |
- ((uint64_t)p[2] << 16) |
- ((uint64_t)p[3] << 24) |
- ((uint64_t)p[4] << 32) |
- ((uint64_t)p[5] << 40) |
- ((uint64_t)p[6] << 48) |
- ((uint64_t)p[7] << 56);
- if (crc != crc2) {
- SHIFT_ERROR(positions[0].offset, "RDB CRC64 does not match.");
- } else {
- printf("CRC64 checksum is OK\n");
- }
- }
-
- /* print summary on errors */
- if (num_errors) {
- printf("\n");
- printf("Total unprocessable opcodes: %llu\n",
- (unsigned long long) num_errors);
- }
-}
-
-int main(int argc, char **argv) {
- /* expect the first argument to be the dump file */
- if (argc <= 1) {
- printf("Usage: %s <dump.rdb>\n", argv[0]);
- exit(0);
- }
-
- int fd;
- off_t size;
- struct stat stat;
- void *data;
-
- fd = open(argv[1], O_RDONLY);
- if (fd < 1) {
- ERROR("Cannot open file: %s\n", argv[1]);
- }
- if (fstat(fd, &stat) == -1) {
- ERROR("Cannot stat: %s\n", argv[1]);
- } else {
- size = stat.st_size;
- }
-
- if (sizeof(size_t) == sizeof(int32_t) && size >= INT_MAX) {
- ERROR("Cannot check dump files >2GB on a 32-bit platform\n");
- }
-
- data = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
- if (data == MAP_FAILED) {
- ERROR("Cannot mmap: %s\n", argv[1]);
- }
-
- /* Initialize static vars */
- positions[0].data = data;
- positions[0].size = size;
- positions[0].offset = 0;
- errors.level = 0;
-
- /* Object types */
- sprintf(types[REDIS_STRING], "STRING");
- sprintf(types[REDIS_LIST], "LIST");
- sprintf(types[REDIS_SET], "SET");
- sprintf(types[REDIS_ZSET], "ZSET");
- sprintf(types[REDIS_HASH], "HASH");
-
- /* Object types only used for dumping to disk */
- sprintf(types[REDIS_EXPIRETIME], "EXPIRETIME");
- sprintf(types[REDIS_SELECTDB], "SELECTDB");
- sprintf(types[REDIS_EOF], "EOF");
-
- /* Double constants initialization */
- R_Zero = 0.0;
- R_PosInf = 1.0/R_Zero;
- R_NegInf = -1.0/R_Zero;
- R_Nan = R_Zero/R_Zero;
-
- process();
-
- munmap(data, size);
- close(fd);
- return 0;
-}
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
new file mode 100644
index 000000000..8de1d8f48
--- /dev/null
+++ b/src/redis-check-rdb.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2016, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "rdb.h"
+
+#include <stdarg.h>
+
+void createSharedObjects(void);
+void rdbLoadProgressCallback(rio *r, const void *buf, size_t len);
+int rdbCheckMode = 0;
+
+struct {
+ rio *rio;
+ robj *key; /* Current key we are reading. */
+ int key_type; /* Current key type if != -1. */
+ unsigned long keys; /* Number of keys processed. */
+ unsigned long expires; /* Number of keys with an expire. */
+ unsigned long already_expired; /* Number of keys already expired. */
+ int doing; /* The state while reading the RDB. */
+ int error_set; /* True if error is populated. */
+ char error[1024];
+} rdbstate;
+
+/* At every loading step try to remember what we were about to do, so that
+ * we can log this information when an error is encountered. */
+#define RDB_CHECK_DOING_START 0
+#define RDB_CHECK_DOING_READ_TYPE 1
+#define RDB_CHECK_DOING_READ_EXPIRE 2
+#define RDB_CHECK_DOING_READ_KEY 3
+#define RDB_CHECK_DOING_READ_OBJECT_VALUE 4
+#define RDB_CHECK_DOING_CHECK_SUM 5
+#define RDB_CHECK_DOING_READ_LEN 6
+#define RDB_CHECK_DOING_READ_AUX 7
+
+char *rdb_check_doing_string[] = {
+ "start",
+ "read-type",
+ "read-expire",
+ "read-key",
+ "read-object-value",
+ "check-sum",
+ "read-len",
+ "read-aux"
+};
+
+char *rdb_type_string[] = {
+ "string",
+ "list-linked",
+ "set-hashtable",
+ "zset-v1",
+ "hash-hashtable",
+ "zset-v2",
+ "module-value",
+ "","",
+ "hash-zipmap",
+ "list-ziplist",
+ "set-intset",
+ "zset-ziplist",
+ "hash-ziplist",
+ "quicklist",
+ "stream"
+};
+
+/* Show a few stats collected into 'rdbstate' */
+void rdbShowGenericInfo(void) {
+ printf("[info] %lu keys read\n", rdbstate.keys);
+ printf("[info] %lu expires\n", rdbstate.expires);
+ printf("[info] %lu already expired\n", rdbstate.already_expired);
+}
+
+/* Called on RDB errors. Provides details about the RDB and the offset
+ * we were at when the error was detected. */
+void rdbCheckError(const char *fmt, ...) {
+ char msg[1024];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ printf("--- RDB ERROR DETECTED ---\n");
+ printf("[offset %llu] %s\n",
+ (unsigned long long) (rdbstate.rio ?
+ rdbstate.rio->processed_bytes : 0), msg);
+ printf("[additional info] While doing: %s\n",
+ rdb_check_doing_string[rdbstate.doing]);
+ if (rdbstate.key)
+ printf("[additional info] Reading key '%s'\n",
+ (char*)rdbstate.key->ptr);
+ if (rdbstate.key_type != -1)
+ printf("[additional info] Reading type %d (%s)\n",
+ rdbstate.key_type,
+ ((unsigned)rdbstate.key_type <
+ sizeof(rdb_type_string)/sizeof(char*)) ?
+ rdb_type_string[rdbstate.key_type] : "unknown");
+ rdbShowGenericInfo();
+}
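+
+/* For example (hypothetical offset and key values), a truncated value
+ * object would be reported roughly as:
+ *
+ *   --- RDB ERROR DETECTED ---
+ *   [offset 1234] Unexpected EOF reading RDB file
+ *   [additional info] While doing: read-object-value
+ *   [additional info] Reading key 'mykey'
+ *   [additional info] Reading type 0 (string)
+ */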
+
+/* Print information during RDB checking. */
+void rdbCheckInfo(const char *fmt, ...) {
+ char msg[1024];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ printf("[offset %llu] %s\n",
+ (unsigned long long) (rdbstate.rio ?
+ rdbstate.rio->processed_bytes : 0), msg);
+}
+
+/* Used inside rdb.c in order to log specific errors happening inside
+ * the RDB loading internals. */
+void rdbCheckSetError(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(rdbstate.error, sizeof(rdbstate.error), fmt, ap);
+ va_end(ap);
+ rdbstate.error_set = 1;
+}
+
+/* During RDB check we set up a special signal handler for memory violations
+ * and similar conditions, so that we can log the offending part of the RDB
+ * if the crash is due to broken content. */
+void rdbCheckHandleCrash(int sig, siginfo_t *info, void *secret) {
+ UNUSED(sig);
+ UNUSED(info);
+ UNUSED(secret);
+
+ rdbCheckError("Server crash checking the specified RDB file!");
+ exit(1);
+}
+
+void rdbCheckSetupSignals(void) {
+ struct sigaction act;
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
+ act.sa_sigaction = rdbCheckHandleCrash;
+ sigaction(SIGSEGV, &act, NULL);
+ sigaction(SIGBUS, &act, NULL);
+ sigaction(SIGFPE, &act, NULL);
+ sigaction(SIGILL, &act, NULL);
+}
+
+/* Check the specified RDB file. Return 0 if the RDB looks sane, otherwise
+ * 1 is returned.
+ * The file is specified as a filename in 'rdbfilename' if 'fp' is not NULL,
+ * otherwise the already open file 'fp' is checked. */
+int redis_check_rdb(char *rdbfilename, FILE *fp) {
+ uint64_t dbid;
+ int type, rdbver;
+ char buf[1024];
+ long long expiretime, now = mstime();
+ static rio rdb; /* Pointed to by the global rdbstate struct. */
+
+ int closefile = (fp == NULL);
+ if (fp == NULL && (fp = fopen(rdbfilename,"r")) == NULL) return 1;
+
+ rioInitWithFile(&rdb,fp);
+ rdbstate.rio = &rdb;
+ rdb.update_cksum = rdbLoadProgressCallback;
+ if (rioRead(&rdb,buf,9) == 0) goto eoferr;
+ buf[9] = '\0';
+ if (memcmp(buf,"REDIS",5) != 0) {
+ rdbCheckError("Wrong signature trying to load DB from file");
+ goto err;
+ }
+ rdbver = atoi(buf+5);
+ if (rdbver < 1 || rdbver > RDB_VERSION) {
+ rdbCheckError("Can't handle RDB format version %d",rdbver);
+ goto err;
+ }
+
+ expiretime = -1;
+ startLoading(fp);
+ while(1) {
+ robj *key, *val;
+
+ /* Read type. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_TYPE;
+ if ((type = rdbLoadType(&rdb)) == -1) goto eoferr;
+
+ /* Handle special types. */
+ if (type == RDB_OPCODE_EXPIRETIME) {
+ rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+ /* EXPIRETIME: load an expire associated with the next key
+ * to load. Note that after loading an expire we need to
+ * load the actual type, and continue. */
+ if ((expiretime = rdbLoadTime(&rdb)) == -1) goto eoferr;
+ expiretime *= 1000;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EXPIRETIME_MS) {
+ /* EXPIRETIME_MS: milliseconds precision expire times introduced
+ * with RDB v3. Like EXPIRETIME but with more precision. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_EXPIRE;
+ if ((expiretime = rdbLoadMillisecondTime(&rdb, rdbver)) == -1) goto eoferr;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_FREQ) {
+ /* FREQ: LFU frequency. */
+ uint8_t byte;
+ if (rioRead(&rdb,&byte,1) == 0) goto eoferr;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_IDLE) {
+ /* IDLE: LRU idle time. */
+ if (rdbLoadLen(&rdb,NULL) == RDB_LENERR) goto eoferr;
+ continue; /* Read next opcode. */
+ } else if (type == RDB_OPCODE_EOF) {
+ /* EOF: End of file, exit the main loop. */
+ break;
+ } else if (type == RDB_OPCODE_SELECTDB) {
+ /* SELECTDB: Select the specified database. */
+ rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+ if ((dbid = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ rdbCheckInfo("Selecting DB ID %d", dbid);
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_RESIZEDB) {
+ /* RESIZEDB: Hint about the size of the keys in the currently
+ * selected database, in order to avoid useless rehashing. */
+ uint64_t db_size, expires_size;
+ rdbstate.doing = RDB_CHECK_DOING_READ_LEN;
+ if ((db_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ if ((expires_size = rdbLoadLen(&rdb,NULL)) == RDB_LENERR)
+ goto eoferr;
+ continue; /* Read type again. */
+ } else if (type == RDB_OPCODE_AUX) {
+ /* AUX: generic string-string fields. Used to add state to the RDB
+ * in a backward compatible way. Implementations of RDB loading
+ * are required to skip AUX fields they don't understand.
+ *
+ * An AUX field is composed of two strings: key and value. */
+ robj *auxkey, *auxval;
+ rdbstate.doing = RDB_CHECK_DOING_READ_AUX;
+ if ((auxkey = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ if ((auxval = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+
+ rdbCheckInfo("AUX FIELD %s = '%s'",
+ (char*)auxkey->ptr, (char*)auxval->ptr);
+ decrRefCount(auxkey);
+ decrRefCount(auxval);
+ continue; /* Read type again. */
+ } else {
+ if (!rdbIsObjectType(type)) {
+ rdbCheckError("Invalid object type: %d", type);
+ goto err;
+ }
+ rdbstate.key_type = type;
+ }
+
+ /* Read key */
+ rdbstate.doing = RDB_CHECK_DOING_READ_KEY;
+ if ((key = rdbLoadStringObject(&rdb)) == NULL) goto eoferr;
+ rdbstate.key = key;
+ rdbstate.keys++;
+ /* Read value */
+ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE;
+ if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr;
+ /* Check if the key already expired. */
+ if (expiretime != -1 && expiretime < now)
+ rdbstate.already_expired++;
+ if (expiretime != -1) rdbstate.expires++;
+ rdbstate.key = NULL;
+ decrRefCount(key);
+ decrRefCount(val);
+ rdbstate.key_type = -1;
+ expiretime = -1;
+ }
+ /* Verify the checksum if RDB version is >= 5 */
+ if (rdbver >= 5 && server.rdb_checksum) {
+ uint64_t cksum, expected = rdb.cksum;
+
+ rdbstate.doing = RDB_CHECK_DOING_CHECK_SUM;
+ if (rioRead(&rdb,&cksum,8) == 0) goto eoferr;
+ memrev64ifbe(&cksum);
+ if (cksum == 0) {
+ rdbCheckInfo("RDB file was saved with checksum disabled: no check performed.");
+ } else if (cksum != expected) {
+ rdbCheckError("RDB CRC error");
+ goto err;
+ } else {
+ rdbCheckInfo("Checksum OK");
+ }
+ }
+
+ if (closefile) fclose(fp);
+ return 0;
+
+eoferr: /* unexpected end of file is handled here with a fatal exit */
+ if (rdbstate.error_set) {
+ rdbCheckError(rdbstate.error);
+ } else {
+ rdbCheckError("Unexpected EOF reading RDB file");
+ }
+err:
+ if (closefile) fclose(fp);
+ return 1;
+}
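+
+/* A minimal caller sketch (hypothetical, not part of this patch; it assumes
+ * shared objects and server state were initialized the way
+ * redis_check_rdb_main() below does it):
+ *
+ *   if (redis_check_rdb("dump.rdb", NULL) == 0)
+ *       printf("RDB looks sane\n");
+ */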
+
+/* RDB check main: called from redis.c when Redis is executed with the
+ * redis-check-rdb alias, or during RDB loading errors.
+ *
+ * The function works in two ways: it can be called with argc/argv as a
+ * standalone executable, or called with a non NULL 'fp' argument if we
+ * already have an open file to check. This happens when the function
+ * is used to check an RDB preamble inside an AOF file.
+ *
+ * When called with fp = NULL, the function never returns, but exits with a
+ * status code according to success (RDB is sane) or error (RDB is corrupted).
+ * Otherwise, if called with a non NULL fp, the function returns C_OK or
+ * C_ERR depending on success or failure. */
+int redis_check_rdb_main(int argc, char **argv, FILE *fp) {
+ if (argc != 2 && fp == NULL) {
+ fprintf(stderr, "Usage: %s <rdb-file-name>\n", argv[0]);
+ exit(1);
+ }
+ /* In order to call the loading functions we need to create the shared
+ * integer objects, however since this function may be called from
+ * an already initialized Redis instance, check if we really need to. */
+ if (shared.integers[0] == NULL)
+ createSharedObjects();
+ server.loading_process_events_interval_bytes = 0;
+ rdbCheckMode = 1;
+ rdbCheckInfo("Checking RDB file %s", argv[1]);
+ rdbCheckSetupSignals();
+ int retval = redis_check_rdb(argv[1],fp);
+ if (retval == 0) {
+ rdbCheckInfo("\\o/ RDB looks OK! \\o/");
+ rdbShowGenericInfo();
+ }
+ if (fp) return (retval == 0) ? C_OK : C_ERR;
+ exit(retval);
+}
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 3c1458742..0e8777bd2 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -44,16 +44,19 @@
#include <assert.h>
#include <fcntl.h>
#include <limits.h>
+#include <math.h>
-#include "hiredis.h"
-#include "sds.h"
+#include <hiredis.h>
+#include <sds.h> /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */
+#include "dict.h"
+#include "adlist.h"
#include "zmalloc.h"
#include "linenoise.h"
#include "help.h"
#include "anet.h"
#include "ae.h"
-#define REDIS_NOTUSED(V) ((void) V)
+#define UNUSED(V) ((void) V)
#define OUTPUT_STANDARD 0
#define OUTPUT_RAW 1
@@ -62,6 +65,118 @@
#define REDIS_CLI_DEFAULT_PIPE_TIMEOUT 30 /* seconds */
#define REDIS_CLI_HISTFILE_ENV "REDISCLI_HISTFILE"
#define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history"
+#define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE"
+#define REDIS_CLI_RCFILE_DEFAULT ".redisclirc"
+
+#define CLUSTER_MANAGER_SLOTS 16384
+#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000
+#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10
+#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2
+
+#define CLUSTER_MANAGER_INVALID_HOST_ARG \
+ "[ERR] Invalid arguments: you need to pass either a valid " \
+ "address (ie. 120.0.0.1:7000) or space separated IP " \
+ "and port (ie. 120.0.0.1 7000)\n"
+#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL)
+#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1))
+#define CLUSTER_MANAGER_COMMAND(n,...) \
+ (redisCommand(n->context, __VA_ARGS__))
+
+#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc)
+
+#define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \
+ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \
+ n->ip, n->port, err);
+
+#define clusterManagerLogInfo(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__)
+
+#define clusterManagerLogErr(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__)
+
+#define clusterManagerLogWarn(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__)
+
+#define clusterManagerLogOk(...) \
+ clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__)
+
+#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0
+#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1
+#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2
+#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3
+#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4
+#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5
+
+#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1
+#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2
+#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3
+#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4
+#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5
+#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6
+#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7
+#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8
+
+#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0
+#define CLUSTER_MANAGER_OPT_COLD 1 << 1
+#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2
+#define CLUSTER_MANAGER_OPT_QUIET 1 << 6
+#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7
+
+#define CLUSTER_MANAGER_LOG_LVL_INFO 1
+#define CLUSTER_MANAGER_LOG_LVL_WARN 2
+#define CLUSTER_MANAGER_LOG_LVL_ERR 3
+#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4
+
+#define LOG_COLOR_BOLD "29;1m"
+#define LOG_COLOR_RED "31;1m"
+#define LOG_COLOR_GREEN "32;1m"
+#define LOG_COLOR_YELLOW "33;1m"
+#define LOG_COLOR_RESET "0m"
+
+/* cliConnect() flags. */
+#define CC_FORCE (1<<0) /* Re-connect if already connected. */
+#define CC_QUIET (1<<1) /* Don't log connecting errors. */
+
+/* --latency-dist palettes. */
+int spectrum_palette_color_size = 19;
+int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196};
+
+int spectrum_palette_mono_size = 13;
+int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253};
+
+/* The actual palette in use. */
+int *spectrum_palette;
+int spectrum_palette_size;
+
+/* Dict Helpers */
+
+static uint64_t dictSdsHash(const void *key);
+static int dictSdsKeyCompare(void *privdata, const void *key1,
+ const void *key2);
+static void dictSdsDestructor(void *privdata, void *val);
+static void dictListDestructor(void *privdata, void *val);
+
+/* Cluster Manager Command Info */
+typedef struct clusterManagerCommand {
+ char *name;
+ int argc;
+ char **argv;
+ int flags;
+ int replicas;
+ char *from;
+ char *to;
+ char **weight;
+ int weight_argc;
+ char *master_id;
+ int slots;
+ int timeout;
+ int pipeline;
+ float threshold;
+} clusterManagerCommand;
+
+static void createClusterManagerCommand(char *cmdname, int argc, char **argv);
+
static redisContext *context;
static struct config {
@@ -76,7 +191,10 @@ static struct config {
int monitor_mode;
int pubsub_mode;
int latency_mode;
+ int latency_dist_mode;
int latency_history;
+ int lru_test_mode;
+ long long lru_test_sample_size;
int cluster_mode;
int cluster_reissue_command;
int slave_mode;
@@ -90,25 +208,44 @@ static struct config {
char *pattern;
char *rdb_filename;
int bigkeys;
+ int hotkeys;
int stdinarg; /* get last arg from stdin. (-x option) */
char *auth;
int output; /* output mode, see OUTPUT_* defines */
sds mb_delim;
char prompt[128];
char *eval;
+ int eval_ldb;
+ int eval_ldb_sync; /* Ask for synchronous mode of the Lua debugger. */
+ int eval_ldb_end; /* Lua debugging session ended. */
+ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */
int last_cmd_type;
+ int verbose;
+ clusterManagerCommand cluster_manager_command;
+ int no_auth_warning;
} config;
+/* User preferences. */
+static struct pref {
+ int hints;
+} pref;
+
static volatile sig_atomic_t force_cancel_loop = 0;
static void usage(void);
static void slaveMode(void);
char *redisGitSHA1(void);
char *redisGitDirty(void);
+static int cliConnect(int force);
+
+static char *getInfoField(char *info, char *field);
+static long getLongInfoField(char *info, char *field);
/*------------------------------------------------------------------------------
* Utility functions
*--------------------------------------------------------------------------- */
+uint16_t crc16(const char *buf, int len);
+
static long long ustime(void) {
struct timeval tv;
long long ust;
@@ -124,43 +261,177 @@ static long long mstime(void) {
}
static void cliRefreshPrompt(void) {
- int len;
+ if (config.eval_ldb) return;
+
+ sds prompt = sdsempty();
+ if (config.hostsocket != NULL) {
+ prompt = sdscatfmt(prompt,"redis %s",config.hostsocket);
+ } else {
+ char addr[256];
+ anetFormatAddr(addr, sizeof(addr), config.hostip, config.hostport);
+ prompt = sdscatlen(prompt,addr,strlen(addr));
+ }
- if (config.hostsocket != NULL)
- len = snprintf(config.prompt,sizeof(config.prompt),"redis %s",
- config.hostsocket);
- else
- len = anetFormatAddr(config.prompt, sizeof(config.prompt),
- config.hostip, config.hostport);
/* Add [dbnum] if needed */
- if (config.dbnum != 0 && config.last_cmd_type != REDIS_REPLY_ERROR)
- len += snprintf(config.prompt+len,sizeof(config.prompt)-len,"[%d]",
- config.dbnum);
- snprintf(config.prompt+len,sizeof(config.prompt)-len,"> ");
+ if (config.dbnum != 0)
+ prompt = sdscatfmt(prompt,"[%i]",config.dbnum);
+
+ /* Copy the prompt in the static buffer. */
+ prompt = sdscatlen(prompt,"> ",2);
+ snprintf(config.prompt,sizeof(config.prompt),"%s",prompt);
+ sdsfree(prompt);
}
-static sds getHistoryPath() {
+/* Return the name of the dotfile for the specified 'dotfilename'.
+ * Normally it just concatenates the user's $HOME to the file specified
+ * in 'dotfilename'. However if the environment variable 'envoverride'
+ * is set, its value is taken as the path.
+ *
+ * The function returns NULL (if the file is /dev/null or its path
+ * cannot be obtained for some error), or an SDS string that must be
+ * freed by the user. */
+static sds getDotfilePath(char *envoverride, char *dotfilename) {
char *path = NULL;
- sds historyPath = NULL;
+ sds dotPath = NULL;
- /* check the env for a histfile override */
- path = getenv(REDIS_CLI_HISTFILE_ENV);
+ /* Check the env for a dotfile override. */
+ path = getenv(envoverride);
if (path != NULL && *path != '\0') {
if (!strcmp("/dev/null", path)) {
return NULL;
}
- /* if the env is set, return it */
- historyPath = sdscatprintf(sdsempty(), "%s", path);
+ /* If the env is set, return it. */
+ dotPath = sdsnew(path);
} else {
char *home = getenv("HOME");
if (home != NULL && *home != '\0') {
- /* otherwise, return the default */
- historyPath = sdscatprintf(sdsempty(), "%s/%s", home, REDIS_CLI_HISTFILE_DEFAULT);
+ /* If no override is set use $HOME/<dotfilename>. */
+ dotPath = sdscatprintf(sdsempty(), "%s/%s", home, dotfilename);
+ }
+ }
+ return dotPath;
+}
+
+/* URL-style percent decoding. */
+#define isHexChar(c) (isdigit(c) || (c >= 'a' && c <= 'f'))
+#define decodeHexChar(c) (isdigit(c) ? c - '0' : c - 'a' + 10)
+#define decodeHex(h, l) ((decodeHexChar(h) << 4) + decodeHexChar(l))
+
+static sds percentDecode(const char *pe, size_t len) {
+ const char *end = pe + len;
+ sds ret = sdsempty();
+ const char *curr = pe;
+
+ while (curr < end) {
+ if (*curr == '%') {
+ if ((end - curr) < 2) {
+ fprintf(stderr, "Incomplete URI encoding\n");
+ exit(1);
+ }
+
+ char h = tolower(*(++curr));
+ char l = tolower(*(++curr));
+ if (!isHexChar(h) || !isHexChar(l)) {
+ fprintf(stderr, "Illegal character in URI encoding\n");
+ exit(1);
+ }
+ char c = decodeHex(h, l);
+ ret = sdscatlen(ret, &c, 1);
+ curr++;
+ } else {
+ ret = sdscatlen(ret, curr++, 1);
+ }
+ }
+
+ return ret;
+}
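+
+/* Example (hypothetical input): percentDecode("pass%3Aword", 11) returns a
+ * new sds string "pass:word", since "%3A" decodes to ':'. */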
+
+/* Parse a URI and extract the server connection information.
+ * URI scheme is based on the provisional specification[1] excluding support
+ * for query parameters. Valid URIs are:
+ * scheme: "redis://"
+ * authority: [[<username> ":"] <password> "@"] [<hostname> [":" <port>]]
+ * path: ["/" [<db>]]
+ *
+ * [1]: https://www.iana.org/assignments/uri-schemes/prov/redis */
+static void parseRedisUri(const char *uri) {
+
+ const char *scheme = "redis://";
+ const char *curr = uri;
+ const char *end = uri + strlen(uri);
+ const char *userinfo, *username, *port, *host, *path;
+
+ /* URI must start with a valid scheme. */
+ if (strncasecmp(scheme, curr, strlen(scheme))) {
+ fprintf(stderr,"Invalid URI scheme\n");
+ exit(1);
+ }
+ curr += strlen(scheme);
+ if (curr == end) return;
+
+ /* Extract user info. */
+ if ((userinfo = strchr(curr,'@'))) {
+ if ((username = strchr(curr, ':')) && username < userinfo) {
+ /* If provided, username is ignored. */
+ curr = username + 1;
}
+
+ config.auth = percentDecode(curr, userinfo - curr);
+ curr = userinfo + 1;
+ }
+ if (curr == end) return;
+
+ /* Extract host and port. */
+ path = strchr(curr, '/');
+ if (*curr != '/') {
+ host = path ? path - 1 : end;
+ if ((port = strchr(curr, ':'))) {
+ config.hostport = atoi(port + 1);
+ host = port - 1;
+ }
+ config.hostip = sdsnewlen(curr, host - curr + 1);
}
+ curr = path ? path + 1 : end;
+ if (curr == end) return;
- return historyPath;
+ /* Extract database number. */
+ config.dbnum = atoi(curr);
+}
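+
+/* For instance (hypothetical URI), parseRedisUri("redis://secret@127.0.0.1:6380/2")
+ * sets config.auth = "secret", config.hostip = "127.0.0.1",
+ * config.hostport = 6380 and config.dbnum = 2. */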
+
+static uint64_t dictSdsHash(const void *key) {
+ return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
+}
+
+static int dictSdsKeyCompare(void *privdata, const void *key1,
+ const void *key2)
+{
+ int l1,l2;
+ DICT_NOTUSED(privdata);
+
+ l1 = sdslen((sds)key1);
+ l2 = sdslen((sds)key2);
+ if (l1 != l2) return 0;
+ return memcmp(key1, key2, l1) == 0;
+}
+
+static void dictSdsDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+ sdsfree(val);
+}
+
+void dictListDestructor(void *privdata, void *val)
+{
+ DICT_NOTUSED(privdata);
+ listRelease((list*)val);
+}
+
+/* _serverAssert is needed by dict */
+void _serverAssert(const char *estr, const char *file, int line) {
+ fprintf(stderr, "=== ASSERTION FAILED ===");
+ fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr);
+ *((char*)-1) = 'x';
}
/*------------------------------------------------------------------------------
@@ -204,11 +475,11 @@ static void cliInitHelp(void) {
helpEntry tmp;
helpEntriesLen = len = commandslen+groupslen;
- helpEntries = malloc(sizeof(helpEntry)*len);
+ helpEntries = zmalloc(sizeof(helpEntry)*len);
for (i = 0; i < groupslen; i++) {
tmp.argc = 1;
- tmp.argv = malloc(sizeof(sds));
+ tmp.argv = zmalloc(sizeof(sds));
tmp.argv[0] = sdscatprintf(sdsempty(),"@%s",commandGroups[i]);
tmp.full = tmp.argv[0];
tmp.type = CLI_HELP_GROUP;
@@ -225,6 +496,66 @@ static void cliInitHelp(void) {
}
}
+/* cliInitHelp() sets up the helpEntries array with the command and group
+ * names from the help.h file. However the Redis instance we are connecting
+ * to may support more commands, so this function integrates the previous
+ * entries with additional entries obtained using the COMMAND command
+ * available in recent versions of Redis. */
+static void cliIntegrateHelp(void) {
+ if (cliConnect(CC_QUIET) == REDIS_ERR) return;
+
+ redisReply *reply = redisCommand(context, "COMMAND");
+ if(reply == NULL || reply->type != REDIS_REPLY_ARRAY) return;
+
+ /* Scan the array reported by COMMAND and fill only the entries that
+ * don't already match what we have. */
+ for (size_t j = 0; j < reply->elements; j++) {
+ redisReply *entry = reply->element[j];
+ if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 ||
+ entry->element[0]->type != REDIS_REPLY_STRING ||
+ entry->element[1]->type != REDIS_REPLY_INTEGER ||
+ entry->element[3]->type != REDIS_REPLY_INTEGER) return;
+ char *cmdname = entry->element[0]->str;
+ int i;
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ helpEntry *he = helpEntries+i;
+ if (!strcasecmp(he->argv[0],cmdname))
+ break;
+ }
+ if (i != helpEntriesLen) continue;
+
+ helpEntriesLen++;
+ helpEntries = zrealloc(helpEntries,sizeof(helpEntry)*helpEntriesLen);
+ helpEntry *new = helpEntries+(helpEntriesLen-1);
+
+ new->argc = 1;
+ new->argv = zmalloc(sizeof(sds));
+ new->argv[0] = sdsnew(cmdname);
+ new->full = new->argv[0];
+ new->type = CLI_HELP_COMMAND;
+ sdstoupper(new->argv[0]);
+
+ struct commandHelp *ch = zmalloc(sizeof(*ch));
+ ch->name = new->argv[0];
+ ch->params = sdsempty();
+ int args = llabs(entry->element[1]->integer);
+ args--; /* Remove the command name itself. */
+ if (entry->element[3]->integer == 1) {
+ ch->params = sdscat(ch->params,"key ");
+ args--;
+ }
+ while(args--) ch->params = sdscat(ch->params,"arg ");
+ if (entry->element[1]->integer < 0)
+ ch->params = sdscat(ch->params,"...options...");
+ ch->summary = "Help not available";
+ ch->group = 0;
+ ch->since = "not known";
+ new->org = ch;
+ }
+ freeReplyObject(reply);
+}
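+
+/* E.g. a server-only command (hypothetical name) "mymodule.do" reported by
+ * COMMAND with arity -4 and first key at position 1 is integrated with
+ * params "key arg arg ...options...", summary "Help not available" and
+ * since "not known". */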
+
/* Output command help to stdout. */
static void cliOutputCommandHelp(struct commandHelp *help, int group) {
printf("\r\n \x1b[1m%s\x1b[0m \x1b[90m%s\x1b[0m\r\n", help->name, help->params);
@@ -239,11 +570,17 @@ static void cliOutputCommandHelp(struct commandHelp *help, int group) {
static void cliOutputGenericHelp(void) {
sds version = cliVersion();
printf(
- "redis-cli %s\r\n"
- "Type: \"help @<group>\" to get a list of commands in <group>\r\n"
- " \"help <command>\" for help on <command>\r\n"
- " \"help <tab>\" to get a list of possible help topics\r\n"
- " \"quit\" to exit\r\n",
+ "redis-cli %s\n"
+ "To get help about Redis commands type:\n"
+ " \"help @<group>\" to get a list of commands in <group>\n"
+ " \"help <command>\" for help on <command>\n"
+ " \"help <tab>\" to get a list of possible help topics\n"
+ " \"quit\" to exit\n"
+ "\n"
+ "To set redis-cli preferences:\n"
+ " \":set hints\" enable online hints\n"
+ " \":set nohints\" disable online hints\n"
+ "Set your preferences in ~/.redisclirc\n",
version
);
sdsfree(version);
@@ -294,6 +631,7 @@ static void cliOutputHelp(int argc, char **argv) {
printf("\r\n");
}
+/* Linenoise completion callback. */
static void completionCallback(const char *buf, linenoiseCompletions *lc) {
size_t startpos = 0;
int mask;
@@ -322,6 +660,58 @@ static void completionCallback(const char *buf, linenoiseCompletions *lc) {
}
}
+/* Linenoise hints callback. */
+static char *hintsCallback(const char *buf, int *color, int *bold) {
+ if (!pref.hints) return NULL;
+
+ int i, argc, buflen = strlen(buf);
+ sds *argv = sdssplitargs(buf,&argc);
+ int endspace = buflen && isspace(buf[buflen-1]);
+
+ /* Check if the argument list is empty and return ASAP. */
+ if (argc == 0) {
+ sdsfreesplitres(argv,argc);
+ return NULL;
+ }
+
+ for (i = 0; i < helpEntriesLen; i++) {
+ if (!(helpEntries[i].type & CLI_HELP_COMMAND)) continue;
+
+ if (strcasecmp(argv[0],helpEntries[i].full) == 0)
+ {
+ *color = 90;
+ *bold = 0;
+ sds hint = sdsnew(helpEntries[i].org->params);
+
+ /* Remove arguments from the returned hint to show only the
+ * ones the user did not yet type. */
+ int toremove = argc-1;
+ while(toremove > 0 && sdslen(hint)) {
+ if (hint[0] == '[') break;
+ if (hint[0] == ' ') toremove--;
+ sdsrange(hint,1,-1);
+ }
+
+ /* Add an initial space if needed. */
+ if (!endspace) {
+ sds newhint = sdsnewlen(" ",1);
+ newhint = sdscatsds(newhint,hint);
+ sdsfree(hint);
+ hint = newhint;
+ }
+
+ sdsfreesplitres(argv,argc);
+ return hint;
+ }
+ }
+ sdsfreesplitres(argv,argc);
+ return NULL;
+}
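+
+/* E.g. (hypothetical session): once "GET mykey" is fully typed, the stored
+ * params hint "key" is entirely consumed; for a partially typed command only
+ * the leading, already-typed argument names are stripped, so the remaining
+ * arguments are suggested. */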
+
+static void freeHintsCallback(void *ptr) {
+ sdsfree(ptr);
+}
+
/*------------------------------------------------------------------------------
* Networking / parsing
*--------------------------------------------------------------------------- */
@@ -354,12 +744,15 @@ static int cliSelect(void) {
return REDIS_ERR;
}
-/* Connect to the server. If force is not zero the connection is performed
- * even if there is already a connected socket. */
-static int cliConnect(int force) {
- if (context == NULL || force) {
- if (context != NULL)
+/* Connect to the server. It is possible to pass certain flags to the function:
+ * CC_FORCE: The connection is performed even if there is already
+ * a connected socket.
+ * CC_QUIET: Don't print errors if connection fails. */
+static int cliConnect(int flags) {
+ if (context == NULL || flags & CC_FORCE) {
+ if (context != NULL) {
redisFree(context);
+ }
if (config.hostsocket == NULL) {
context = redisConnect(config.hostip,config.hostport);
@@ -368,11 +761,15 @@ static int cliConnect(int force) {
}
if (context->err) {
- fprintf(stderr,"Could not connect to Redis at ");
- if (config.hostsocket == NULL)
- fprintf(stderr,"%s:%d: %s\n",config.hostip,config.hostport,context->errstr);
- else
- fprintf(stderr,"%s: %s\n",config.hostsocket,context->errstr);
+ if (!(flags & CC_QUIET)) {
+ fprintf(stderr,"Could not connect to Redis at ");
+ if (config.hostsocket == NULL)
+ fprintf(stderr,"%s:%d: %s\n",
+ config.hostip,config.hostport,context->errstr);
+ else
+ fprintf(stderr,"%s: %s\n",
+ config.hostsocket,context->errstr);
+ }
redisFree(context);
context = NULL;
return REDIS_ERR;
@@ -443,7 +840,7 @@ static sds cliFormatReplyTTY(redisReply *r, char *prefix) {
_prefix = sdscat(sdsnew(prefix),_prefixlen);
/* Setup prefix format for every entry */
- snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%dd) ",idxlen);
+ snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%ud) ",idxlen);
for (i = 0; i < r->elements; i++) {
/* Don't use the prefix for the first element, as the parent
@@ -465,6 +862,50 @@ static sds cliFormatReplyTTY(redisReply *r, char *prefix) {
return out;
}
+int isColorTerm(void) {
+ char *t = getenv("TERM");
+ return t != NULL && strstr(t,"xterm") != NULL;
+}
+
+/* Helper function for sdsCatColorizedLdbReply() appending colorized strings
+ * to an SDS string. */
+sds sdscatcolor(sds o, char *s, size_t len, char *color) {
+ if (!isColorTerm()) return sdscatlen(o,s,len);
+
+ int bold = strstr(color,"bold") != NULL;
+ int ccode = 37; /* Defaults to white. */
+ if (strstr(color,"red")) ccode = 31;
+ else if (strstr(color,"green")) ccode = 32;
+ else if (strstr(color,"yellow")) ccode = 33;
+ else if (strstr(color,"blue")) ccode = 34;
+ else if (strstr(color,"magenta")) ccode = 35;
+ else if (strstr(color,"cyan")) ccode = 36;
+ else if (strstr(color,"white")) ccode = 37;
+
+ o = sdscatfmt(o,"\033[%i;%i;49m",bold,ccode);
+ o = sdscatlen(o,s,len);
+ o = sdscat(o,"\033[0m");
+ return o;
+}
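+
+/* Example (hypothetical call): on a color terminal,
+ * sdscatcolor(o,"hi",2,"bold red") appends "\033[1;31;49mhi\033[0m",
+ * i.e. the text wrapped in a bold red escape sequence. */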
+
+/* Colorize Lua debugger status replies according to the prefix they
+ * have. */
+sds sdsCatColorizedLdbReply(sds o, char *s, size_t len) {
+ char *color = "white";
+
+ if (strstr(s,"<debug>")) color = "bold";
+ if (strstr(s,"<redis>")) color = "green";
+ if (strstr(s,"<reply>")) color = "cyan";
+ if (strstr(s,"<error>")) color = "red";
+ if (strstr(s,"<hint>")) color = "bold";
+ if (strstr(s,"<value>") || strstr(s,"<retval>")) color = "magenta";
+ if (len > 4 && isdigit(s[3])) {
+ if (s[1] == '>') color = "yellow"; /* Current line. */
+ else if (s[2] == '#') color = "bold"; /* Break point. */
+ }
+ return sdscatcolor(o,s,len,color);
+}
+
static sds cliFormatReplyRaw(redisReply *r) {
sds out = sdsempty(), tmp;
size_t i;
@@ -479,7 +920,24 @@ static sds cliFormatReplyRaw(redisReply *r) {
break;
case REDIS_REPLY_STATUS:
case REDIS_REPLY_STRING:
- out = sdscatlen(out,r->str,r->len);
+ if (r->type == REDIS_REPLY_STATUS && config.eval_ldb) {
+ /* The Lua debugger replies with arrays of simple (status)
+ * strings. We colorize the output for more fun if this
+ * is a debugging session. */
+
+ /* Detect the end of a debugging session. */
+ if (strstr(r->str,"<endsession>") == r->str) {
+ config.enable_ldb_on_eval = 0;
+ config.eval_ldb = 0;
+ config.eval_ldb_end = 1; /* Signal the caller session ended. */
+ config.output = OUTPUT_STANDARD;
+ cliRefreshPrompt();
+ } else {
+ out = sdsCatColorizedLdbReply(out,r->str,r->len);
+ }
+ } else {
+ out = sdscatlen(out,r->str,r->len);
+ }
break;
case REDIS_REPLY_INTEGER:
out = sdscatprintf(out,"%lld",r->integer);
@@ -518,7 +976,7 @@ static sds cliFormatReplyCSV(redisReply *r) {
out = sdscatrepr(out,r->str,r->len);
break;
case REDIS_REPLY_NIL:
- out = sdscat(out,"NIL\n");
+ out = sdscat(out,"NIL");
break;
case REDIS_REPLY_ARRAY:
for (i = 0; i < r->elements; i++) {
@@ -582,7 +1040,7 @@ static int cliReadReply(int output_raw_strings) {
p = strchr(s+1,' '); /* MOVED[S]3999[P]127.0.0.1:6381 */
*p = '\0';
slot = atoi(s+1);
- s = strchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */
+ s = strrchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */
*s = '\0';
sdsfree(config.hostip);
config.hostip = sdsnew(p+1);
@@ -615,12 +1073,13 @@ static int cliReadReply(int output_raw_strings) {
return REDIS_OK;
}
-static int cliSendCommand(int argc, char **argv, int repeat) {
+static int cliSendCommand(int argc, char **argv, long repeat) {
char *command = argv[0];
size_t *argvlen;
int j, output_raw;
- if (!strcasecmp(command,"help") || !strcasecmp(command,"?")) {
+ if (!config.eval_ldb && /* In debugging mode, let's pass "help" to Redis. */
+ (!strcasecmp(command,"help") || !strcasecmp(command,"?"))) {
cliOutputHelp(--argc, ++argv);
return REDIS_OK;
}
@@ -629,13 +1088,17 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
output_raw = 0;
if (!strcasecmp(command,"info") ||
- (argc == 3 && !strcasecmp(command,"debug") &&
- (!strcasecmp(argv[1],"jemalloc") &&
- !strcasecmp(argv[2],"info"))) ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"htstats")) ||
+ (argc >= 2 && !strcasecmp(command,"debug") &&
+ !strcasecmp(argv[1],"htstats-key")) ||
+ (argc >= 2 && !strcasecmp(command,"memory") &&
+ (!strcasecmp(argv[1],"malloc-stats") ||
+ !strcasecmp(argv[1],"doctor"))) ||
(argc == 2 && !strcasecmp(command,"cluster") &&
(!strcasecmp(argv[1],"nodes") ||
!strcasecmp(argv[1],"info"))) ||
- (argc == 2 && !strcasecmp(command,"client") &&
+ (argc >= 2 && !strcasecmp(command,"client") &&
!strcasecmp(argv[1],"list")) ||
(argc == 3 && !strcasecmp(command,"latency") &&
!strcasecmp(argv[1],"graph")) ||
@@ -652,12 +1115,30 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
if (!strcasecmp(command,"sync") ||
!strcasecmp(command,"psync")) config.slave_mode = 1;
+ /* When the user manually calls SCRIPT DEBUG, set up the activation of
+ * debugging mode on the next eval if needed. */
+ if (argc == 3 && !strcasecmp(argv[0],"script") &&
+ !strcasecmp(argv[1],"debug"))
+ {
+ if (!strcasecmp(argv[2],"yes") || !strcasecmp(argv[2],"sync")) {
+ config.enable_ldb_on_eval = 1;
+ } else {
+ config.enable_ldb_on_eval = 0;
+ }
+ }
+
+ /* Actually activate LDB on EVAL if needed. */
+ if (!strcasecmp(command,"eval") && config.enable_ldb_on_eval) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ }
+
/* Setup argument length */
- argvlen = malloc(argc*sizeof(size_t));
+ argvlen = zmalloc(argc*sizeof(size_t));
for (j = 0; j < argc; j++)
argvlen[j] = sdslen(argv[j]);
- while(repeat--) {
+ while(repeat-- > 0) {
redisAppendCommandArgv(context,argc,(const char**)argv,argvlen);
while (config.monitor_mode) {
if (cliReadReply(output_raw) != REDIS_OK) exit(1);
@@ -676,40 +1157,51 @@ static int cliSendCommand(int argc, char **argv, int repeat) {
printf("Entering slave output mode... (press Ctrl-C to quit)\n");
slaveMode();
config.slave_mode = 0;
- free(argvlen);
+ zfree(argvlen);
return REDIS_ERR; /* Error = slaveMode lost connection to master */
}
if (cliReadReply(output_raw) != REDIS_OK) {
- free(argvlen);
+ zfree(argvlen);
return REDIS_ERR;
} else {
/* Store database number when SELECT was successfully executed. */
- if (!strcasecmp(command,"select") && argc == 2) {
+ if (!strcasecmp(command,"select") && argc == 2 && config.last_cmd_type != REDIS_REPLY_ERROR) {
config.dbnum = atoi(argv[1]);
cliRefreshPrompt();
} else if (!strcasecmp(command,"auth") && argc == 2) {
cliSelect();
}
+
+
+ /* Issue the command again if we got redirected in cluster mode */
+ if (config.cluster_mode && config.cluster_reissue_command) {
+ cliConnect(CC_FORCE);
+ config.cluster_reissue_command = 0;
+ /* For a '-MOVED' or '-ASK' response we need to issue the command
+ * again, so increment repeat by 1. */
+ repeat++;
+ }
}
if (config.interval) usleep(config.interval);
fflush(stdout); /* Make it grep friendly */
}
- free(argvlen);
+ zfree(argvlen);
return REDIS_OK;
}
-/* Send the INFO command, reconnecting the link if needed. */
-static redisReply *reconnectingInfo(void) {
- redisContext *c = context;
+/* Send a command reconnecting the link if needed. */
+static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ...) {
redisReply *reply = NULL;
int tries = 0;
+ va_list ap;
assert(!c->err);
while(reply == NULL) {
while (c->err & (REDIS_ERR_IO | REDIS_ERR_EOF)) {
- printf("Reconnecting (%d)...\r", ++tries);
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
+ printf("Reconnecting... %d\r", ++tries);
fflush(stdout);
redisFree(c);
@@ -717,12 +1209,15 @@ static redisReply *reconnectingInfo(void) {
usleep(1000000);
}
- reply = redisCommand(c,"INFO");
+ va_start(ap,fmt);
+ reply = redisvCommand(c,fmt,ap);
+ va_end(ap);
+
if (c->err && !(c->err & (REDIS_ERR_IO | REDIS_ERR_EOF))) {
fprintf(stderr, "Error: %s\n", c->errstr);
exit(1);
} else if (tries > 0) {
- printf("\n");
+ printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */
}
}
@@ -760,8 +1255,12 @@ static int parseOptions(int argc, char **argv) {
config.interval = seconds*1000000;
} else if (!strcmp(argv[i],"-n") && !lastarg) {
config.dbnum = atoi(argv[++i]);
+ } else if (!strcmp(argv[i], "--no-auth-warning")) {
+ config.no_auth_warning = 1;
} else if (!strcmp(argv[i],"-a") && !lastarg) {
config.auth = argv[++i];
+ } else if (!strcmp(argv[i],"-u") && !lastarg) {
+ parseRedisUri(argv[++i]);
} else if (!strcmp(argv[i],"--raw")) {
config.output = OUTPUT_RAW;
} else if (!strcmp(argv[i],"--no-raw")) {
@@ -770,9 +1269,17 @@ static int parseOptions(int argc, char **argv) {
config.output = OUTPUT_CSV;
} else if (!strcmp(argv[i],"--latency")) {
config.latency_mode = 1;
+ } else if (!strcmp(argv[i],"--latency-dist")) {
+ config.latency_dist_mode = 1;
+ } else if (!strcmp(argv[i],"--mono")) {
+ spectrum_palette = spectrum_palette_mono;
+ spectrum_palette_size = spectrum_palette_mono_size;
} else if (!strcmp(argv[i],"--latency-history")) {
config.latency_mode = 1;
config.latency_history = 1;
+ } else if (!strcmp(argv[i],"--lru-test") && !lastarg) {
+ config.lru_test_mode = 1;
+ config.lru_test_sample_size = strtoll(argv[++i],NULL,10);
} else if (!strcmp(argv[i],"--slave")) {
config.slave_mode = 1;
} else if (!strcmp(argv[i],"--stat")) {
@@ -793,18 +1300,104 @@ static int parseOptions(int argc, char **argv) {
config.pipe_timeout = atoi(argv[++i]);
} else if (!strcmp(argv[i],"--bigkeys")) {
config.bigkeys = 1;
+ } else if (!strcmp(argv[i],"--hotkeys")) {
+ config.hotkeys = 1;
} else if (!strcmp(argv[i],"--eval") && !lastarg) {
config.eval = argv[++i];
+ } else if (!strcmp(argv[i],"--ldb")) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ } else if (!strcmp(argv[i],"--ldb-sync-mode")) {
+ config.eval_ldb = 1;
+ config.eval_ldb_sync = 1;
+ config.output = OUTPUT_RAW;
} else if (!strcmp(argv[i],"-c")) {
config.cluster_mode = 1;
} else if (!strcmp(argv[i],"-d") && !lastarg) {
sdsfree(config.mb_delim);
config.mb_delim = sdsnew(argv[++i]);
+ } else if (!strcmp(argv[i],"--verbose")) {
+ config.verbose = 1;
+ } else if (!strcmp(argv[i],"--cluster") && !lastarg) {
+ if (CLUSTER_MANAGER_MODE()) usage();
+ char *cmd = argv[++i];
+ int j = i;
+ while (j < argc && argv[j][0] != '-') j++;
+ if (j > i) j--;
+ createClusterManagerCommand(cmd, j - i, argv + i + 1);
+ i = j;
+ } else if (!strcmp(argv[i],"--cluster") && lastarg) {
+ usage();
+ } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) {
+ config.cluster_manager_command.replicas = atoi(argv[++i]);
+ } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) {
+ config.cluster_manager_command.master_id = argv[++i];
+ } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) {
+ config.cluster_manager_command.from = argv[++i];
+ } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) {
+ config.cluster_manager_command.to = argv[++i];
+ } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) {
+ if (config.cluster_manager_command.weight != NULL) {
+ fprintf(stderr, "WARNING: you cannot use --cluster-weight "
+ "more than once.\n"
+ "You can set more weights by adding them "
+ "as a space-separated list, ie:\n"
+ "--cluster-weight n1=w n2=w\n");
+ exit(1);
+ }
+ int widx = i + 1;
+ char **weight = argv + widx;
+ int wargc = 0;
+ for (; widx < argc; widx++) {
+ if (strstr(argv[widx], "--") == argv[widx]) break;
+ if (strchr(argv[widx], '=') == NULL) break;
+ wargc++;
+ }
+ if (wargc > 0) {
+ config.cluster_manager_command.weight = weight;
+ config.cluster_manager_command.weight_argc = wargc;
+ i += wargc;
+ }
+ } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) {
+ config.cluster_manager_command.slots = atoi(argv[++i]);
+ } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) {
+ config.cluster_manager_command.timeout = atoi(argv[++i]);
+ } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) {
+ config.cluster_manager_command.pipeline = atoi(argv[++i]);
+ } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) {
+ config.cluster_manager_command.threshold = atof(argv[++i]);
+ } else if (!strcmp(argv[i],"--cluster-yes")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_YES;
+ } else if (!strcmp(argv[i],"--cluster-simulate")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_SIMULATE;
+ } else if (!strcmp(argv[i],"--cluster-replace")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_REPLACE;
+ } else if (!strcmp(argv[i],"--cluster-copy")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_COPY;
+ } else if (!strcmp(argv[i],"--cluster-slave")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_SLAVE;
+ } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) {
+ config.cluster_manager_command.flags |=
+ CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
} else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) {
sds version = cliVersion();
printf("redis-cli %s\n", version);
sdsfree(version);
exit(0);
+ } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') {
+ if (config.cluster_manager_command.argc == 0) {
+ int j = i + 1;
+ while (j < argc && argv[j][0] != '-') j++;
+ int cmd_argc = j - i;
+ config.cluster_manager_command.argc = cmd_argc;
+ config.cluster_manager_command.argv = argv + i;
+ if (cmd_argc > 1) i = j - 1;
+ }
} else {
if (argv[i][0] == '-') {
fprintf(stderr,
@@ -817,6 +1410,19 @@ static int parseOptions(int argc, char **argv) {
}
}
}
+
+ /* --ldb requires --eval. */
+ if (config.eval_ldb && config.eval == NULL) {
+ fprintf(stderr,"Options --ldb and --ldb-sync-mode require --eval.\n");
+ fprintf(stderr,"Try %s --help for more information.\n", argv[0]);
+ exit(1);
+ }
+
+ if (!config.no_auth_warning && config.auth != NULL) {
+ fputs("Warning: Using a password with '-a' or '-u' option on the command"
+ " line interface may not be safe.\n", stderr);
+ }
+
return i;
}
@@ -847,6 +1453,7 @@ static void usage(void) {
" -p <port> Server port (default: 6379).\n"
" -s <socket> Server socket (overrides hostname and port).\n"
" -a <password> Password to use when connecting to the server.\n"
+" -u <uri> Server URI.\n"
" -r <repeat> Execute specified command N times.\n"
" -i <interval> When -r is used, waits <interval> seconds per command.\n"
" It is possible to specify sub-second times like -i 0.1.\n"
@@ -860,8 +1467,17 @@ static void usage(void) {
" --csv Output in CSV format.\n"
" --stat Print rolling stats about server: mem, clients, ...\n"
" --latency Enter a special mode continuously sampling latency.\n"
+" If you use this mode in an interactive session it runs\n"
+" forever displaying real-time stats. Otherwise if --raw or\n"
+" --csv is specified, or if you redirect the output to a non\n"
+" TTY, it samples the latency for 1 second (you can use\n"
+" -i to change the interval), then produces a single output\n"
+" and exits.\n"
" --latency-history Like --latency but tracking latency changes over time.\n"
" Default time interval is 15 sec. Change it using -i.\n"
+" --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n"
+" Default time interval is 1 sec. Change it using -i.\n"
+" --lru-test <keys> Simulate a cache workload with an 80-20 distribution.\n"
" --slave Simulate a slave showing commands received from the master.\n"
" --rdb <filename> Transfer an RDB dump from remote server to local file.\n"
" --pipe Transfer raw Redis protocol from stdin to server.\n"
@@ -869,13 +1485,30 @@ static void usage(void) {
" no reply is received within <n> seconds.\n"
" Default timeout: %d. Use 0 to wait forever.\n"
" --bigkeys Sample Redis keys looking for big keys.\n"
+" --hotkeys Sample Redis keys looking for hot keys.\n"
+" only works when maxmemory-policy is *lfu.\n"
" --scan List all keys using the SCAN command.\n"
" --pattern <pat> Useful with --scan to specify a SCAN pattern.\n"
" --intrinsic-latency <sec> Run a test to measure intrinsic system latency.\n"
" The test will run for the specified amount of seconds.\n"
" --eval <file> Send an EVAL command using the Lua script at <file>.\n"
+" --ldb Used with --eval enable the Redis Lua debugger.\n"
+" --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n"
+" this mode the server is blocked and script changes are\n"
+" are not rolled back from the server memory.\n"
+" --cluster <command> [args...] [opts...]\n"
+" Cluster Manager command and arguments (see below).\n"
+" --verbose Verbose mode.\n"
+" --no-auth-warning Don't show warning message when using password on command\n"
+" line interface.\n"
" --help Output this help and exit.\n"
" --version Output version and exit.\n"
+"\n",
+ version, REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
+ /* Using another fprintf call to avoid -Woverlength-strings compile warning */
+ fprintf(stderr,
+"Cluster Manager Commands:\n"
+" Use --cluster help to list all available cluster manager commands.\n"
"\n"
"Examples:\n"
" cat /etc/passwd | redis-cli -x set mypasswd\n"
@@ -888,13 +1521,22 @@ static void usage(void) {
" (Note: when using --eval the comma separates KEYS[] from ARGV[] items)\n"
"\n"
"When no command is given, redis-cli starts in interactive mode.\n"
-"Type \"help\" in interactive mode for information on available commands.\n"
-"\n",
- version, REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
+"Type \"help\" in interactive mode for information on available commands\n"
+"and settings.\n"
+"\n");
sdsfree(version);
exit(1);
}
+static int confirmWithYes(char *msg) {
+ printf("%s (type 'yes' to accept): ", msg);
+ fflush(stdout);
+ char buf[4];
+ int nread = read(fileno(stdin),buf,4);
+ buf[3] = '\0';
+ return (nread != 0 && !strcmp("yes", buf));
+}
+
/* Turn the plain C strings into Sds strings */
static char **convertToSds(int count, char** args) {
int j;
@@ -910,7 +1552,7 @@ static int issueCommandRepeat(int argc, char **argv, long repeat) {
while (1) {
config.cluster_reissue_command = 0;
if (cliSendCommand(argc,argv,repeat) != REDIS_OK) {
- cliConnect(1);
+ cliConnect(CC_FORCE);
/* If we still cannot send the command print error.
* We'll try to reconnect the next time. */
@@ -918,13 +1560,8 @@ static int issueCommandRepeat(int argc, char **argv, long repeat) {
cliPrintContextError();
return REDIS_ERR;
}
- }
- /* Issue the command again if we got redirected in cluster mode */
- if (config.cluster_mode && config.cluster_reissue_command) {
- cliConnect(1);
- } else {
- break;
- }
+ } else
+ break;
}
return REDIS_OK;
}
@@ -933,6 +1570,68 @@ static int issueCommand(int argc, char **argv) {
return issueCommandRepeat(argc, argv, config.repeat);
}
+/* Split the user provided command into multiple SDS arguments.
+ * This function normally uses sdssplitargs() from sds.c which is able
+ * to understand "quoted strings", escapes and so forth. However when
+ * we are in Lua debugging mode and the "eval" command is used, we want
+ * the remaining Lua script (after "e " or "eval ") to be passed verbatim
+ * as a single big argument. */
+static sds *cliSplitArgs(char *line, int *argc) {
+ if (config.eval_ldb && (strstr(line,"eval ") == line ||
+ strstr(line,"e ") == line))
+ {
+ sds *argv = sds_malloc(sizeof(sds)*2);
+ *argc = 2;
+ int len = strlen(line);
+ int elen = line[1] == ' ' ? 2 : 5; /* "e " or "eval "? */
+ argv[0] = sdsnewlen(line,elen-1);
+ argv[1] = sdsnewlen(line+elen,len-elen);
+ return argv;
+ } else {
+ return sdssplitargs(line,argc);
+ }
+}
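+
+/* For example (hypothetical REPL input), in Lua debugging mode the line
+ *
+ *   e redis.call('set', KEYS[1], 'bar')
+ *
+ * is split into just argv[0] = "e" and argv[1] = "redis.call('set',
+ * KEYS[1], 'bar')", while outside the debugger the same line would be
+ * broken into one argument per word by sdssplitargs(). */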
+
+/* Set the CLI preferences. This function is invoked when an interactive
+ * ":command" is called, or when reading the ~/.redisclirc file, in order
+ * to set user preferences. */
+void cliSetPreferences(char **argv, int argc, int interactive) {
+ if (!strcasecmp(argv[0],":set") && argc >= 2) {
+ if (!strcasecmp(argv[1],"hints")) pref.hints = 1;
+ else if (!strcasecmp(argv[1],"nohints")) pref.hints = 0;
+ else {
+ printf("%sunknown redis-cli preference '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[1]);
+ }
+ } else {
+ printf("%sunknown redis-cli internal command '%s'\n",
+ interactive ? "" : ".redisclirc: ",
+ argv[0]);
+ }
+}
+
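+/* Example ~/.redisclirc contents (a hypothetical preferences file):
+ *
+ *   :set hints
+ *
+ * Each line is parsed with sdssplitargs() and passed to
+ * cliSetPreferences() with interactive set to 0. */
+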
+/* Load the ~/.redisclirc file if any. */
+void cliLoadPreferences(void) {
+ sds rcfile = getDotfilePath(REDIS_CLI_RCFILE_ENV,REDIS_CLI_RCFILE_DEFAULT);
+ if (rcfile == NULL) return;
+ FILE *fp = fopen(rcfile,"r");
+ char buf[1024];
+
+ if (fp) {
+ while(fgets(buf,sizeof(buf),fp) != NULL) {
+ sds *argv;
+ int argc;
+
+ argv = sdssplitargs(buf,&argc);
+ if (argc > 0) cliSetPreferences(argv,argc,0);
+ sdsfreesplitres(argv,argc);
+ }
+ fclose(fp);
+ }
+ sdsfree(rcfile);
+}
+
static void repl(void) {
sds historyfile = NULL;
int history = 0;
@@ -940,58 +1639,110 @@ static void repl(void) {
int argc;
sds *argv;
+ /* Initialize the help and, if possible, use the COMMAND command in order
+ * to retrieve missing entries. */
+ cliInitHelp();
+ cliIntegrateHelp();
+
config.interactive = 1;
linenoiseSetMultiLine(1);
linenoiseSetCompletionCallback(completionCallback);
+ linenoiseSetHintsCallback(hintsCallback);
+ linenoiseSetFreeHintsCallback(freeHintsCallback);
- /* Only use history when stdin is a tty. */
+ /* Only use history and load the rc file when stdin is a tty. */
if (isatty(fileno(stdin))) {
- historyfile = getHistoryPath();
+ historyfile = getDotfilePath(REDIS_CLI_HISTFILE_ENV,REDIS_CLI_HISTFILE_DEFAULT);
+ /* Always keep the in-memory history, regardless of whether a
+ * history file could be determined. */
+ history = 1;
if (historyfile != NULL) {
- history = 1;
linenoiseHistoryLoad(historyfile);
}
+ cliLoadPreferences();
}
cliRefreshPrompt();
while((line = linenoise(context ? config.prompt : "not connected> ")) != NULL) {
if (line[0] != '\0') {
- argv = sdssplitargs(line,&argc);
- if (history) linenoiseHistoryAdd(line);
- if (historyfile) linenoiseHistorySave(historyfile);
+ long repeat = 1;
+ int skipargs = 0;
+ char *endptr = NULL;
+
+ argv = cliSplitArgs(line,&argc);
+
+ /* check if we have a repeat command option and
+ * need to skip the first arg */
+ if (argv && argc > 0) {
+ errno = 0;
+ repeat = strtol(argv[0], &endptr, 10);
+ if (argc > 1 && *endptr == '\0') {
+ if (errno == ERANGE || errno == EINVAL || repeat <= 0) {
+ fputs("Invalid redis-cli repeat command option value.\n", stdout);
+ sdsfreesplitres(argv, argc);
+ linenoiseFree(line);
+ continue;
+ }
+ skipargs = 1;
+ } else {
+ repeat = 1;
+ }
+ }
+
+ /* Don't save the AUTH command in the history file. */
+ if (!(argv && argc > 0 && !strcasecmp(argv[0+skipargs], "auth"))) {
+ if (history) linenoiseHistoryAdd(line);
+ if (historyfile) linenoiseHistorySave(historyfile);
+ }
if (argv == NULL) {
printf("Invalid argument(s)\n");
- free(line);
+ linenoiseFree(line);
continue;
} else if (argc > 0) {
if (strcasecmp(argv[0],"quit") == 0 ||
strcasecmp(argv[0],"exit") == 0)
{
exit(0);
+ } else if (argv[0][0] == ':') {
+ cliSetPreferences(argv,argc,1);
+ sdsfreesplitres(argv,argc);
+ linenoiseFree(line);
+ continue;
+ } else if (strcasecmp(argv[0],"restart") == 0) {
+ if (config.eval) {
+ config.eval_ldb = 1;
+ config.output = OUTPUT_RAW;
+ return; /* Return to evalMode to restart the session. */
+ } else {
+ printf("Use 'restart' only in Lua debugging mode.");
+ }
} else if (argc == 3 && !strcasecmp(argv[0],"connect")) {
sdsfree(config.hostip);
config.hostip = sdsnew(argv[1]);
config.hostport = atoi(argv[2]);
cliRefreshPrompt();
- cliConnect(1);
+ cliConnect(CC_FORCE);
} else if (argc == 1 && !strcasecmp(argv[0],"clear")) {
linenoiseClearScreen();
} else {
long long start_time = mstime(), elapsed;
- int repeat, skipargs = 0;
-
- repeat = atoi(argv[0]);
- if (argc > 1 && repeat) {
- skipargs = 1;
- } else {
- repeat = 1;
- }
issueCommandRepeat(argc-skipargs, argv+skipargs, repeat);
+ /* If our debugging session ended, show the EVAL final
+ * reply. */
+ if (config.eval_ldb_end) {
+ config.eval_ldb_end = 0;
+ cliReadReply(0);
+ printf("\n(Lua debugging session ended%s)\n\n",
+ config.eval_ldb_sync ? "" :
+ " -- dataset changes rolled back");
+ }
+
elapsed = mstime()-start_time;
- if (elapsed >= 500) {
+ if (elapsed >= 500 &&
+ config.output == OUTPUT_STANDARD)
+ {
printf("(%.2fs)\n",(double)elapsed/1000);
}
}
@@ -1000,7 +1751,7 @@ static void repl(void) {
sdsfreesplitres(argv,argc);
}
/* linenoise() returns malloc-ed lines like readline() */
- free(line);
+ linenoiseFree(line);
}
exit(0);
}
@@ -1022,47 +1773,3565 @@ static int noninteractive(int argc, char **argv) {
*--------------------------------------------------------------------------- */
static int evalMode(int argc, char **argv) {
- sds script = sdsempty();
+ sds script = NULL;
FILE *fp;
char buf[1024];
size_t nread;
char **argv2;
- int j, got_comma = 0, keys = 0;
+ int j, got_comma, keys;
+ int retval = REDIS_OK;
- /* Load the script from the file, as an sds string. */
- fp = fopen(config.eval,"r");
- if (!fp) {
+ while(1) {
+ if (config.eval_ldb) {
+ printf(
+ "Lua debugging session started, please use:\n"
+ "quit -- End the session.\n"
+ "restart -- Restart the script in debug mode again.\n"
+ "help -- Show Lua script debugging commands.\n\n"
+ );
+ }
+
+ sdsfree(script);
+ script = sdsempty();
+ got_comma = 0;
+ keys = 0;
+
+ /* Load the script from the file, as an sds string. */
+ fp = fopen(config.eval,"r");
+ if (!fp) {
+ fprintf(stderr,
+ "Can't open file '%s': %s\n", config.eval, strerror(errno));
+ exit(1);
+ }
+ while((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
+ script = sdscatlen(script,buf,nread);
+ }
+ fclose(fp);
+
+ /* If we are debugging a script, enable the Lua debugger. */
+ if (config.eval_ldb) {
+ redisReply *reply = redisCommand(context,
+ config.eval_ldb_sync ?
+ "SCRIPT DEBUG sync": "SCRIPT DEBUG yes");
+ if (reply) freeReplyObject(reply);
+ }
+
+ /* Create our argument vector */
+ argv2 = zmalloc(sizeof(sds)*(argc+3));
+ argv2[0] = sdsnew("EVAL");
+ argv2[1] = script;
+ for (j = 0; j < argc; j++) {
+ if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) {
+ got_comma = 1;
+ continue;
+ }
+ argv2[j+3-got_comma] = sdsnew(argv[j]);
+ if (!got_comma) keys++;
+ }
+ argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
+
+ /* Call it */
+ int eval_ldb = config.eval_ldb; /* Save it, it may be reverted. */
+ retval = issueCommand(argc+3-got_comma, argv2);
+ if (eval_ldb) {
+ if (!config.eval_ldb) {
+ /* If the debugging session ended immediately, there was an
+ * error compiling the script. Show it and don't enter
+ * the REPL at all. */
+ printf("Eval debugging session can't start:\n");
+ cliReadReply(0);
+ break; /* Return to the caller. */
+ } else {
+ strncpy(config.prompt,"lua debugger> ",sizeof(config.prompt));
+ repl();
+ /* Restart the session if repl() returned. */
+ cliConnect(CC_FORCE);
+ printf("\n");
+ }
+ } else {
+ break; /* Return to the caller. */
+ }
+ }
+ return retval;
+}
+
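+/* A sketch of the argument vector built above: for the hypothetical
+ * invocation
+ *
+ *   redis-cli --eval myscript.lua key1 key2 , arg1
+ *
+ * the command sent to the server is
+ *
+ *   EVAL <contents of myscript.lua> 2 key1 key2 arg1
+ *
+ * where the comma only separates KEYS[] from ARGV[] and is never sent. */
+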
+/*------------------------------------------------------------------------------
+ * Cluster Manager
+ *--------------------------------------------------------------------------- */
+
+/* The Cluster Manager global structure */
+static struct clusterManager {
+ list *nodes; /* List of nodes in the configuration. */
+ list *errors;
+} cluster_manager;
+
+/* Used by clusterManagerFixSlotsCoverage */
+dict *clusterManagerUncoveredSlots = NULL;
+
+typedef struct clusterManagerNode {
+ redisContext *context;
+ sds name;
+ char *ip;
+ int port;
+ uint64_t current_epoch;
+ time_t ping_sent;
+ time_t ping_recv;
+ int flags;
+ list *flags_str; /* Flags string representations */
+ sds replicate; /* Master ID if node is a slave */
+ list replicas;
+ int dirty; /* Node has changes that can be flushed */
+ uint8_t slots[CLUSTER_MANAGER_SLOTS];
+ int slots_count;
+ int replicas_count;
+ list *friends;
+ sds *migrating; /* An array of sds where even strings are slots and odd
+ * strings are the destination node IDs. */
+ sds *importing; /* An array of sds where even strings are slots and odd
+ * strings are the source node IDs. */
+ int migrating_count; /* Length of the migrating array (migrating slots*2) */
+ int importing_count; /* Length of the importing array (importing slots*2) */
+ float weight; /* Weight used by rebalance */
+ int balance; /* Used by rebalance */
+} clusterManagerNode;
+
+/* Data structure used to represent a sequence of cluster nodes. */
+typedef struct clusterManagerNodeArray {
+ clusterManagerNode **nodes; /* Actual nodes array */
+ clusterManagerNode **alloc; /* Pointer to the allocated memory */
+ int len; /* Actual length of the array */
+ int count; /* Non-NULL nodes count */
+} clusterManagerNodeArray;
+
+/* Used for the reshard table. */
+typedef struct clusterManagerReshardTableItem {
+ clusterManagerNode *source;
+ int slot;
+} clusterManagerReshardTableItem;
+
+static dictType clusterManagerDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ dictSdsDestructor /* val destructor */
+};
+
+typedef int clusterManagerCommandProc(int argc, char **argv);
+
+/* Cluster Manager helper functions */
+
+static clusterManagerNode *clusterManagerNewNode(char *ip, int port);
+static clusterManagerNode *clusterManagerNodeByName(const char *name);
+static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n);
+static void clusterManagerNodeResetSlots(clusterManagerNode *node);
+static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err);
+static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node,
+ char *err);
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+ char **err);
+static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts);
+static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err);
+static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
+ int ip_count, clusterManagerNode ***offending, int *offending_len);
+static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
+ int ip_count);
+static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent);
+static void clusterManagerShowNodes(void);
+static void clusterManagerShowClusterInfo(void);
+static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err);
+static void clusterManagerWaitForClusterJoin(void);
+static int clusterManagerCheckCluster(int quiet);
+static void clusterManagerLog(int level, const char* fmt, ...);
+static int clusterManagerIsConfigConsistent(void);
+static void clusterManagerOnError(sds err);
+static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array,
+ int len);
+static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array);
+static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array,
+ clusterManagerNode **nodeptr);
+static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array,
+ clusterManagerNode *node);
+
+/* Cluster Manager commands. */
+
+static int clusterManagerCommandCreate(int argc, char **argv);
+static int clusterManagerCommandAddNode(int argc, char **argv);
+static int clusterManagerCommandDeleteNode(int argc, char **argv);
+static int clusterManagerCommandInfo(int argc, char **argv);
+static int clusterManagerCommandCheck(int argc, char **argv);
+static int clusterManagerCommandFix(int argc, char **argv);
+static int clusterManagerCommandReshard(int argc, char **argv);
+static int clusterManagerCommandRebalance(int argc, char **argv);
+static int clusterManagerCommandSetTimeout(int argc, char **argv);
+static int clusterManagerCommandImport(int argc, char **argv);
+static int clusterManagerCommandCall(int argc, char **argv);
+static int clusterManagerCommandHelp(int argc, char **argv);
+
+typedef struct clusterManagerCommandDef {
+ char *name;
+ clusterManagerCommandProc *proc;
+ int arity;
+ char *args;
+ char *options;
+} clusterManagerCommandDef;
+
+clusterManagerCommandDef clusterManagerCommands[] = {
+ {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN",
+ "replicas <arg>"},
+ {"check", clusterManagerCommandCheck, -1, "host:port", NULL},
+ {"info", clusterManagerCommandInfo, -1, "host:port", NULL},
+ {"fix", clusterManagerCommandFix, -1, "host:port", NULL},
+ {"reshard", clusterManagerCommandReshard, -1, "host:port",
+ "from <arg>,to <arg>,slots <arg>,yes,timeout <arg>,pipeline <arg>"},
+ {"rebalance", clusterManagerCommandRebalance, -1, "host:port",
+ "weight <node1=w1...nodeN=wN>,use-empty-masters,"
+ "timeout <arg>,simulate,pipeline <arg>,threshold <arg>"},
+ {"add-node", clusterManagerCommandAddNode, 2,
+ "new_host:new_port existing_host:existing_port", "slave,master-id <arg>"},
+ {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL},
+ {"call", clusterManagerCommandCall, -2,
+ "host:port command arg arg .. arg", NULL},
+ {"set-timeout", clusterManagerCommandSetTimeout, 2,
+ "host:port milliseconds", NULL},
+ {"import", clusterManagerCommandImport, 1, "host:port",
+ "from <arg>,copy,replace"},
+ {"help", clusterManagerCommandHelp, 0, NULL, NULL}
+};
+
+
+static void createClusterManagerCommand(char *cmdname, int argc, char **argv) {
+ clusterManagerCommand *cmd = &config.cluster_manager_command;
+ cmd->name = cmdname;
+ cmd->argc = argc;
+ cmd->argv = argc ? argv : NULL;
+ if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR;
+}
+
+
+static clusterManagerCommandProc *validateClusterManagerCommand(void) {
+ int i, commands_count = sizeof(clusterManagerCommands) /
+ sizeof(clusterManagerCommandDef);
+ clusterManagerCommandProc *proc = NULL;
+ char *cmdname = config.cluster_manager_command.name;
+ int argc = config.cluster_manager_command.argc;
+ for (i = 0; i < commands_count; i++) {
+ clusterManagerCommandDef cmddef = clusterManagerCommands[i];
+ if (!strcmp(cmddef.name, cmdname)) {
+ if ((cmddef.arity > 0 && argc != cmddef.arity) ||
+ (cmddef.arity < 0 && argc < (cmddef.arity * -1))) {
+ fprintf(stderr, "[ERR] Wrong number of arguments for "
+ "specified --cluster sub command\n");
+ return NULL;
+ }
+ proc = cmddef.proc;
+ }
+ }
+ if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n");
+ return proc;
+}
+
+/* Get host ip and port from command arguments. If only one argument has
+ * been provided it must be in the form of 'ip:port', otherwise
+ * the first argument must be the ip and the second one the port.
+ * If host and port can be detected, it returns 1 and stores them into
+ * the variables referenced by the 'ip_ptr' and 'port_ptr' pointers,
+ * otherwise it returns 0. */
+static int getClusterHostFromCmdArgs(int argc, char **argv,
+ char **ip_ptr, int *port_ptr) {
+ int port = 0;
+ char *ip = NULL;
+ if (argc == 1) {
+ char *addr = argv[0];
+ char *c = strrchr(addr, '@');
+ if (c != NULL) *c = '\0';
+ c = strrchr(addr, ':');
+ if (c != NULL) {
+ *c = '\0';
+ ip = addr;
+ port = atoi(++c);
+ } else return 0;
+ } else {
+ ip = argv[0];
+ port = atoi(argv[1]);
+ }
+ if (!ip || !port) return 0;
+ else {
+ *ip_ptr = ip;
+ *port_ptr = port;
+ }
+ return 1;
+}
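+
+/* For example (hypothetical arguments), both "127.0.0.1:7000" as a single
+ * argument and "127.0.0.1" "7000" as two arguments yield ip "127.0.0.1"
+ * and port 7000; a cluster bus suffix as in "127.0.0.1:7000@17000" is
+ * accepted as well, since everything after '@' is discarded. */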
+
+static void freeClusterManagerNodeFlags(list *flags) {
+ listIter li;
+ listNode *ln;
+ listRewind(flags, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds flag = ln->value;
+ sdsfree(flag);
+ }
+ listRelease(flags);
+}
+
+static void freeClusterManagerNode(clusterManagerNode *node) {
+ if (node->context != NULL) redisFree(node->context);
+ if (node->friends != NULL) {
+ listIter li;
+ listNode *ln;
+ listRewind(node->friends,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *fn = ln->value;
+ freeClusterManagerNode(fn);
+ }
+ listRelease(node->friends);
+ node->friends = NULL;
+ }
+ if (node->name != NULL) sdsfree(node->name);
+ if (node->replicate != NULL) sdsfree(node->replicate);
+ if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip)
+ sdsfree(node->ip);
+ int i;
+ if (node->migrating != NULL) {
+ for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]);
+ zfree(node->migrating);
+ }
+ if (node->importing != NULL) {
+ for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]);
+ zfree(node->importing);
+ }
+ if (node->flags_str != NULL) {
+ freeClusterManagerNodeFlags(node->flags_str);
+ node->flags_str = NULL;
+ }
+ zfree(node);
+}
+
+static void freeClusterManager(void) {
+ listIter li;
+ listNode *ln;
+ if (cluster_manager.nodes != NULL) {
+ listRewind(cluster_manager.nodes,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ freeClusterManagerNode(n);
+ }
+ listRelease(cluster_manager.nodes);
+ cluster_manager.nodes = NULL;
+ }
+ if (cluster_manager.errors != NULL) {
+ listRewind(cluster_manager.errors,&li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds err = ln->value;
+ sdsfree(err);
+ }
+ listRelease(cluster_manager.errors);
+ cluster_manager.errors = NULL;
+ }
+ if (clusterManagerUncoveredSlots != NULL)
+ dictRelease(clusterManagerUncoveredSlots);
+}
+
+static clusterManagerNode *clusterManagerNewNode(char *ip, int port) {
+ clusterManagerNode *node = zmalloc(sizeof(*node));
+ node->context = NULL;
+ node->name = NULL;
+ node->ip = ip;
+ node->port = port;
+ node->current_epoch = 0;
+ node->ping_sent = 0;
+ node->ping_recv = 0;
+ node->flags = 0;
+ node->flags_str = NULL;
+ node->replicate = NULL;
+ node->dirty = 0;
+ node->friends = NULL;
+ node->migrating = NULL;
+ node->importing = NULL;
+ node->migrating_count = 0;
+ node->importing_count = 0;
+ node->replicas_count = 0;
+ node->weight = 1.0f;
+ node->balance = 0;
+ clusterManagerNodeResetSlots(node);
+ return node;
+}
+
+/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the
+ * latter case, if the 'err' arg is not NULL, it gets allocated with a copy
+ * of the reply error (it's up to the caller to free it); otherwise the
+ * error is printed directly. */
+static int clusterManagerCheckRedisReply(clusterManagerNode *n,
+ redisReply *r, char **err)
+{
+ int is_err = 0;
+ if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) {
+ if (is_err) {
+ if (err != NULL) {
+ *err = zmalloc((r->len + 1) * sizeof(char));
+ strcpy(*err, r->str);
+ } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str);
+ }
+ return 0;
+ }
+ return 1;
+}
+
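+/* A usage sketch: callers either pass an 'err' pointer to collect a copy
+ * of the error string, e.g.
+ *
+ *   char *err = NULL;
+ *   redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER INFO");
+ *   if (!clusterManagerCheckRedisReply(n, r, &err)) {
+ *       ... report the failure, then:
+ *       if (err != NULL) zfree(err);
+ *   }
+ *   if (r != NULL) freeReplyObject(r);
+ *
+ * or pass NULL as 'err' to have the error printed right away. */
+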
+static int clusterManagerNodeConnect(clusterManagerNode *node) {
+ if (node->context) redisFree(node->context);
+ node->context = redisConnect(node->ip, node->port);
+ if (node->context->err) {
+ fprintf(stderr,"Could not connect to Redis at ");
+ fprintf(stderr,"%s:%d: %s\n", node->ip, node->port,
+ node->context->errstr);
+ redisFree(node->context);
+ node->context = NULL;
+ return 0;
+ }
+ /* Set aggressive KEEP_ALIVE socket option in the Redis context socket
+ * in order to prevent timeouts caused by the execution of long
+ * commands. At the same time this improves the detection of real
+ * errors. */
+ anetKeepAlive(NULL, node->context->fd, REDIS_CLI_KEEPALIVE_INTERVAL);
+ if (config.auth) {
+ redisReply *reply = redisCommand(node->context,"AUTH %s",config.auth);
+ int ok = clusterManagerCheckRedisReply(node, reply, NULL);
+ if (reply != NULL) freeReplyObject(reply);
+ if (!ok) return 0;
+ }
+ return 1;
+}
+
+static void clusterManagerRemoveNodeFromList(list *nodelist,
+ clusterManagerNode *node) {
+ listIter li;
+ listNode *ln;
+ listRewind(nodelist, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ if (node == ln->value) {
+ listDelNode(nodelist, ln);
+ break;
+ }
+ }
+}
+
+/* Return the node with the specified name (ID) or NULL. */
+static clusterManagerNode *clusterManagerNodeByName(const char *name) {
+ if (cluster_manager.nodes == NULL) return NULL;
+ clusterManagerNode *found = NULL;
+ sds lcname = sdsempty();
+ lcname = sdscpy(lcname, name);
+ sdstolower(lcname);
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->name && !sdscmp(n->name, lcname)) {
+ found = n;
+ break;
+ }
+ }
+ sdsfree(lcname);
+ return found;
+}
+
+/* Like clusterManagerNodeByName but the specified name can be just the
+ * first part of the node ID, as long as the prefix is unique across the
+ * cluster.
+ */
+static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name)
+{
+ if (cluster_manager.nodes == NULL) return NULL;
+ clusterManagerNode *found = NULL;
+ sds lcname = sdsempty();
+ lcname = sdscpy(lcname, name);
+ sdstolower(lcname);
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->name &&
+ strstr(n->name, lcname) == n->name) {
+ found = n;
+ break;
+ }
+ }
+ sdsfree(lcname);
+ return found;
+}
+
+static void clusterManagerNodeResetSlots(clusterManagerNode *node) {
+ memset(node->slots, 0, sizeof(node->slots));
+ node->slots_count = 0;
+}
+
+/* Call "INFO" redis command on the specified node and return the reply. */
+static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node,
+ char **err)
+{
+ redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO");
+ if (err != NULL) *err = NULL;
+ if (info == NULL) return NULL;
+ if (info->type == REDIS_REPLY_ERROR) {
+ if (err != NULL) {
+ *err = zmalloc((info->len + 1) * sizeof(char));
+ strcpy(*err, info->str);
+ }
+ freeReplyObject(info);
+ return NULL;
+ }
+ return info;
+}
+
+static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) {
+ redisReply *info = clusterManagerGetNodeRedisInfo(node, err);
+ if (info == NULL) return 0;
+ int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled");
+ freeReplyObject(info);
+ return is_cluster;
+}
+
+/* Checks whether the node is empty. A node is considered not empty if it
+ * has any keys or if it already knows other nodes. */
+static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) {
+ redisReply *info = clusterManagerGetNodeRedisInfo(node, err);
+ int is_empty = 1;
+ if (info == NULL) return 0;
+ if (strstr(info->str, "db0:") != NULL) {
+ is_empty = 0;
+ goto result;
+ }
+ freeReplyObject(info);
+ info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO");
+ if (err != NULL) *err = NULL;
+ if (!clusterManagerCheckRedisReply(node, info, err)) {
+ is_empty = 0;
+ goto result;
+ }
+ long known_nodes = getLongInfoField(info->str, "cluster_known_nodes");
+ is_empty = (known_nodes == 1);
+result:
+ freeReplyObject(info);
+ return is_empty;
+}
+
+/* Return the anti-affinity score, which is a measure of the amount of
+ * violations of anti-affinity in the current cluster layout, that is, how
+ * badly the masters and slaves are distributed across the different IP
+ * addresses: ideally slaves should neither share the host of their master
+ * nor the host of other slaves of the same master.
+ *
+ * The score is calculated as follows:
+ *
+ * SAME_AS_MASTER = 10000 * each slave in the same IP as its master.
+ * SAME_AS_SLAVE  = 1 * each slave having the same IP as another slave
+ *                  of the same master.
+ * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE
+ *
+ * So a greater score means a worse anti-affinity level, while zero
+ * means perfect anti-affinity.
+ *
+ * The anti-affinity optimizer will try to get a score as low as
+ * possible. Since we do not want to sacrifice the fact that slaves should
+ * not be in the same host as their master, we assign 10000 times the score
+ * to this violation, so that we'll optimize for the second factor only
+ * if it does not impact the first one.
+ *
+ * The ipnodes argument is an array of clusterManagerNodeArray, one for
+ * each IP, while ip_count is the total number of IPs in the configuration.
+ *
+ * The function returns the above score, and the list of
+ * offending slaves can be stored into the 'offending' argument,
+ * so that the optimizer can try changing the configuration of the
+ * slaves violating the anti-affinity goals. */
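+/* A worked example under a hypothetical layout: a master and one of its
+ * slaves sharing an IP contribute 10000 to the score, while two slaves
+ * of the same master sharing an IP (their master being elsewhere)
+ * contribute 2, for a total score of 10002. */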
+static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
+ int ip_count, clusterManagerNode ***offending, int *offending_len)
+{
+ int score = 0, i, j;
+ int node_len = cluster_manager.nodes->len;
+ clusterManagerNode **offending_p = NULL;
+ if (offending != NULL) {
+ *offending = zcalloc(node_len * sizeof(clusterManagerNode*));
+ offending_p = *offending;
+ }
+ /* For each set of nodes in the same host, split by
+ * related nodes (masters and slaves which are involved in
+ * replication of each other) */
+ for (i = 0; i < ip_count; i++) {
+ clusterManagerNodeArray *node_array = &(ipnodes[i]);
+ dict *related = dictCreate(&clusterManagerDictType, NULL);
+ char *ip = NULL;
+ for (j = 0; j < node_array->len; j++) {
+ clusterManagerNode *node = node_array->nodes[j];
+ if (node == NULL) continue;
+ if (!ip) ip = node->ip;
+ sds types, otypes;
+ // We always use the Master ID as key
+ sds key = (!node->replicate ? node->name : node->replicate);
+ assert(key != NULL);
+ dictEntry *entry = dictFind(related, key);
+ if (entry) otypes = (sds) dictGetVal(entry);
+ else {
+ otypes = sdsempty();
+ dictAdd(related, key, otypes);
+ }
+ // Master type 'm' is always set as the first character of the
+ // types string.
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes);
+ else types = sdscat(otypes, "s");
+ if (types != otypes) dictReplace(related, key, types);
+ }
+ /* Now it's trivial to check, for each related group having the
+ * same host, what is their local score. */
+ dictIterator *iter = dictGetIterator(related);
+ dictEntry *entry;
+ while ((entry = dictNext(iter)) != NULL) {
+ sds types = (sds) dictGetVal(entry);
+ sds name = (sds) dictGetKey(entry);
+ int typeslen = sdslen(types);
+ if (typeslen < 2) continue;
+ if (types[0] == 'm') score += (10000 * (typeslen - 1));
+ else score += (1 * typeslen);
+ if (offending == NULL) continue;
+ /* Populate the list of offending nodes. */
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->replicate == NULL) continue;
+ if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) {
+ *(offending_p++) = n;
+ if (offending_len != NULL) (*offending_len)++;
+ break;
+ }
+ }
+ }
+ //if (offending_len != NULL) *offending_len = offending_p - *offending;
+ dictReleaseIterator(iter);
+ dictRelease(related);
+ }
+ return score;
+}
+
+static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
+ int ip_count)
+{
+ clusterManagerNode **offenders = NULL;
+ int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count,
+ NULL, NULL);
+ if (score == 0) goto cleanup;
+ clusterManagerLogInfo(">>> Trying to optimize slaves allocation "
+ "for anti-affinity\n");
+ int node_len = cluster_manager.nodes->len;
+ int maxiter = 500 * node_len; // Effort is proportional to cluster size...
+ srand(time(NULL));
+ while (maxiter > 0) {
+ int offending_len = 0;
+ if (offenders != NULL) {
+ zfree(offenders);
+ offenders = NULL;
+ }
+ score = clusterManagerGetAntiAffinityScore(ipnodes,
+ ip_count,
+ &offenders,
+ &offending_len);
+ if (score == 0) break; // Optimal anti affinity reached
+ /* We'll try to swap the assigned master of a random slave that
+ * causes an affinity problem with the assigned master of another
+ * random slave, to see if the affinity improves. */
+ int rand_idx = rand() % offending_len;
+ clusterManagerNode *first = offenders[rand_idx],
+ *second = NULL;
+ clusterManagerNode **other_replicas = zcalloc((node_len - 1) *
+ sizeof(*other_replicas));
+ int other_replicas_count = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n != first && n->replicate != NULL)
+ other_replicas[other_replicas_count++] = n;
+ }
+ if (other_replicas_count == 0) {
+ zfree(other_replicas);
+ break;
+ }
+ rand_idx = rand() % other_replicas_count;
+ second = other_replicas[rand_idx];
+ char *first_master = first->replicate,
+ *second_master = second->replicate;
+ first->replicate = second_master, first->dirty = 1;
+ second->replicate = first_master, second->dirty = 1;
+ int new_score = clusterManagerGetAntiAffinityScore(ipnodes,
+ ip_count,
+ NULL, NULL);
+ /* If the change actually makes things worse, revert. Otherwise
+ * leave as it is because the best solution may need a few
+ * combined swaps. */
+ if (new_score > score) {
+ first->replicate = first_master;
+ second->replicate = second_master;
+ }
+ zfree(other_replicas);
+ maxiter--;
+ }
+ score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL);
+ char *msg;
+ int perfect = (score == 0);
+ int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS :
+ CLUSTER_MANAGER_LOG_LVL_WARN);
+ if (perfect) msg = "[OK] Perfect anti-affinity obtained!";
+ else if (score >= 10000)
+ msg = "[WARNING] Some slaves are in the same host as their master";
+ else
+ msg = "[WARNING] Some slaves of the same master are in the same host";
+ clusterManagerLog(log_level, "%s\n", msg);
+cleanup:
+ zfree(offenders);
+}
+
+/* Return a string representation of the node's flags. */
+static sds clusterManagerNodeFlagString(clusterManagerNode *node) {
+ sds flags = sdsempty();
+ if (!node->flags_str) return flags;
+ int empty = 1;
+ listIter li;
+ listNode *ln;
+ listRewind(node->flags_str, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds flag = ln->value;
+ if (strcmp(flag, "myself") == 0) continue;
+ if (!empty) flags = sdscat(flags, ",");
+ flags = sdscatfmt(flags, "%S", flag);
+ empty = 0;
+ }
+ return flags;
+}
+
+/* Return a string representation of the node's slots. */
+static sds clusterManagerNodeSlotsString(clusterManagerNode *node) {
+ sds slots = sdsempty();
+ int first_range_idx = -1, last_slot_idx = -1, i;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ int has_slot = node->slots[i];
+ if (has_slot) {
+ if (first_range_idx == -1) {
+ if (sdslen(slots)) slots = sdscat(slots, ",");
+ first_range_idx = i;
+ slots = sdscatfmt(slots, "[%u", i);
+ }
+ last_slot_idx = i;
+ } else {
+ if (last_slot_idx >= 0) {
+ if (first_range_idx == last_slot_idx)
+ slots = sdscat(slots, "]");
+ else slots = sdscatfmt(slots, "-%u]", last_slot_idx);
+ }
+ last_slot_idx = -1;
+ first_range_idx = -1;
+ }
+ }
+ if (last_slot_idx >= 0) {
+ if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]");
+ else slots = sdscatfmt(slots, "-%u]", last_slot_idx);
+ }
+ return slots;
+}
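+
+/* For instance, a node serving slots 0-5460 plus the single slot 7000
+ * (a hypothetical layout) is rendered as "[0-5460],[7000]". */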
+
+/* -----------------------------------------------------------------------------
+ * Key space handling
+ * -------------------------------------------------------------------------- */
+
+/* We have 16384 hash slots. The hash slot of a given key is obtained
+ * as the least significant 14 bits of the crc16 of the key.
+ *
+ * However if the key contains the {...} pattern, only the part between
+ * { and } is hashed. This may be useful in the future to force certain
+ * keys to be in the same node (assuming no resharding is in progress). */
+static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) {
+ int s, e; /* start-end indexes of { and } */
+
+ for (s = 0; s < keylen; s++)
+ if (key[s] == '{') break;
+
+ /* No '{' ? Hash the whole key. This is the base case. */
+ if (s == keylen) return crc16(key,keylen) & 0x3FFF;
+
+ /* '{' found? Check if we have the corresponding '}'. */
+ for (e = s+1; e < keylen; e++)
+ if (key[e] == '}') break;
+
+ /* No '}' or nothing between {} ? Hash the whole key. */
+ if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF;
+
+ /* If we are here there is both a { and a } on its right. Hash
+ * what is in the middle between { and }. */
+ return crc16(key+s+1,e-s-1) & 0x3FFF;
+}
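+
+/* For example, the keys "{user1000}.following" and "{user1000}.followers"
+ * hash only the "user1000" tag between the braces, so both map to
+ * crc16("user1000") & 0x3FFF and always live in the same slot. */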
+
+/* Return a string representation of the cluster node. */
+static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) {
+ sds info = sdsempty();
+ sds spaces = sdsempty();
+ int i;
+ for (i = 0; i < indent; i++) spaces = sdscat(spaces, " ");
+ if (indent) info = sdscat(info, spaces);
+ int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE);
+ char *role = (is_master ? "M" : "S");
+ sds slots = NULL;
+ if (node->dirty && node->replicate != NULL)
+ info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port);
+ else {
+ slots = clusterManagerNodeSlotsString(node);
+ sds flags = clusterManagerNodeFlagString(node);
+ info = sdscatfmt(info, "%s: %S %s:%u\n"
+ "%s slots:%S (%u slots) "
+ "%S",
+ role, node->name, node->ip, node->port, spaces,
+ slots, node->slots_count, flags);
+ sdsfree(slots);
+ sdsfree(flags);
+ }
+ if (node->replicate != NULL)
+ info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate);
+ else if (node->replicas_count)
+ info = sdscatfmt(info, "\n%s %U additional replica(s)",
+ spaces, node->replicas_count);
+ sdsfree(spaces);
+ return info;
+}
+
+static void clusterManagerShowNodes(void) {
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ sds info = clusterManagerNodeInfo(node, 0);
+ printf("%s\n", (char *) info);
+ sdsfree(info);
+ }
+}
+
+static void clusterManagerShowClusterInfo(void) {
+ int masters = 0;
+ int keys = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) {
+ if (!node->name) continue;
+ int replicas = 0;
+ int dbsize = -1;
+ char name[9];
+ memcpy(name, node->name, 8);
+ name[8] = '\0';
+ listIter ri;
+ listNode *rn;
+ listRewind(cluster_manager.nodes, &ri);
+ while ((rn = listNext(&ri)) != NULL) {
+ clusterManagerNode *n = rn->value;
+ if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE))
+ continue;
+ if (n->replicate && !strcmp(n->replicate, node->name))
+ replicas++;
+ }
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE");
+ if (reply != NULL && reply->type == REDIS_REPLY_INTEGER)
+ dbsize = reply->integer;
+ if (dbsize < 0) {
+ char *err = "";
+ if (reply != NULL && reply->type == REDIS_REPLY_ERROR)
+ err = reply->str;
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ if (reply != NULL) freeReplyObject(reply);
+ return;
+ };
+ if (reply != NULL) freeReplyObject(reply);
+ printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n",
+ node->ip, node->port, name, dbsize,
+ node->slots_count, replicas);
+ masters++;
+ keys += dbsize;
+ }
+ }
+ clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters);
+ float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS;
+ printf("%.2f keys per slot on average.\n", keys_per_slot);
+}
+
+/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */
+static int clusterManagerAddSlots(clusterManagerNode *node, char**err)
+{
+ redisReply *reply = NULL;
+ void *_reply = NULL;
+ int success = 1;
+ /* First two args are used for the command itself. */
+ int argc = node->slots_count + 2;
+ sds *argv = zmalloc(argc * sizeof(*argv));
+ size_t *argvlen = zmalloc(argc * sizeof(*argvlen));
+ argv[0] = "CLUSTER";
+ argv[1] = "ADDSLOTS";
+ argvlen[0] = 7;
+ argvlen[1] = 8;
+ *err = NULL;
+ int i, argv_idx = 2;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ if (argv_idx >= argc) break;
+ if (node->slots[i]) {
+ argv[argv_idx] = sdsfromlonglong((long long) i);
+ argvlen[argv_idx] = sdslen(argv[argv_idx]);
+ argv_idx++;
+ }
+ }
+ if (!argv_idx) {
+ success = 0;
+ goto cleanup;
+ }
+ redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen);
+ if (redisGetReply(node->context, &_reply) != REDIS_OK) {
+ success = 0;
+ goto cleanup;
+ }
+ reply = (redisReply*) _reply;
+ success = clusterManagerCheckRedisReply(node, reply, err);
+cleanup:
+ zfree(argvlen);
+ if (argv != NULL) {
+ for (i = 2; i < argc; i++) sdsfree(argv[i]);
+ zfree(argv);
+ }
+ if (reply != NULL) freeReplyObject(reply);
+ return success;
+}
+
+/* Set slot status to "importing" or "migrating" */
+static int clusterManagerSetSlot(clusterManagerNode *node1,
+ clusterManagerNode *node2,
+ int slot, const char *status, char **err) {
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER "
+ "SETSLOT %d %s %s",
+ slot, status,
+ (char *) node2->name);
+ if (err != NULL) *err = NULL;
+ if (!reply) return 0;
+ int success = 1;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ success = 0;
+ if (err != NULL) {
+ *err = zmalloc((reply->len + 1) * sizeof(char));
+ strcpy(*err, reply->str);
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, *err);
+ }
+ goto cleanup;
+ }
+cleanup:
+ freeReplyObject(reply);
+ return success;
+}
+
+/* Migrate keys taken from reply->elements. It returns the reply from the
+ * MIGRATE command, or NULL if something goes wrong. If the argument 'dots'
+ * is not NULL, a dot will be printed for every migrated key. */
+static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source,
+ clusterManagerNode *target,
+ redisReply *reply,
+ int replace, int timeout,
+ char *dots)
+{
+ redisReply *migrate_reply = NULL;
+ char **argv = NULL;
+ size_t *argv_len = NULL;
+ int c = (replace ? 8 : 7);
+ if (config.auth) c += 2;
+ size_t argc = c + reply->elements;
+ size_t i, offset = 6; // Keys Offset
+ argv = zcalloc(argc * sizeof(char *));
+ argv_len = zcalloc(argc * sizeof(size_t));
+ char portstr[255];
+ char timeoutstr[255];
+ snprintf(portstr, 10, "%d", target->port);
+ snprintf(timeoutstr, 10, "%d", timeout);
+ argv[0] = "MIGRATE";
+ argv_len[0] = 7;
+ argv[1] = target->ip;
+ argv_len[1] = strlen(target->ip);
+ argv[2] = portstr;
+ argv_len[2] = strlen(portstr);
+ argv[3] = "";
+ argv_len[3] = 0;
+ argv[4] = "0";
+ argv_len[4] = 1;
+ argv[5] = timeoutstr;
+ argv_len[5] = strlen(timeoutstr);
+ if (replace) {
+ argv[offset] = "REPLACE";
+ argv_len[offset] = 7;
+ offset++;
+ }
+ if (config.auth) {
+ argv[offset] = "AUTH";
+ argv_len[offset] = 4;
+ offset++;
+ argv[offset] = config.auth;
+ argv_len[offset] = strlen(config.auth);
+ offset++;
+ }
+ argv[offset] = "KEYS";
+ argv_len[offset] = 4;
+ offset++;
+ for (i = 0; i < reply->elements; i++) {
+ redisReply *entry = reply->element[i];
+ size_t idx = i + offset;
+ assert(entry->type == REDIS_REPLY_STRING);
+ argv[idx] = (char *) sdsnew(entry->str);
+ argv_len[idx] = entry->len;
+ if (dots) dots[i] = '.';
+ }
+ if (dots) dots[reply->elements] = '\0';
+ void *_reply = NULL;
+ redisAppendCommandArgv(source->context,argc,
+ (const char**)argv,argv_len);
+ int success = (redisGetReply(source->context, &_reply) == REDIS_OK);
+ for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]);
+ if (!success) goto cleanup;
+ migrate_reply = (redisReply *) _reply;
+cleanup:
+ zfree(argv);
+ zfree(argv_len);
+ return migrate_reply;
+}
+
+/* Migrate all keys in the given slot from source to target.*/
+static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source,
+ clusterManagerNode *target,
+ int slot, int timeout,
+ int pipeline, int verbose,
+ char **err)
+{
+ int success = 1;
+ int do_fix = (config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_FIX);
+ while (1) {
+ char *dots = NULL;
+ redisReply *reply = NULL, *migrate_reply = NULL;
+ reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER "
+ "GETKEYSINSLOT %d %d", slot,
+ pipeline);
+ success = (reply != NULL);
+ if (!success) return 0;
+ if (reply->type == REDIS_REPLY_ERROR) {
+ success = 0;
+ if (err != NULL) {
+ *err = zmalloc((reply->len + 1) * sizeof(char));
+ strcpy(*err, reply->str);
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, *err);
+ }
+ goto next;
+ }
+ assert(reply->type == REDIS_REPLY_ARRAY);
+ size_t count = reply->elements;
+ if (count == 0) {
+ freeReplyObject(reply);
+ break;
+ }
+ if (verbose) dots = zmalloc((count+1) * sizeof(char));
+ /* Calling MIGRATE command. */
+ migrate_reply = clusterManagerMigrateKeysInReply(source, target,
+ reply, 0, timeout,
+ dots);
+ if (migrate_reply == NULL) goto next;
+ if (migrate_reply->type == REDIS_REPLY_ERROR) {
+ if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) {
+ clusterManagerLogWarn("*** Target key exists. "
+ "Replacing it for FIX.\n");
+ freeReplyObject(migrate_reply);
+ /* Try to migrate keys adding REPLACE option. */
+ migrate_reply = clusterManagerMigrateKeysInReply(source,
+ target,
+ reply,
+ 1, timeout,
+ NULL);
+ success = (migrate_reply != NULL &&
+ migrate_reply->type != REDIS_REPLY_ERROR);
+ } else success = 0;
+ if (!success) {
+ if (migrate_reply != NULL) {
+ if (err) {
+ *err = zmalloc((migrate_reply->len + 1) * sizeof(char));
+ strcpy(*err, migrate_reply->str);
+ }
+ printf("\n");
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(source,
+ migrate_reply->str);
+ }
+ goto next;
+ }
+ }
+ if (verbose) {
+ printf("%s", dots);
+ fflush(stdout);
+ }
+next:
+ if (reply != NULL) freeReplyObject(reply);
+ if (migrate_reply != NULL) freeReplyObject(migrate_reply);
+ if (dots) zfree(dots);
+ if (!success) break;
+ }
+ return success;
+}
+
+/* Move slots between source and target nodes using MIGRATE.
+ *
+ * Options:
+ * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key.
+ * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots /
+ * reconfiguring the nodes.
+ * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes.
+ * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages.
+*/
+static int clusterManagerMoveSlot(clusterManagerNode *source,
+ clusterManagerNode *target,
+ int slot, int opts, char**err)
+{
+ if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) {
+ printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip,
+ source->port, target->ip, target->port);
+ fflush(stdout);
+ }
+ if (err != NULL) *err = NULL;
+ int pipeline = config.cluster_manager_command.pipeline,
+ timeout = config.cluster_manager_command.timeout,
+ print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE),
+ option_cold = (opts & CLUSTER_MANAGER_OPT_COLD),
+ success = 1;
+ if (!option_cold) {
+ success = clusterManagerSetSlot(target, source, slot,
+ "importing", err);
+ if (!success) return 0;
+ success = clusterManagerSetSlot(source, target, slot,
+ "migrating", err);
+ if (!success) return 0;
+ }
+ success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout,
+ pipeline, print_dots, err);
+ if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n");
+ if (!success) return 0;
+ /* Set the new node as the owner of the slot in all the known nodes. */
+ if (!option_cold) {
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER "
+ "SETSLOT %d %s %s",
+ slot, "node",
+ target->name);
+ success = (r != NULL);
+ if (!success) return 0;
+ if (r->type == REDIS_REPLY_ERROR) {
+ success = 0;
+ if (err != NULL) {
+ *err = zmalloc((r->len + 1) * sizeof(char));
+ strcpy(*err, r->str);
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, *err);
+ }
+ }
+ freeReplyObject(r);
+ if (!success) return 0;
+ }
+ }
+ /* Update the node logical config */
+ if (opts & CLUSTER_MANAGER_OPT_UPDATE) {
+ source->slots[slot] = 0;
+ target->slots[slot] = 1;
+ }
+ return 1;
+}
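+
+/* Summarizing the above, a non-cold move of a slot follows the standard
+ * Redis Cluster resharding sequence:
+ *
+ *   1. CLUSTER SETSLOT <slot> IMPORTING <source-id>  on the target node;
+ *   2. CLUSTER SETSLOT <slot> MIGRATING <target-id>  on the source node;
+ *   3. repeated CLUSTER GETKEYSINSLOT + MIGRATE, 'pipeline' keys at a time;
+ *   4. CLUSTER SETSLOT <slot> NODE <target-id>       on every master. */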
+
+/* Flush the dirty node configuration by calling replicate for slaves or
+ * adding the slots defined in the masters. */
+static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) {
+ if (!node->dirty) return 0;
+ redisReply *reply = NULL;
+ int is_err = 0, success = 1;
+ if (err != NULL) *err = NULL;
+ if (node->replicate != NULL) {
+ reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s",
+ node->replicate);
+ if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
+ if (is_err && err != NULL) {
+ *err = zmalloc((reply->len + 1) * sizeof(char));
+ strcpy(*err, reply->str);
+ }
+ success = 0;
+ /* If the cluster has not already joined, it is possible that
+ * the slave does not know the master node yet. So on errors
+ * we return ASAP leaving the dirty flag set, to flush the
+ * config later. */
+ goto cleanup;
+ }
+ } else {
+ int added = clusterManagerAddSlots(node, err);
+ if (!added || *err != NULL) success = 0;
+ }
+ node->dirty = 0;
+cleanup:
+ if (reply != NULL) freeReplyObject(reply);
+ return success;
+}
+
+/* Wait until the cluster configuration is consistent. */
+static void clusterManagerWaitForClusterJoin(void) {
+ printf("Waiting for the cluster to join\n");
+ while(!clusterManagerIsConfigConsistent()) {
+ printf(".");
+ fflush(stdout);
+ sleep(1);
+ }
+ printf("\n");
+}
+
+/* Load the node's cluster configuration by calling the "CLUSTER NODES"
+ * command. The node's configuration (name, replicate, slots, ...) is
+ * then updated. If the CLUSTER_MANAGER_OPT_GETFRIENDS flag is set in the
+ * 'opts' argument and the node already knows other nodes, the node's
+ * friends list is populated with the other nodes' info. */
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+ char **err)
+{
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+ int success = 1;
+ *err = NULL;
+ if (!clusterManagerCheckRedisReply(node, reply, err)) {
+ success = 0;
+ goto cleanup;
+ }
+ int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS);
+ char *lines = reply->str, *p, *line;
+ while ((p = strstr(lines, "\n")) != NULL) {
+ *p = '\0';
+ line = lines;
+ lines = p + 1;
+ char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL,
+ *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL,
+ *link_status = NULL;
+ UNUSED(link_status);
+ int i = 0;
+ while ((p = strchr(line, ' ')) != NULL) {
+ *p = '\0';
+ char *token = line;
+ line = p + 1;
+ switch(i++){
+ case 0: name = token; break;
+ case 1: addr = token; break;
+ case 2: flags = token; break;
+ case 3: master_id = token; break;
+ case 4: ping_sent = token; break;
+ case 5: ping_recv = token; break;
+ case 6: config_epoch = token; break;
+ case 7: link_status = token; break;
+ }
+ if (i == 8) break; // Slots
+ }
+ if (!flags) {
+ success = 0;
+ goto cleanup;
+ }
+ int myself = (strstr(flags, "myself") != NULL);
+ clusterManagerNode *currentNode = NULL;
+ if (myself) {
+ node->flags |= CLUSTER_MANAGER_FLAG_MYSELF;
+ currentNode = node;
+ clusterManagerNodeResetSlots(node);
+ if (i == 8) {
+ int remaining = strlen(line);
+ while (remaining > 0) {
+ p = strchr(line, ' ');
+ if (p == NULL) p = line + remaining;
+ remaining -= (p - line);
+
+ char *slotsdef = line;
+ *p = '\0';
+ if (remaining) {
+ line = p + 1;
+ remaining--;
+ } else line = p;
+ char *dash = NULL;
+ if (slotsdef[0] == '[') {
+ slotsdef++;
+ if ((p = strstr(slotsdef, "->-"))) { // Migrating
+ *p = '\0';
+ p += 3;
+ char *closing_bracket = strchr(p, ']');
+ if (closing_bracket) *closing_bracket = '\0';
+ sds slot = sdsnew(slotsdef);
+ sds dst = sdsnew(p);
+ node->migrating_count += 2;
+ node->migrating = zrealloc(node->migrating,
+ (node->migrating_count * sizeof(sds)));
+ node->migrating[node->migrating_count - 2] =
+ slot;
+ node->migrating[node->migrating_count - 1] =
+ dst;
+ } else if ((p = strstr(slotsdef, "-<-"))) {//Importing
+ *p = '\0';
+ p += 3;
+ char *closing_bracket = strchr(p, ']');
+ if (closing_bracket) *closing_bracket = '\0';
+ sds slot = sdsnew(slotsdef);
+ sds src = sdsnew(p);
+ node->importing_count += 2;
+ node->importing = zrealloc(node->importing,
+ (node->importing_count * sizeof(sds)));
+ node->importing[node->importing_count - 2] =
+ slot;
+ node->importing[node->importing_count - 1] =
+ src;
+ }
+ } else if ((dash = strchr(slotsdef, '-')) != NULL) {
+ p = dash;
+ int start, stop;
+ *p = '\0';
+ start = atoi(slotsdef);
+ stop = atoi(p + 1);
+ node->slots_count += (stop - (start - 1));
+ while (start <= stop) node->slots[start++] = 1;
+ } else if (p > slotsdef) {
+ node->slots[atoi(slotsdef)] = 1;
+ node->slots_count++;
+ }
+ }
+ }
+ node->dirty = 0;
+ } else if (!getfriends) {
+ if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue;
+ else break;
+ } else {
+ if (addr == NULL) {
+ fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+ success = 0;
+ goto cleanup;
+ }
+ char *c = strrchr(addr, '@');
+ if (c != NULL) *c = '\0';
+ c = strrchr(addr, ':');
+ if (c == NULL) {
+ fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+ success = 0;
+ goto cleanup;
+ }
+ *c = '\0';
+ int port = atoi(++c);
+ currentNode = clusterManagerNewNode(sdsnew(addr), port);
+ currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND;
+ if (node->friends == NULL) node->friends = listCreate();
+ listAddNodeTail(node->friends, currentNode);
+ }
+ if (name != NULL) {
+ if (currentNode->name) sdsfree(currentNode->name);
+ currentNode->name = sdsnew(name);
+ }
+ if (currentNode->flags_str != NULL)
+ freeClusterManagerNodeFlags(currentNode->flags_str);
+ currentNode->flags_str = listCreate();
+ int flag_len;
+ while ((flag_len = strlen(flags)) > 0) {
+ sds flag = NULL;
+ char *fp = strchr(flags, ',');
+ if (fp) {
+ *fp = '\0';
+ flag = sdsnew(flags);
+ flags = fp + 1;
+ } else {
+ flag = sdsnew(flags);
+ flags += flag_len;
+ }
+ if (strcmp(flag, "noaddr") == 0)
+ currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR;
+ else if (strcmp(flag, "disconnected") == 0)
+ currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT;
+ else if (strcmp(flag, "fail") == 0)
+ currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL;
+ else if (strcmp(flag, "slave") == 0) {
+ currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE;
+ if (master_id != NULL) {
+ if (currentNode->replicate) sdsfree(currentNode->replicate);
+ currentNode->replicate = sdsnew(master_id);
+ }
+ }
+ listAddNodeTail(currentNode->flags_str, flag);
+ }
+ if (config_epoch != NULL)
+ currentNode->current_epoch = atoll(config_epoch);
+ if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent);
+ if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv);
+ if (!getfriends && myself) break;
+ }
+cleanup:
+ if (reply) freeReplyObject(reply);
+ return success;
+}
+
+/* Retrieves info about the cluster using argument 'node' as the starting
+ * point. All nodes will be loaded inside the cluster_manager.nodes list.
+ * Warning: if something goes wrong, it will free the starting node before
+ * returning 0. */
+static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) {
+ if (node->context == NULL && !clusterManagerNodeConnect(node)) {
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ opts |= CLUSTER_MANAGER_OPT_GETFRIENDS;
+ char *e = NULL;
+ if (!clusterManagerNodeIsCluster(node, &e)) {
+ clusterManagerPrintNotClusterNodeError(node, e);
+ if (e) zfree(e);
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ e = NULL;
+ if (!clusterManagerNodeLoadInfo(node, opts, &e)) {
+ if (e) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e);
+ zfree(e);
+ }
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ listIter li;
+ listNode *ln;
+ if (cluster_manager.nodes != NULL) {
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL)
+ freeClusterManagerNode((clusterManagerNode *) ln->value);
+ listRelease(cluster_manager.nodes);
+ }
+ cluster_manager.nodes = listCreate();
+ listAddNodeTail(cluster_manager.nodes, node);
+ if (node->friends != NULL) {
+ listRewind(node->friends, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *friend = ln->value;
+ if (!friend->ip || !friend->port) goto invalid_friend;
+ if (!friend->context && !clusterManagerNodeConnect(friend))
+ goto invalid_friend;
+ e = NULL;
+ if (clusterManagerNodeLoadInfo(friend, 0, &e)) {
+ if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR |
+ CLUSTER_MANAGER_FLAG_DISCONNECT |
+ CLUSTER_MANAGER_FLAG_FAIL))
+ goto invalid_friend;
+ listAddNodeTail(cluster_manager.nodes, friend);
+ } else {
+ clusterManagerLogErr("[ERR] Unable to load info for "
+ "node %s:%d\n",
+ friend->ip, friend->port);
+ goto invalid_friend;
+ }
+ continue;
+invalid_friend:
+ freeClusterManagerNode(friend);
+ }
+ listRelease(node->friends);
+ node->friends = NULL;
+ }
+ // Count replicas for each node
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->replicate != NULL) {
+ clusterManagerNode *master = clusterManagerNodeByName(n->replicate);
+ if (master == NULL) {
+ clusterManagerLogWarn("*** WARNING: %s:%d claims to be "
+ "slave of unknown node ID %s.\n",
+ n->ip, n->port, n->replicate);
+ } else master->replicas_count++;
+ }
+ }
+ return 1;
+}
+
+/* Compare functions used by various sorting operations. */
+int clusterManagerSlotCompare(const void *slot1, const void *slot2) {
+ const char **i1 = (const char **)slot1;
+ const char **i2 = (const char **)slot2;
+ return strcmp(*i1, *i2);
+}
+
+int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) {
+ clusterManagerNode *node1 = *((clusterManagerNode **) n1);
+ clusterManagerNode *node2 = *((clusterManagerNode **) n2);
+ return node2->slots_count - node1->slots_count;
+}
+
+int clusterManagerCompareNodeBalance(const void *n1, const void *n2) {
+ clusterManagerNode *node1 = *((clusterManagerNode **) n1);
+ clusterManagerNode *node2 = *((clusterManagerNode **) n2);
+ return node1->balance - node2->balance;
+}
+
+static sds clusterManagerGetConfigSignature(clusterManagerNode *node) {
+ sds signature = NULL;
+ int node_count = 0, i = 0, name_len = 0;
+ char **node_configs = NULL;
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+ if (reply == NULL || reply->type == REDIS_REPLY_ERROR)
+ goto cleanup;
+ char *lines = reply->str, *p, *line;
+ while ((p = strstr(lines, "\n")) != NULL) {
+ i = 0;
+ *p = '\0';
+ line = lines;
+ lines = p + 1;
+ char *nodename = NULL;
+ int tot_size = 0;
+ while ((p = strchr(line, ' ')) != NULL) {
+ *p = '\0';
+ char *token = line;
+ line = p + 1;
+ if (i == 0) {
+ nodename = token;
+ tot_size = (p - token);
+ name_len = tot_size++; // Make room for ':' in tot_size
+ } else if (i == 8) break;
+ i++;
+ }
+ if (i != 8) continue;
+ if (nodename == NULL) continue;
+ int remaining = strlen(line);
+ if (remaining == 0) continue;
+ char **slots = NULL;
+ int c = 0;
+ while (remaining > 0) {
+ p = strchr(line, ' ');
+ if (p == NULL) p = line + remaining;
+ int size = (p - line);
+ remaining -= size;
+ tot_size += size;
+ char *slotsdef = line;
+ *p = '\0';
+ if (remaining) {
+ line = p + 1;
+ remaining--;
+ } else line = p;
+ if (slotsdef[0] != '[') {
+ c++;
+ slots = zrealloc(slots, (c * sizeof(char *)));
+ slots[c - 1] = slotsdef;
+ }
+ }
+ if (c > 0) {
+ if (c > 1)
+ qsort(slots, c, sizeof(char *), clusterManagerSlotCompare);
+ node_count++;
+ node_configs =
+ zrealloc(node_configs, (node_count * sizeof(char *)));
+ /* Make room for '|' separators. */
+ tot_size += (sizeof(char) * (c - 1));
+ char *cfg = zmalloc((sizeof(char) * tot_size) + 1);
+ memcpy(cfg, nodename, name_len);
+ char *sp = cfg + name_len;
+ *(sp++) = ':';
+ for (i = 0; i < c; i++) {
+ if (i > 0) *(sp++) = '|';
+ int slen = strlen(slots[i]);
+ memcpy(sp, slots[i], slen);
+ sp += slen;
+ }
+ *(sp++) = '\0';
+ node_configs[node_count - 1] = cfg;
+ }
+ zfree(slots);
+ }
+ if (node_count > 0) {
+ if (node_count > 1) {
+ qsort(node_configs, node_count, sizeof(char *),
+ clusterManagerSlotCompare);
+ }
+ signature = sdsempty();
+ for (i = 0; i < node_count; i++) {
+ if (i > 0) signature = sdscatprintf(signature, "%c", '|');
+ signature = sdscatfmt(signature, "%s", node_configs[i]);
+ }
+ }
+cleanup:
+ if (reply != NULL) freeReplyObject(reply);
+ if (node_configs != NULL) {
+ for (i = 0; i < node_count; i++) zfree(node_configs[i]);
+ zfree(node_configs);
+ }
+ return signature;
+}
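+
+/* The signature is a canonical string of the form (hypothetical node IDs)
+ *
+ *   "aaaa:0-5460|bbbb:5461-10922|cccc:10923-16383"
+ *
+ * i.e. sorted node IDs, each followed by its sorted slot set, so two
+ * nodes see the same cluster layout exactly when their signatures are
+ * equal strings. */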
+
+static int clusterManagerIsConfigConsistent(void) {
+ if (cluster_manager.nodes == NULL) return 0;
+ int consistent = (listLength(cluster_manager.nodes) <= 1);
+ // If the Cluster has only one node, it's always consistent
+ if (consistent) return 1;
+ sds first_cfg = NULL;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ sds cfg = clusterManagerGetConfigSignature(node);
+ if (cfg == NULL) {
+ consistent = 0;
+ break;
+ }
+ if (first_cfg == NULL) first_cfg = cfg;
+ else {
+ consistent = !sdscmp(first_cfg, cfg);
+ sdsfree(cfg);
+ if (!consistent) break;
+ }
+ }
+ if (first_cfg != NULL) sdsfree(first_cfg);
+ return consistent;
+}
+
+/* Add the error string to cluster_manager.errors and print it. */
+static void clusterManagerOnError(sds err) {
+ if (cluster_manager.errors == NULL)
+ cluster_manager.errors = listCreate();
+ listAddNodeTail(cluster_manager.errors, err);
+ clusterManagerLogErr("%s\n", (char *) err);
+}
+
+/* Check the slots coverage of the cluster. The 'all_slots' argument must be
+ * an array of 16384 bytes. Every covered slot will be set to 1 in the
+ * 'all_slots' array. The function returns the total number of covered
+ * slots. */
+static int clusterManagerGetCoveredSlots(char *all_slots) {
+ if (cluster_manager.nodes == NULL) return 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ int totslots = 0, i;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ if (node->slots[i] && !all_slots[i]) {
+ all_slots[i] = 1;
+ totslots++;
+ }
+ }
+ }
+ return totslots;
+}
+
+static void clusterManagerPrintSlotsList(list *slots) {
+ listIter li;
+ listNode *ln;
+ listRewind(slots, &li);
+ sds first = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ sds slot = ln->value;
+ if (!first) first = slot;
+ else printf(", ");
+ printf("%s", slot);
+ }
+ printf("\n");
+}
+
+/* Return the node, among 'nodes', with the greatest number of keys
+ * in the specified slot. */
+static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes,
+ int slot,
+ char **err)
+{
+ clusterManagerNode *node = NULL;
+ int numkeys = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(nodes, &li);
+ if (err) *err = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
+ continue;
+ redisReply *r =
+ CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot);
+ int success = clusterManagerCheckRedisReply(n, r, err);
+ if (success) {
+ if (r->integer > numkeys || node == NULL) {
+ numkeys = r->integer;
+ node = n;
+ }
+ }
+ if (r != NULL) freeReplyObject(r);
+ /* If the reply contains errors */
+ if (!success) {
+ if (err != NULL && *err != NULL)
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, *err);
+ node = NULL;
+ break;
+ }
+ }
+ return node;
+}
+
+/* This function returns the master that has the least number of replicas
+ * in the cluster. If there are multiple masters with the same, smallest
+ * number of replicas, the first one found is returned. */
+static clusterManagerNode *clusterManagerNodeWithLeastReplicas() {
+ clusterManagerNode *node = NULL;
+ int lowest_count = 0;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (node == NULL || n->replicas_count < lowest_count) {
+ node = n;
+ lowest_count = n->replicas_count;
+ }
+ }
+ return node;
+}
+
+static int clusterManagerFixSlotsCoverage(char *all_slots) {
+ int i, fixed = 0;
+ list *none = NULL, *single = NULL, *multi = NULL;
+ clusterManagerLogInfo(">>> Fixing slots coverage...\n");
+ printf("List of not covered slots: \n");
+ int uncovered_count = 0;
+ sds log = sdsempty();
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ int covered = all_slots[i];
+ if (!covered) {
+ sds key = sdsfromlonglong((long long) i);
+ if (uncovered_count++ > 0) printf(",");
+ printf("%s", (char *) key);
+ list *slot_nodes = listCreate();
+ sds slot_nodes_str = sdsempty();
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
+ continue;
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER GETKEYSINSLOT %d %d", i, 1);
+ if (!clusterManagerCheckRedisReply(n, reply, NULL)) {
+ fixed = -1;
+ if (reply) freeReplyObject(reply);
+ goto cleanup;
+ }
+ assert(reply->type == REDIS_REPLY_ARRAY);
+ if (reply->elements > 0) {
+ listAddNodeTail(slot_nodes, n);
+ if (listLength(slot_nodes) > 1)
+ slot_nodes_str = sdscat(slot_nodes_str, ", ");
+ slot_nodes_str = sdscatfmt(slot_nodes_str,
+ "%s:%u", n->ip, n->port);
+ }
+ freeReplyObject(reply);
+ }
+ log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S",
+ key, listLength(slot_nodes), slot_nodes_str);
+ sdsfree(slot_nodes_str);
+ dictAdd(clusterManagerUncoveredSlots, key, slot_nodes);
+ }
+ }
+ printf("\n%s\n", log);
+ /* For every slot, take action depending on the actual condition:
+ * 1) No node has keys for this slot.
+ * 2) A single node has keys for this slot.
+ * 3) Multiple nodes have keys for this slot. */
+ none = listCreate();
+ single = listCreate();
+ multi = listCreate();
+ dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots);
+ dictEntry *entry;
+ while ((entry = dictNext(iter)) != NULL) {
+ sds slot = (sds) dictGetKey(entry);
+ list *nodes = (list *) dictGetVal(entry);
+ switch (listLength(nodes)){
+ case 0: listAddNodeTail(none, slot); break;
+ case 1: listAddNodeTail(single, slot); break;
+ default: listAddNodeTail(multi, slot); break;
+ }
+ }
+ dictReleaseIterator(iter);
+
+ /* Handle case "1": keys in no node. */
+ if (listLength(none) > 0) {
+ printf("The following uncovered slots have no keys "
+ "across the cluster:\n");
+ clusterManagerPrintSlotsList(none);
+ if (confirmWithYes("Fix these slots by covering with a random node?")){
+ srand(time(NULL));
+ listIter li;
+ listNode *ln;
+ listRewind(none, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds slot = ln->value;
+ long idx = (long) (rand() % listLength(cluster_manager.nodes));
+ listNode *node_n = listIndex(cluster_manager.nodes, idx);
+ assert(node_n != NULL);
+ clusterManagerNode *n = node_n->value;
+ clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n",
+ slot, n->ip, n->port);
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER ADDSLOTS %s", slot);
+ if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1;
+ if (r) freeReplyObject(r);
+ if (fixed < 0) goto cleanup;
+ /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+ * info in the node struct in order to keep it in sync. */
+ n->slots[atoi(slot)] = 1;
+ fixed++;
+ }
+ }
+ }
+
+ /* Handle case "2": keys only in one node. */
+ if (listLength(single) > 0) {
+ printf("The following uncovered slots have keys in just one node:\n");
+ clusterManagerPrintSlotsList(single);
+ if (confirmWithYes("Fix these slots by covering with those nodes?")){
+ listIter li;
+ listNode *ln;
+ listRewind(single, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds slot = ln->value;
+ dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot);
+ assert(entry != NULL);
+ list *nodes = (list *) dictGetVal(entry);
+ listNode *fn = listFirst(nodes);
+ assert(fn != NULL);
+ clusterManagerNode *n = fn->value;
+ clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n",
+ slot, n->ip, n->port);
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER ADDSLOTS %s", slot);
+ if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1;
+ if (r) freeReplyObject(r);
+ if (fixed < 0) goto cleanup;
+ /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+ * info in the node struct in order to keep it in sync. */
+ n->slots[atoi(slot)] = 1;
+ fixed++;
+ }
+ }
+ }
+
+ /* Handle case "3": keys in multiple nodes. */
+ if (listLength(multi) > 0) {
+ printf("The following uncovered slots have keys in multiple nodes:\n");
+ clusterManagerPrintSlotsList(multi);
+ if (confirmWithYes("Fix these slots by moving keys "
+ "into a single node?")) {
+ listIter li;
+ listNode *ln;
+ listRewind(multi, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ sds slot = ln->value;
+ dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot);
+ assert(entry != NULL);
+ list *nodes = (list *) dictGetVal(entry);
+ int s = atoi(slot);
+ clusterManagerNode *target =
+ clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL);
+ if (target == NULL) {
+ fixed = -1;
+ goto cleanup;
+ }
+ clusterManagerLogInfo(">>> Covering slot %s moving keys "
+ "to %s:%d\n", slot,
+ target->ip, target->port);
+ redisReply *r = CLUSTER_MANAGER_COMMAND(target,
+ "CLUSTER ADDSLOTS %s", slot);
+ if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1;
+ if (r) freeReplyObject(r);
+ if (fixed < 0) goto cleanup;
+ r = CLUSTER_MANAGER_COMMAND(target,
+ "CLUSTER SETSLOT %s %s", slot, "STABLE");
+ if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1;
+ if (r) freeReplyObject(r);
+ if (fixed < 0) goto cleanup;
+ /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+ * info in the node struct in order to keep it in sync. */
+ target->slots[atoi(slot)] = 1;
+ listIter nli;
+ listNode *nln;
+ listRewind(nodes, &nli);
+ while ((nln = listNext(&nli)) != NULL) {
+ clusterManagerNode *src = nln->value;
+ if (src == target) continue;
+ /* Set the source node in 'importing' state
+ * (even if we will actually migrate keys away)
+ * in order to avoid receiving redirections
+ * for MIGRATE. */
+ redisReply *r = CLUSTER_MANAGER_COMMAND(src,
+ "CLUSTER SETSLOT %s %s %s", slot,
+ "IMPORTING", target->name);
+ if (!clusterManagerCheckRedisReply(src, r, NULL))
+ fixed = -1;
+ if (r) freeReplyObject(r);
+ if (fixed < 0) goto cleanup;
+ int opts = CLUSTER_MANAGER_OPT_VERBOSE |
+ CLUSTER_MANAGER_OPT_COLD;
+ if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) {
+ fixed = -1;
+ goto cleanup;
+ }
+ }
+ fixed++;
+ }
+ }
+ }
+cleanup:
+ sdsfree(log);
+ if (none) listRelease(none);
+ if (single) listRelease(single);
+ if (multi) listRelease(multi);
+ return fixed;
+}
+
+/* Slot 'slot' was found to be in importing or migrating state in one or
+ * more nodes. This function fixes this condition by migrating keys where
+ * it seems more sensible. */
+static int clusterManagerFixOpenSlot(int slot) {
+ clusterManagerLogInfo(">>> Fixing open slot %d\n", slot);
+ /* Try to obtain the current slot owner, according to the current
+ * nodes configuration. */
+ int success = 1;
+ list *owners = listCreate();
+ list *migrating = listCreate();
+ list *importing = listCreate();
+ sds migrating_str = sdsempty();
+ sds importing_str = sdsempty();
+ clusterManagerNode *owner = NULL;
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (n->slots[slot]) {
+ if (owner == NULL) owner = n;
+ listAddNodeTail(owners, n);
+ }
+ }
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (n->migrating) {
+ for (int i = 0; i < n->migrating_count; i += 2) {
+ sds migrating_slot = n->migrating[i];
+ if (atoi(migrating_slot) == slot) {
+ char *sep = (listLength(migrating) == 0 ? "" : ",");
+ migrating_str = sdscatfmt(migrating_str, "%s%S:%u",
+ sep, n->ip, n->port);
+ listAddNodeTail(migrating, n);
+ break;
+ }
+ }
+ }
+ if (n->importing) {
+ for (int i = 0; i < n->importing_count; i += 2) {
+ sds importing_slot = n->importing[i];
+ if (atoi(importing_slot) == slot) {
+ char *sep = (listLength(importing) == 0 ? "" : ",");
+ importing_str = sdscatfmt(importing_str, "%s%S:%u",
+ sep, n->ip, n->port);
+ listAddNodeTail(importing, n);
+ break;
+ }
+ }
+ }
+ }
+ printf("Set as migrating in: %s\n", migrating_str);
+ printf("Set as importing in: %s\n", importing_str);
+ /* If there is no slot owner, set as owner the node with the biggest
+ * number of keys, among the set of migrating / importing nodes. */
+ if (owner == NULL) {
+ clusterManagerLogInfo(">>> Nobody claims ownership, "
+ "selecting an owner...\n");
+ owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes,
+ slot, NULL);
+ // If we still don't have an owner, we can't fix it.
+ if (owner == NULL) {
+ clusterManagerLogErr("[ERR] Can't select a slot owner. "
+ "Impossible to fix.\n");
+ success = 0;
+ goto cleanup;
+ }
+
+ // Use ADDSLOTS to assign the slot.
+ clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n",
+ owner->ip, owner->port);
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER "
+ "SETSLOT %d %s",
+ slot, "STABLE");
+ success = clusterManagerCheckRedisReply(owner, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ if (!success) goto cleanup;
+ reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot);
+ success = clusterManagerCheckRedisReply(owner, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ if (!success) goto cleanup;
+ /* Since CLUSTER ADDSLOTS succeeded, we also update the slot
+ * info in the node struct in order to keep it in sync. */
+ owner->slots[slot] = 1;
+ /* Make sure this information will propagate. Not strictly needed
+ * since there is no past owner, so all the other nodes will accept
+ * whatever epoch this node will claim the slot with. */
+ reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH");
+ success = clusterManagerCheckRedisReply(owner, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ if (!success) goto cleanup;
+ /* Remove the owner from the list of migrating/importing
+ * nodes. */
+ clusterManagerRemoveNodeFromList(migrating, owner);
+ clusterManagerRemoveNodeFromList(importing, owner);
+ }
+ /* If there are multiple owners of the slot, we need to fix it
+ * so that a single node is the owner and all the other nodes
+ * are in importing state. Later the fix can be handled by one
+ * of the base cases above.
+ *
+ * Note that this case also covers multiple nodes having the slot
+ * in migrating state, since migrating is a valid state only for
+ * slot owners. */
+ if (listLength(owners) > 1) {
+ owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL);
+ listRewind(owners, &li);
+ redisReply *reply = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOT %d", slot);
+ success = clusterManagerCheckRedisReply(n, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ if (!success) goto cleanup;
+ success = clusterManagerSetSlot(n, owner, slot, "importing", NULL);
+ if (!success) goto cleanup;
+ clusterManagerRemoveNodeFromList(importing, n); // Avoid duplicates
+ listAddNodeTail(importing, n);
+ }
+ reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH");
+ success = clusterManagerCheckRedisReply(owner, reply, NULL);
+ if (reply) freeReplyObject(reply);
+ if (!success) goto cleanup;
+ }
+ int move_opts = CLUSTER_MANAGER_OPT_VERBOSE;
+ /* Case 1: The slot is in migrating state in one node, and in
+ * importing state in another node. That's trivial to address. */
+ if (listLength(migrating) == 1 && listLength(importing) == 1) {
+ clusterManagerNode *src = listFirst(migrating)->value;
+ clusterManagerNode *dst = listFirst(importing)->value;
+ success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL);
+ }
+ /* Case 2: There are multiple nodes that claim the slot as importing,
+ * they probably received keys for the slot after a restart, so they
+ * opened the slot. In this case we just move all the keys to the owner
+ * according to the configuration. */
+ else if (listLength(migrating) == 0 && listLength(importing) > 0) {
+ clusterManagerLogInfo(">>> Moving all the %d slot keys to its "
+ "owner %s:%d\n", slot, owner->ip, owner->port);
+ move_opts |= CLUSTER_MANAGER_OPT_COLD;
+ listRewind(importing, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == owner) continue;
+ success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL);
+ if (!success) goto cleanup;
+ clusterManagerLogInfo(">>> Setting %d as STABLE in "
+ "%s:%d\n", slot, n->ip, n->port);
+
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s",
+ slot, "STABLE");
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) freeReplyObject(r);
+ if (!success) goto cleanup;
+ }
+ } else {
+ int try_to_close_slot = (listLength(importing) == 0 &&
+ listLength(migrating) == 1);
+ if (try_to_close_slot) {
+ clusterManagerNode *n = listFirst(migrating)->value;
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n,
+ "CLUSTER GETKEYSINSLOT %d %d", slot, 10);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) {
+ if (success) try_to_close_slot = (r->elements == 0);
+ freeReplyObject(r);
+ }
+ if (!success) goto cleanup;
+ }
+ /* Case 3: There are no nodes claiming to be in importing state, but
+ * there is a migrating node that actually doesn't hold any keys. We
+ * can just close the slot, probably because of a reshard interrupted
+ * in the middle. */
+ if (try_to_close_slot) {
+ clusterManagerNode *n = listFirst(migrating)->value;
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s",
+ slot, "STABLE");
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) freeReplyObject(r);
+ if (!success) goto cleanup;
+ } else {
+ success = 0;
+ clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot "
+ "yet (work in progress). Slot is set as "
+ "migrating in %s, as importing in %s, "
+ "owner is %s:%d\n", migrating_str,
+ importing_str, owner->ip, owner->port);
+ }
+ }
+cleanup:
+ listRelease(owners);
+ listRelease(migrating);
+ listRelease(importing);
+ sdsfree(migrating_str);
+ sdsfree(importing_str);
+ return success;
+}
+
+static int clusterManagerCheckCluster(int quiet) {
+ listNode *ln = listFirst(cluster_manager.nodes);
+ if (!ln) return 0;
+ int result = 1;
+ int do_fix = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_FIX;
+ clusterManagerNode *node = ln->value;
+ clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n",
+ node->ip, node->port);
+ if (!quiet) clusterManagerShowNodes();
+ if (!clusterManagerIsConfigConsistent()) {
+ sds err = sdsnew("[ERR] Nodes don't agree about configuration!");
+ clusterManagerOnError(err);
+ result = 0;
+ } else {
+ clusterManagerLogOk("[OK] All nodes agree about slots "
+ "configuration.\n");
+ }
+ // Check open slots
+ clusterManagerLogInfo(">>> Check for open slots...\n");
+ listIter li;
+ listRewind(cluster_manager.nodes, &li);
+ int i;
+ dict *open_slots = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->migrating != NULL) {
+ if (open_slots == NULL)
+ open_slots = dictCreate(&clusterManagerDictType, NULL);
+ sds errstr = sdsempty();
+ errstr = sdscatprintf(errstr,
+ "[WARNING] Node %s:%d has slots in "
+ "migrating state ",
+ n->ip,
+ n->port);
+ for (i = 0; i < n->migrating_count; i += 2) {
+ sds slot = n->migrating[i];
+ dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1]));
+ char *fmt = (i > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ errstr = sdscat(errstr, ".");
+ clusterManagerOnError(errstr);
+ }
+ if (n->importing != NULL) {
+ if (open_slots == NULL)
+ open_slots = dictCreate(&clusterManagerDictType, NULL);
+ sds errstr = sdsempty();
+ errstr = sdscatprintf(errstr,
+ "[WARNING] Node %s:%d has slots in "
+ "importing state ",
+ n->ip,
+ n->port);
+ for (i = 0; i < n->importing_count; i += 2) {
+ sds slot = n->importing[i];
+ dictAdd(open_slots, slot, sdsdup(n->importing[i + 1]));
+ char *fmt = (i > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ errstr = sdscat(errstr, ".");
+ clusterManagerOnError(errstr);
+ }
+ }
+ if (open_slots != NULL) {
+ result = 0;
+ dictIterator *iter = dictGetIterator(open_slots);
+ dictEntry *entry;
+ sds errstr = sdsnew("[WARNING] The following slots are open: ");
+ i = 0;
+ while ((entry = dictNext(iter)) != NULL) {
+ sds slot = (sds) dictGetKey(entry);
+ char *fmt = (i++ > 0 ? ",%S" : "%S");
+ errstr = sdscatfmt(errstr, fmt, slot);
+ }
+ clusterManagerLogErr("%s.\n", (char *) errstr);
+ sdsfree(errstr);
+ if (do_fix) {
+ // Fix open slots.
+ dictReleaseIterator(iter);
+ iter = dictGetIterator(open_slots);
+ while ((entry = dictNext(iter)) != NULL) {
+ sds slot = (sds) dictGetKey(entry);
+ result = clusterManagerFixOpenSlot(atoi(slot));
+ if (!result) break;
+ }
+ }
+ dictReleaseIterator(iter);
+ dictRelease(open_slots);
+ }
+ clusterManagerLogInfo(">>> Check slots coverage...\n");
+ char slots[CLUSTER_MANAGER_SLOTS];
+ memset(slots, 0, CLUSTER_MANAGER_SLOTS);
+ int coverage = clusterManagerGetCoveredSlots(slots);
+ if (coverage == CLUSTER_MANAGER_SLOTS) {
+ clusterManagerLogOk("[OK] All %d slots covered.\n",
+ CLUSTER_MANAGER_SLOTS);
+ } else {
+ sds err = sdsempty();
+ err = sdscatprintf(err, "[ERR] Not all %d slots are "
+ "covered by nodes.\n",
+ CLUSTER_MANAGER_SLOTS);
+ clusterManagerOnError(err);
+ result = 0;
+ if (do_fix/* && result*/) {
+ dictType dtype = clusterManagerDictType;
+ dtype.valDestructor = dictListDestructor;
+ clusterManagerUncoveredSlots = dictCreate(&dtype, NULL);
+ int fixed = clusterManagerFixSlotsCoverage(slots);
+ if (fixed > 0) result = 1;
+ }
+ }
+ return result;
+}
+
+static clusterManagerNode *clusterNodeForResharding(char *id,
+ clusterManagerNode *target,
+ int *raise_err)
+{
+ clusterManagerNode *node = NULL;
+ const char *invalid_node_msg = "*** The specified node (%s) is not known "
+ "or not a master, please retry.\n";
+ node = clusterManagerNodeByName(id);
+ *raise_err = 0;
+ if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) {
+ clusterManagerLogErr(invalid_node_msg, id);
+ *raise_err = 1;
+ return NULL;
+ } else if (node != NULL && target != NULL) {
+ if (!strcmp(node->name, target->name)) {
+ clusterManagerLogErr( "*** It is not possible to use "
+ "the target node as "
+ "source node.\n");
+ return NULL;
+ }
+ }
+ return node;
+}
+
+static list *clusterManagerComputeReshardTable(list *sources, int numslots) {
+ list *moved = listCreate();
+ int src_count = listLength(sources), i = 0, tot_slots = 0, j;
+ clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted));
+ listIter li;
+ listNode *ln;
+ listRewind(sources, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ tot_slots += node->slots_count;
+ sorted[i++] = node;
+ }
+ qsort(sorted, src_count, sizeof(clusterManagerNode *),
+ clusterManagerSlotCountCompareDesc);
+ for (i = 0; i < src_count; i++) {
+ clusterManagerNode *node = sorted[i];
+ float n = ((float) numslots / tot_slots * node->slots_count);
+ if (i == 0) n = ceil(n);
+ else n = floor(n);
+ int max = (int) n, count = 0;
+ for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) {
+ int slot = node->slots[j];
+ if (!slot) continue;
+ if (count >= max || (int)listLength(moved) >= numslots) break;
+ clusterManagerReshardTableItem *item = zmalloc(sizeof(*item));
+ item->source = node;
+ item->slot = j;
+ listAddNodeTail(moved, item);
+ count++;
+ }
+ }
+ zfree(sorted);
+ return moved;
+}
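+
+/* A worked example of the proportional split above (illustrative numbers
+ * only): moving numslots = 10 from source A owning 12 slots and source B
+ * owning 4 (tot_slots = 16). A sorts first, so it contributes
+ * ceil(10.0/16 * 12) = 8 slots; B contributes floor(10.0/16 * 4) = 2,
+ * and exactly 10 slots end up in the reshard table. */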
+
+static void clusterManagerShowReshardTable(list *table) {
+ listIter li;
+ listNode *ln;
+ listRewind(table, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerReshardTableItem *item = ln->value;
+ clusterManagerNode *n = item->source;
+ printf(" Moving slot %d from %s\n", item->slot, (char *) n->name);
+ }
+}
+
+static void clusterManagerReleaseReshardTable(list *table) {
+ if (table != NULL) {
+ listIter li;
+ listNode *ln;
+ listRewind(table, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerReshardTableItem *item = ln->value;
+ zfree(item);
+ }
+ listRelease(table);
+ }
+}
+
+static void clusterManagerLog(int level, const char* fmt, ...) {
+ int use_colors =
+ (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR);
+ if (use_colors) {
+ printf("\033[");
+ switch (level) {
+ case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break;
+ case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break;
+ case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break;
+ case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break;
+ default: printf(LOG_COLOR_RESET); break;
+ }
+ }
+ va_list ap;
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ if (use_colors) printf("\033[" LOG_COLOR_RESET);
+}
+
+static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array,
+ int alloc_len)
+{
+ array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));
+ array->alloc = array->nodes;
+ array->len = alloc_len;
+ array->count = 0;
+}
+
+/* Reset array->nodes to the original array allocation and re-count non-NULL
+ * nodes. */
+static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) {
+ if (array->nodes > array->alloc) {
+ array->len = array->nodes - array->alloc;
+ array->nodes = array->alloc;
+ array->count = 0;
+ int i = 0;
+ for(; i < array->len; i++) {
+ if (array->nodes[i] != NULL) array->count++;
+ }
+ }
+}
+
+/* Shift array->nodes and store the shifted node into 'nodeptr'. */
+static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array,
+ clusterManagerNode **nodeptr)
+{
+ assert(array->nodes < (array->nodes + array->len));
+ /* If the first node to be shifted is not NULL, decrement count. */
+ if (*array->nodes != NULL) array->count--;
+ /* Store the first node to be shifted into 'nodeptr'. */
+ *nodeptr = *array->nodes;
+ /* Shift the nodes array and decrement length. */
+ array->nodes++;
+ array->len--;
+}
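+
+/* For instance (hypothetical contents): shifting the array {n1, n2}
+ * returns n1 through 'nodeptr' and leaves {n2} with len = 1. A leading
+ * NULL entry (a node already consumed elsewhere) would be returned as
+ * NULL without touching 'count', since only non-NULL heads decrement it. */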
+
+static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array,
+ clusterManagerNode *node)
+{
+ assert(array->nodes < (array->nodes + array->len));
+ assert(node != NULL);
+ assert(array->count < array->len);
+ array->nodes[array->count++] = node;
+}
+
+static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node,
+ char *err)
+{
+ char *msg;
+ if (err) msg = err;
+ else {
+ msg = "is not empty. Either the node already knows other "
+ "nodes (check with CLUSTER NODES) or contains some "
+ "key in database 0.";
+ }
+ clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg);
+}
+
+static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node,
+ char *err)
+{
+ char *msg = (err ? err : "is not configured as a cluster node.");
+ clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg);
+}
+
+/* Execute redis-cli in Cluster Manager mode */
+static void clusterManagerMode(clusterManagerCommandProc *proc) {
+ int argc = config.cluster_manager_command.argc;
+ char **argv = config.cluster_manager_command.argv;
+ cluster_manager.nodes = NULL;
+ if (!proc(argc, argv)) goto cluster_manager_err;
+ freeClusterManager();
+ exit(0);
+cluster_manager_err:
+ freeClusterManager();
+ sdsfree(config.hostip);
+ sdsfree(config.mb_delim);
+ exit(1);
+}
+
+/* Cluster Manager Commands */
+
+static int clusterManagerCommandCreate(int argc, char **argv) {
+ int i, j, success = 1;
+ cluster_manager.nodes = listCreate();
+ for (i = 0; i < argc; i++) {
+ char *addr = argv[i];
+ char *c = strrchr(addr, '@');
+ if (c != NULL) *c = '\0';
+ c = strrchr(addr, ':');
+ if (c == NULL) {
+ fprintf(stderr, "Invalid address format: %s\n", addr);
+ return 0;
+ }
+ *c = '\0';
+ char *ip = addr;
+ int port = atoi(++c);
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerNodeConnect(node)) {
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ char *err = NULL;
+ if (!clusterManagerNodeIsCluster(node, &err)) {
+ clusterManagerPrintNotClusterNodeError(node, err);
+ if (err) zfree(err);
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ err = NULL;
+ if (!clusterManagerNodeLoadInfo(node, 0, &err)) {
+ if (err) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ err = NULL;
+ if (!clusterManagerNodeIsEmpty(node, &err)) {
+ clusterManagerPrintNotEmptyNodeError(node, err);
+ if (err) zfree(err);
+ freeClusterManagerNode(node);
+ return 0;
+ }
+ listAddNodeTail(cluster_manager.nodes, node);
+ }
+ int node_len = cluster_manager.nodes->len;
+ int replicas = config.cluster_manager_command.replicas;
+ int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas);
+ if (masters_count < 3) {
+ clusterManagerLogErr(
+ "*** ERROR: Invalid configuration for cluster creation.\n"
+ "*** Redis Cluster requires at least 3 master nodes.\n"
+ "*** This is not possible with %d nodes and %d replicas per node.",
+ node_len, replicas);
+ clusterManagerLogErr("\n*** At least %d nodes are required.\n",
+ 3 * (replicas + 1));
+ return 0;
+ }
+ clusterManagerLogInfo(">>> Performing hash slots allocation "
+ "on %d nodes...\n", node_len);
+ int interleaved_len = 0, ip_count = 0;
+ clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved));
+ char **ips = zcalloc(node_len * sizeof(char*));
+ clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes));
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ int found = 0;
+ for (i = 0; i < ip_count; i++) {
+ char *ip = ips[i];
+ if (!strcmp(ip, n->ip)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ ips[ip_count++] = n->ip;
+ }
+ clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+ if (node_array->nodes == NULL)
+ clusterManagerNodeArrayInit(node_array, node_len);
+ clusterManagerNodeArrayAdd(node_array, n);
+ }
+ while (interleaved_len < node_len) {
+ for (i = 0; i < ip_count; i++) {
+ clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+ if (node_array->count > 0) {
+ clusterManagerNode *n = NULL;
+ clusterManagerNodeArrayShift(node_array, &n);
+ interleaved[interleaved_len++] = n;
+ }
+ }
+ }
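+ /* Interleaving example (hypothetical addresses): nodes on IPs
+ * {A, A, B, C} yield ip_nodes A:[a1,a2], B:[b1], C:[c1]; successive
+ * passes shift a1, b1, c1, then a2, so masters taken from the head
+ * of 'interleaved' end up spread across different hosts. */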
+ clusterManagerNode **masters = interleaved;
+ interleaved += masters_count;
+ interleaved_len -= masters_count;
+ float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count;
+ long first = 0;
+ float cursor = 0.0f;
+ for (i = 0; i < masters_count; i++) {
+ clusterManagerNode *master = masters[i];
+ long last = lround(cursor + slots_per_node - 1);
+ if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1))
+ last = CLUSTER_MANAGER_SLOTS - 1;
+ if (last < first) last = first;
+ printf("Master[%d] -> Slots %lu - %lu\n", i, first, last);
+ master->slots_count = 0;
+ for (j = first; j <= last; j++) {
+ master->slots[j] = 1;
+ master->slots_count++;
+ }
+ master->dirty = 1;
+ first = last + 1;
+ cursor += slots_per_node;
+ }
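+ /* Sketch of the resulting split, assuming 3 masters: slots_per_node =
+ * 16384/3 = 5461.33, so Master[0] gets 0-5460, Master[1] gets
+ * 5461-10922, and Master[2], being the last one, is clamped to
+ * 10923-16383. */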
+
+ int assign_unused = 0, available_count = interleaved_len;
+assign_replicas:
+ for (i = 0; i < masters_count; i++) {
+ clusterManagerNode *master = masters[i];
+ int assigned_replicas = 0;
+ while (assigned_replicas < replicas) {
+ if (available_count == 0) break;
+ clusterManagerNode *found = NULL, *slave = NULL;
+ int firstNodeIdx = -1;
+ for (j = 0; j < interleaved_len; j++) {
+ clusterManagerNode *n = interleaved[j];
+ if (n == NULL) continue;
+ if (strcmp(n->ip, master->ip)) {
+ found = n;
+ interleaved[j] = NULL;
+ break;
+ }
+ if (firstNodeIdx < 0) firstNodeIdx = j;
+ }
+ if (found) slave = found;
+ else if (firstNodeIdx >= 0) {
+ slave = interleaved[firstNodeIdx];
+ interleaved_len -= (firstNodeIdx + 1);
+ interleaved += (firstNodeIdx + 1);
+ }
+ if (slave != NULL) {
+ assigned_replicas++;
+ available_count--;
+ if (slave->replicate) sdsfree(slave->replicate);
+ slave->replicate = sdsnew(master->name);
+ slave->dirty = 1;
+ } else break;
+ printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port,
+ master->ip, master->port);
+ if (assign_unused) break;
+ }
+ }
+ if (!assign_unused && available_count > 0) {
+ assign_unused = 1;
+ printf("Adding extra replicas...\n");
+ goto assign_replicas;
+ }
+ for (i = 0; i < ip_count; i++) {
+ clusterManagerNodeArray *node_array = ip_nodes + i;
+ clusterManagerNodeArrayReset(node_array);
+ }
+ clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count);
+ clusterManagerShowNodes();
+ if (confirmWithYes("Can I set the above configuration?")) {
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ char *err = NULL;
+ int flushed = clusterManagerFlushNodeConfig(node, &err);
+ if (!flushed && node->dirty && !node->replicate) {
+ if (err != NULL) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ } else if (err != NULL) zfree(err);
+ }
+ clusterManagerLogInfo(">>> Nodes configuration updated\n");
+ clusterManagerLogInfo(">>> Assign a different config epoch to "
+ "each node\n");
+ int config_epoch = 1;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ redisReply *reply = NULL;
+ reply = CLUSTER_MANAGER_COMMAND(node,
+ "cluster set-config-epoch %d",
+ config_epoch++);
+ if (reply != NULL) freeReplyObject(reply);
+ }
+ clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join "
+ "the cluster\n");
+ clusterManagerNode *first = NULL;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (first == NULL) {
+ first = node;
+ continue;
+ }
+ redisReply *reply = NULL;
+ reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d",
+ first->ip, first->port);
+ int is_err = 0;
+ if (reply != NULL) {
+ if ((is_err = reply->type == REDIS_REPLY_ERROR))
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str);
+ freeReplyObject(reply);
+ } else {
+ is_err = 1;
+ fprintf(stderr, "Failed to send CLUSTER MEET command.\n");
+ }
+ if (is_err) {
+ success = 0;
+ goto cleanup;
+ }
+ }
+ /* Give one second for the join to start, in order to avoid that
+ * waiting for the cluster join finds all the nodes already in
+ * agreement about the config simply because they are still empty
+ * and have no assigned slots. */
+ sleep(1);
+ clusterManagerWaitForClusterJoin();
+ /* Useful for the replicas */
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!node->dirty) continue;
+ char *err = NULL;
+ int flushed = clusterManagerFlushNodeConfig(node, &err);
+ if (!flushed && !node->replicate) {
+ if (err != NULL) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ }
+ }
+ // Reset Nodes
+ listRewind(cluster_manager.nodes, &li);
+ clusterManagerNode *first_node = NULL;
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *node = ln->value;
+ if (!first_node) first_node = node;
+ else freeClusterManagerNode(node);
+ }
+ listEmpty(cluster_manager.nodes);
+ if (!clusterManagerLoadInfoFromNode(first_node, 0)) {
+ success = 0;
+ goto cleanup;
+ }
+ clusterManagerCheckCluster(0);
+ }
+cleanup:
+ /* Free everything */
+ zfree(masters);
+ zfree(ips);
+ for (i = 0; i < node_len; i++) {
+ clusterManagerNodeArray *node_array = ip_nodes + i;
+ CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array);
+ }
+ zfree(ip_nodes);
+ return success;
+}
+
+static int clusterManagerCommandAddNode(int argc, char **argv) {
+ int success = 1;
+ redisReply *reply = NULL;
+ char *ref_ip = NULL, *ip = NULL;
+ int ref_port = 0, port = 0;
+ if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port))
+ goto invalid_args;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port))
+ goto invalid_args;
+ clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port,
+ ref_ip, ref_port);
+ // Check the existing cluster
+ clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port);
+ if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0;
+ if (!clusterManagerCheckCluster(0)) return 0;
+
+ /* If --cluster-master-id was specified, try to resolve it now so that we
+ * abort before starting with the node configuration. */
+ clusterManagerNode *master_node = NULL;
+ if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) {
+ char *master_id = config.cluster_manager_command.master_id;
+ if (master_id != NULL) {
+ master_node = clusterManagerNodeByName(master_id);
+ if (master_node == NULL) {
+ clusterManagerLogErr("[ERR] No such master ID %s\n", master_id);
+ return 0;
+ }
+ } else {
+ master_node = clusterManagerNodeWithLeastReplicas();
+ assert(master_node != NULL);
+ printf("Automatically selected master %s:%d\n", master_node->ip,
+ master_node->port);
+ }
+ }
+
+ // Add the new node
+ clusterManagerNode *new_node = clusterManagerNewNode(ip, port);
+ int added = 0;
+ if (!clusterManagerNodeConnect(new_node)) {
+ clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n",
+ ip, port);
+ success = 0;
+ goto cleanup;
+ }
+ char *err = NULL;
+ if (!(success = clusterManagerNodeIsCluster(new_node, &err))) {
+ clusterManagerPrintNotClusterNodeError(new_node, err);
+ if (err) zfree(err);
+ goto cleanup;
+ }
+ if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) {
+ if (err) {
+ CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err);
+ zfree(err);
+ }
+ success = 0;
+ goto cleanup;
+ }
+ if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) {
+ clusterManagerPrintNotEmptyNodeError(new_node, err);
+ if (err) zfree(err);
+ goto cleanup;
+ }
+ clusterManagerNode *first = listFirst(cluster_manager.nodes)->value;
+ listAddNodeTail(cluster_manager.nodes, new_node);
+ added = 1;
+
+ // Send CLUSTER MEET command to the new node
+ clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it "
+ "join the cluster.\n", ip, port);
+ reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d",
+ first->ip, first->port);
+ if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL)))
+ goto cleanup;
+
+ /* Additional configuration is needed if the node is added as a slave. */
+ if (master_node) {
+ sleep(1);
+ clusterManagerWaitForClusterJoin();
+ clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n",
+ master_node->ip, master_node->port);
+ freeReplyObject(reply);
+ reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s",
+ master_node->name);
+ if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL)))
+ goto cleanup;
+ }
+ clusterManagerLogOk("[OK] New node added correctly.\n");
+cleanup:
+ if (!added && new_node) freeClusterManagerNode(new_node);
+ if (reply) freeReplyObject(reply);
+ return success;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandDeleteNode(int argc, char **argv) {
+ UNUSED(argc);
+ int success = 1;
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
+ char *node_id = argv[1];
+ clusterManagerLogInfo(">>> Removing node %s from cluster %s:%d\n",
+ node_id, ip, port);
+ clusterManagerNode *ref_node = clusterManagerNewNode(ip, port);
+ clusterManagerNode *node = NULL;
+
+ // Load cluster information
+ if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0;
+
+ // Check if the node exists and is not empty
+ node = clusterManagerNodeByName(node_id);
+ if (node == NULL) {
+ clusterManagerLogErr("[ERR] No such node ID %s\n", node_id);
+ return 0;
+ }
+ if (node->slots_count != 0) {
+ clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data "
+ "away and try again.\n", node->ip, node->port);
+ return 0;
+ }
+
+ // Send CLUSTER FORGET to all the nodes but the node to remove
+ clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the "
+ "cluster...\n");
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n == node) continue;
+ if (n->replicate && !strcasecmp(n->replicate, node_id)) {
+ // Reconfigure the slave to replicate with some other node
+ clusterManagerNode *master = clusterManagerNodeWithLeastReplicas();
+ assert(master != NULL);
+ clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n",
+ n->ip, n->port, master->ip, master->port);
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s",
+ master->name);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) freeReplyObject(r);
+ if (!success) return 0;
+ }
+ redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s",
+ node_id);
+ success = clusterManagerCheckRedisReply(n, r, NULL);
+ if (r) freeReplyObject(r);
+ if (!success) return 0;
+ }
+
+ // Finally shutdown the node
+ clusterManagerLogInfo(">>> SHUTDOWN the node.\n");
+ redisReply *r = redisCommand(node->context, "SHUTDOWN");
+ success = clusterManagerCheckRedisReply(node, r, NULL);
+ if (r) freeReplyObject(r);
+ return success;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandInfo(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+ clusterManagerShowClusterInfo();
+ return 1;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandCheck(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+ clusterManagerShowClusterInfo();
+ return clusterManagerCheckCluster(0);
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandFix(int argc, char **argv) {
+ config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX;
+ return clusterManagerCommandCheck(argc, argv);
+}
+
+static int clusterManagerCommandReshard(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+ clusterManagerCheckCluster(0);
+ if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) {
+ fflush(stdout);
fprintf(stderr,
- "Can't open file '%s': %s\n", config.eval, strerror(errno));
- exit(1);
+ "*** Please fix your cluster problems before resharding\n");
+ return 0;
}
- while((nread = fread(buf,1,sizeof(buf),fp)) != 0) {
- script = sdscatlen(script,buf,nread);
+ int slots = config.cluster_manager_command.slots;
+ if (!slots) {
+ while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) {
+ printf("How many slots do you want to move (from 1 to %d)? ",
+ CLUSTER_MANAGER_SLOTS);
+ fflush(stdout);
+ char buf[6];
+ int nread = read(fileno(stdin),buf,6);
+ if (nread <= 0) continue;
+ int last_idx = nread - 1;
+ if (buf[last_idx] != '\n') {
+ int ch;
+ while ((ch = getchar()) != '\n' && ch != EOF) {}
+ }
+ buf[last_idx] = '\0';
+ slots = atoi(buf);
+ }
}
- fclose(fp);
+ char buf[255];
+ char *to = config.cluster_manager_command.to,
+ *from = config.cluster_manager_command.from;
+ while (to == NULL) {
+ printf("What is the receiving node ID? ");
+ fflush(stdout);
+ int nread = read(fileno(stdin),buf,255);
+ if (nread <= 0) continue;
+ int last_idx = nread - 1;
+ if (buf[last_idx] != '\n') {
+ int ch;
+ while ((ch = getchar()) != '\n' && ch != EOF) {}
+ }
+ buf[last_idx] = '\0';
+ if (strlen(buf) > 0) to = buf;
+ }
+ int raise_err = 0;
+ clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err);
+ if (target == NULL) return 0;
+ list *sources = listCreate();
+ list *table = NULL;
+ int all = 0, result = 1;
+ if (from == NULL) {
+ printf("Please enter all the source node IDs.\n");
+ printf(" Type 'all' to use all the nodes as source nodes for "
+ "the hash slots.\n");
+ printf(" Type 'done' once you entered all the source nodes IDs.\n");
+ while (1) {
+ printf("Source node #%lu: ", listLength(sources) + 1);
+ fflush(stdout);
+ int nread = read(fileno(stdin),buf,255);
+ if (nread <= 0) continue;
+ int last_idx = nread - 1;
+ if (buf[last_idx] != '\n') {
+ int ch;
+ while ((ch = getchar()) != '\n' && ch != EOF) {}
+ }
+ buf[last_idx] = '\0';
+ if (!strcmp(buf, "done")) break;
+ else if (!strcmp(buf, "all")) {
+ all = 1;
+ break;
+ } else {
+ clusterManagerNode *src =
+ clusterNodeForResharding(buf, target, &raise_err);
+ if (src != NULL) listAddNodeTail(sources, src);
+ else if (raise_err) {
+ result = 0;
+ goto cleanup;
+ }
+ }
+ }
+ } else {
+ char *p;
+ while((p = strchr(from, ',')) != NULL) {
+ *p = '\0';
+ if (!strcmp(from, "all")) {
+ all = 1;
+ break;
+ } else {
+ clusterManagerNode *src =
+ clusterNodeForResharding(from, target, &raise_err);
+ if (src != NULL) listAddNodeTail(sources, src);
+ else if (raise_err) {
+ result = 0;
+ goto cleanup;
+ }
+ }
+ from = p + 1;
+ }
+ /* Check if there's still another source to process. */
+ if (!all && strlen(from) > 0) {
+ if (!strcmp(from, "all")) all = 1;
+ if (!all) {
+ clusterManagerNode *src =
+ clusterNodeForResharding(from, target, &raise_err);
+ if (src != NULL) listAddNodeTail(sources, src);
+ else if (raise_err) {
+ result = 0;
+ goto cleanup;
+ }
+ }
+ }
+ }
+ listIter li;
+ listNode *ln;
+ if (all) {
+ listEmpty(sources);
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
+ continue;
+ if (!sdscmp(n->name, target->name)) continue;
+ listAddNodeTail(sources, n);
+ }
+ }
+ if (listLength(sources) == 0) {
+ fprintf(stderr, "*** No source nodes given, operation aborted.\n");
+ result = 0;
+ goto cleanup;
+ }
+ printf("\nReady to move %d slots.\n", slots);
+ printf(" Source nodes:\n");
+ listRewind(sources, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *src = ln->value;
+ sds info = clusterManagerNodeInfo(src, 4);
+ printf("%s\n", info);
+ sdsfree(info);
+ }
+ printf(" Destination node:\n");
+ sds info = clusterManagerNodeInfo(target, 4);
+ printf("%s\n", info);
+ sdsfree(info);
+ table = clusterManagerComputeReshardTable(sources, slots);
+ printf(" Resharding plan:\n");
+ clusterManagerShowReshardTable(table);
+ if (!(config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_YES))
+ {
+ printf("Do you want to proceed with the proposed "
+ "reshard plan (yes/no)? ");
+ fflush(stdout);
+ char buf[4];
+ int nread = read(fileno(stdin),buf,4);
+ buf[3] = '\0';
+ if (nread <= 0 || strcmp("yes", buf) != 0) {
+ result = 0;
+ goto cleanup;
+ }
+ }
+ int opts = CLUSTER_MANAGER_OPT_VERBOSE;
+ listRewind(table, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerReshardTableItem *item = ln->value;
+ char *err = NULL;
+ result = clusterManagerMoveSlot(item->source, target, item->slot,
+ opts, &err);
+ if (!result) {
+ if (err != NULL) {
+ //clusterManagerLogErr("\n%s\n", err);
+ zfree(err);
+ }
+ goto cleanup;
+ }
+ }
+cleanup:
+ listRelease(sources);
+ clusterManagerReleaseReshardTable(table);
+ return result;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
- /* Create our argument vector */
- argv2 = zmalloc(sizeof(sds)*(argc+3));
- argv2[0] = sdsnew("EVAL");
- argv2[1] = script;
- for (j = 0; j < argc; j++) {
- if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) {
- got_comma = 1;
+static int clusterManagerCommandRebalance(int argc, char **argv) {
+ int port = 0;
+ char *ip = NULL;
+ clusterManagerNode **weightedNodes = NULL;
+ list *involved = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+ int result = 1, i;
+ if (config.cluster_manager_command.weight != NULL) {
+ for (i = 0; i < config.cluster_manager_command.weight_argc; i++) {
+ char *name = config.cluster_manager_command.weight[i];
+ char *p = strchr(name, '=');
+ if (p == NULL) {
+ result = 0;
+ goto cleanup;
+ }
+ *p = '\0';
+ float w = atof(++p);
+ clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name);
+ if (n == NULL) {
+ clusterManagerLogErr("*** No such master node %s\n", name);
+ result = 0;
+ goto cleanup;
+ }
+ n->weight = w;
+ }
+ }
+ float total_weight = 0;
+ int nodes_involved = 0;
+ int use_empty = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
+ involved = listCreate();
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ /* Compute the total cluster weight. */
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate)
continue;
+ if (!use_empty && n->slots_count == 0) {
+ n->weight = 0;
+ continue;
+ }
+ total_weight += n->weight;
+ nodes_involved++;
+ listAddNodeTail(involved, n);
+ }
+ weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *));
+ if (weightedNodes == NULL) goto cleanup;
+ /* Check cluster, only proceed if it looks sane. */
+ clusterManagerCheckCluster(1);
+ if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) {
+ clusterManagerLogErr("*** Please fix your cluster problems "
+ "before rebalancing\n");
+ result = 0;
+ goto cleanup;
+ }
+ /* Calculate the slots balance for each node. It's the number of
+ * slots the node should lose (if positive) or gain (if negative)
+ * in order to be balanced. */
+ int threshold_reached = 0, total_balance = 0;
+ float threshold = config.cluster_manager_command.threshold;
+ i = 0;
+ listRewind(involved, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ weightedNodes[i++] = n;
+ int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) *
+ n->weight);
+ n->balance = n->slots_count - expected;
+ total_balance += n->balance;
+ /* Compute the percentage of difference between the
+ * expected number of slots and the real one, to see
+ * if it's over the threshold specified by the user. */
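+ /* Example with equal weights on 3 masters (illustrative numbers):
+ * expected = 16384/3 = 5461; a master holding 6000 slots has
+ * balance +539 and an error of |100 - 100*5461/6000| ~= 8.98%,
+ * which would trip e.g. a 5% threshold. */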
+ int over_threshold = 0;
+ if (threshold > 0) {
+ if (n->slots_count > 0) {
+ float err_perc = fabs((100-(100.0*expected/n->slots_count)));
+ if (err_perc > threshold) over_threshold = 1;
+ } else if (expected > 1) {
+ over_threshold = 1;
+ }
+ }
+ if (over_threshold) threshold_reached = 1;
+ }
+ if (!threshold_reached) {
+ clusterManagerLogWarn("*** No rebalancing needed! "
+ "All nodes are within the %.2f%% threshold.\n",
+ config.cluster_manager_command.threshold);
+ goto cleanup;
+ }
+ /* Because of rounding, it is possible that the balance of all nodes
+ * summed does not give 0. Make sure that nodes that have to provide
+ * slots are always matched by nodes receiving slots. */
+ while (total_balance > 0) {
+ listRewind(involved, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->balance <= 0 && total_balance > 0) {
+ n->balance--;
+ total_balance--;
+ }
+ }
+ }
+ /* Sort nodes by their slots balance. */
+ qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *),
+ clusterManagerCompareNodeBalance);
+ clusterManagerLogInfo(">>> Rebalancing across %d nodes. "
+ "Total weight = %.2f\n",
+ nodes_involved, total_weight);
+ if (config.verbose) {
+ for (i = 0; i < nodes_involved; i++) {
+ clusterManagerNode *n = weightedNodes[i];
+ printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance);
+ }
+ }
+ /* Now the start of the 'weightedNodes' array holds nodes that should
+ * receive slots, and its end holds nodes that must give slots away.
+ * We take two indexes, one at the start and one at the end,
+ * incrementing or decrementing them until we find nodes that need
+ * to get or provide slots. */
+ int dst_idx = 0;
+ int src_idx = nodes_involved - 1;
+ int simulate = config.cluster_manager_command.flags &
+ CLUSTER_MANAGER_CMD_FLAG_SIMULATE;
+ while (dst_idx < src_idx) {
+ clusterManagerNode *dst = weightedNodes[dst_idx];
+ clusterManagerNode *src = weightedNodes[src_idx];
+ int db = abs(dst->balance);
+ int sb = abs(src->balance);
+ int numslots = (db < sb ? db : sb);
+ if (numslots > 0) {
+ printf("Moving %d slots from %s:%d to %s:%d\n", numslots,
+ src->ip,
+ src->port,
+ dst->ip,
+ dst->port);
+ /* Actually move the slots. */
+ list *lsrc = listCreate(), *table = NULL;
+ listAddNodeTail(lsrc, src);
+ table = clusterManagerComputeReshardTable(lsrc, numslots);
+ listRelease(lsrc);
+ int table_len = (int) listLength(table);
+ if (!table || table_len != numslots) {
+ clusterManagerLogErr("*** Assertion failed: Reshard table "
+ "!= number of slots");
+ result = 0;
+ goto end_move;
+ }
+ if (simulate) {
+ for (i = 0; i < table_len; i++) printf("#");
+ } else {
+ int opts = CLUSTER_MANAGER_OPT_QUIET |
+ CLUSTER_MANAGER_OPT_UPDATE;
+ listRewind(table, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerReshardTableItem *item = ln->value;
+ result = clusterManagerMoveSlot(item->source,
+ dst,
+ item->slot,
+ opts, NULL);
+ if (!result) goto end_move;
+ printf("#");
+ fflush(stdout);
+ }
+
+ }
+ printf("\n");
+end_move:
+ clusterManagerReleaseReshardTable(table);
+ if (!result) goto cleanup;
+ }
+ /* Update nodes balance. */
+ dst->balance += numslots;
+ src->balance -= numslots;
+ if (dst->balance == 0) dst_idx++;
+ if (src->balance == 0) src_idx--;
+ }
+cleanup:
+ if (involved != NULL) listRelease(involved);
+ if (weightedNodes != NULL) zfree(weightedNodes);
+ return result;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandSetTimeout(int argc, char **argv) {
+ UNUSED(argc);
+ int port = 0;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
+ int timeout = atoi(argv[1]);
+ if (timeout < 100) {
+ fprintf(stderr, "Setting a node timeout of less than 100 "
+ "milliseconds is a bad idea.\n");
+ return 0;
+ }
+ // Load cluster information
+ clusterManagerNode *node = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+ int ok_count = 0, err_count = 0;
+
+ clusterManagerLogInfo(">>> Reconfiguring node timeout in every "
+ "cluster node...\n");
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ char *err = NULL;
+ redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d",
+ "SET",
+ "cluster-node-timeout",
+ timeout);
+ if (reply == NULL) goto reply_err;
+ int ok = clusterManagerCheckRedisReply(n, reply, &err);
+ freeReplyObject(reply);
+ if (!ok) goto reply_err;
+ reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE");
+ if (reply == NULL) goto reply_err;
+ ok = clusterManagerCheckRedisReply(n, reply, &err);
+ freeReplyObject(reply);
+ if (!ok) goto reply_err;
+ clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip,
+ n->port);
+ ok_count++;
+ continue;
+reply_err:
+ if (err == NULL) err = "";
+ clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip,
+ n->port, err);
+ err_count++;
+ }
+ clusterManagerLogInfo(">>> New node timeout set. %d OK, %d ERR.\n",
+ ok_count, err_count);
+ return 1;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandImport(int argc, char **argv) {
+ int success = 1;
+ int port = 0, src_port = 0;
+ char *ip = NULL, *src_ip = NULL;
+ char *invalid_args_msg = NULL;
+ if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) {
+ invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG;
+ goto invalid_args;
+ }
+ if (config.cluster_manager_command.from == NULL) {
+ invalid_args_msg = "[ERR] Option '--cluster-from' is required for "
+ "subcommand 'import'.\n";
+ goto invalid_args;
+ }
+ char *src_host[] = {config.cluster_manager_command.from};
+ if (!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) {
+ invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to "
+ "pass a valid address (ie. 120.0.0.1:7000).\n";
+ goto invalid_args;
+ }
+ clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n",
+ src_ip, src_port, ip, port);
+
+ clusterManagerNode *refnode = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0;
+ if (!clusterManagerCheckCluster(0)) return 0;
+ char *reply_err = NULL;
+ redisReply *src_reply = NULL;
+ // Connect to the source node.
+ redisContext *src_ctx = redisConnect(src_ip, src_port);
+ if (src_ctx->err) {
+ success = 0;
+ fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip,
+ src_port, src_ctx->errstr);
+ goto cleanup;
+ }
+ src_reply = reconnectingRedisCommand(src_ctx, "INFO");
+ if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
+ if (src_reply && src_reply->str) reply_err = src_reply->str;
+ success = 0;
+ goto cleanup;
+ }
+ if (getLongInfoField(src_reply->str, "cluster_enabled")) {
+ clusterManagerLogErr("[ERR] The source node should not be a "
+ "cluster node.\n");
+ success = 0;
+ goto cleanup;
+ }
+ freeReplyObject(src_reply);
+ src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE");
+ if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
+ if (src_reply && src_reply->str) reply_err = src_reply->str;
+ success = 0;
+ goto cleanup;
+ }
+ int size = src_reply->integer, i;
+ clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size);
+
+ // Build a slot -> node map
+ clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS];
+ memset(slots_map, 0, sizeof(slots_map));
+ listIter li;
+ listNode *ln;
+ for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) {
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
+ if (n->slots_count == 0) continue;
+ if (n->slots[i]) {
+ slots_map[i] = n;
+ break;
+ }
}
- argv2[j+3-got_comma] = sdsnew(argv[j]);
- if (!got_comma) keys++;
}
- argv2[2] = sdscatprintf(sdsempty(),"%d",keys);
- /* Call it */
- return issueCommand(argc+3-got_comma, argv2);
+ char cmdfmt[50] = "MIGRATE %s %d %s %d %d";
+ if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY)
+ strcat(cmdfmt, " %s");
+ if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE)
+ strcat(cmdfmt, " %s");
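+ /* Note: the trailing "COPY" and "REPLACE" arguments passed to the
+ * MIGRATE call below are only consumed by the formatter when the
+ * corresponding " %s" was appended to cmdfmt above, so a plain
+ * MIGRATE is sent otherwise. */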
+
+ /* Use SCAN to iterate over the keys, migrating to the
+ * right node as needed. */
+ int cursor = -999, timeout = config.cluster_manager_command.timeout;
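+ /* cursor starts negative as a first-iteration sentinel: it is reset
+ * to 0 before the first SCAN call, and SCAN itself returns a cursor
+ * of 0 once the iteration is complete. */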
+ while (cursor != 0) {
+ if (cursor < 0) cursor = 0;
+ freeReplyObject(src_reply);
+ src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d",
+ cursor, 1000);
+ if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) {
+ if (src_reply && src_reply->str) reply_err = src_reply->str;
+ success = 0;
+ goto cleanup;
+ }
+ assert(src_reply->type == REDIS_REPLY_ARRAY);
+ assert(src_reply->elements >= 2);
+ assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY);
+ if (src_reply->element[0]->type == REDIS_REPLY_STRING)
+ cursor = atoi(src_reply->element[0]->str);
+ else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER)
+ cursor = src_reply->element[0]->integer;
+ int keycount = src_reply->element[1]->elements;
+ for (i = 0; i < keycount; i++) {
+ redisReply *kr = src_reply->element[1]->element[i];
+ assert(kr->type == REDIS_REPLY_STRING);
+ char *key = kr->str;
+ uint16_t slot = clusterManagerKeyHashSlot(key, kr->len);
+ clusterManagerNode *target = slots_map[slot];
+ printf("Migrating %s to %s:%d: ", key, target->ip, target->port);
+ redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt,
+ target->ip, target->port,
+ key, 0, timeout,
+ "COPY", "REPLACE");
+ if (!r || r->type == REDIS_REPLY_ERROR) {
+ if (r && r->str) {
+ clusterManagerLogErr("Source %s:%d replied with "
+ "error:\n%s\n", src_ip, src_port,
+ r->str);
+ }
+ success = 0;
+ }
+ freeReplyObject(r);
+ if (!success) goto cleanup;
+ clusterManagerLogOk("OK\n");
+ }
+ }
+cleanup:
+ if (reply_err)
+ clusterManagerLogErr("Source %s:%d replied with error:\n%s\n",
+ src_ip, src_port, reply_err);
+ if (src_ctx) redisFree(src_ctx);
+ if (src_reply) freeReplyObject(src_reply);
+ return success;
+invalid_args:
+ fprintf(stderr, "%s", invalid_args_msg);
+ return 0;
+}
+
+static int clusterManagerCommandCall(int argc, char **argv) {
+ int port = 0, i;
+ char *ip = NULL;
+ if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args;
+ clusterManagerNode *refnode = clusterManagerNewNode(ip, port);
+ if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0;
+ argc--;
+ argv++;
+ size_t *argvlen = zmalloc(argc*sizeof(size_t));
+ clusterManagerLogInfo(">>> Calling");
+ for (i = 0; i < argc; i++) {
+ argvlen[i] = strlen(argv[i]);
+ printf(" %s", argv[i]);
+ }
+ printf("\n");
+ listIter li;
+ listNode *ln;
+ listRewind(cluster_manager.nodes, &li);
+ while ((ln = listNext(&li)) != NULL) {
+ clusterManagerNode *n = ln->value;
+ if (!n->context && !clusterManagerNodeConnect(n)) continue;
+ redisReply *reply = NULL;
+ redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen);
+ int status = redisGetReply(n->context, (void **)(&reply));
+ if (status != REDIS_OK || reply == NULL)
+ printf("%s:%d: Failed!\n", n->ip, n->port);
+ else {
+ sds formatted_reply = cliFormatReplyTTY(reply, "");
+ printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply);
+ sdsfree(formatted_reply);
+ }
+ if (reply != NULL) freeReplyObject(reply);
+ }
+ zfree(argvlen);
+ return 1;
+invalid_args:
+ fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG);
+ return 0;
+}
+
+static int clusterManagerCommandHelp(int argc, char **argv) {
+ UNUSED(argc);
+ UNUSED(argv);
+ int commands_count = sizeof(clusterManagerCommands) /
+ sizeof(clusterManagerCommandDef);
+ int i = 0, j;
+ fprintf(stderr, "Cluster Manager Commands:\n");
+ int padding = 15;
+ for (; i < commands_count; i++) {
+ clusterManagerCommandDef *def = &(clusterManagerCommands[i]);
+ int namelen = strlen(def->name), padlen = padding - namelen;
+ fprintf(stderr, " %s", def->name);
+ for (j = 0; j < padlen; j++) fprintf(stderr, " ");
+ fprintf(stderr, "%s\n", (def->args ? def->args : ""));
+ if (def->options != NULL) {
+ int optslen = strlen(def->options);
+ char *p = def->options, *eos = p + optslen;
+ char *comma = NULL;
+ while ((comma = strchr(p, ',')) != NULL) {
+ int deflen = (int)(comma - p);
+ char buf[255];
+ memcpy(buf, p, deflen);
+ buf[deflen] = '\0';
+ for (j = 0; j < padding; j++) fprintf(stderr, " ");
+ fprintf(stderr, " --cluster-%s\n", buf);
+ p = comma + 1;
+ if (p >= eos) break;
+ }
+ if (p < eos) {
+ for (j = 0; j < padding; j++) fprintf(stderr, " ");
+ fprintf(stderr, " --cluster-%s\n", p);
+ }
+ }
+ }
+ fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you "
+ "can specify the host and port of any working node in "
+ "the cluster.\n\n");
+ return 0;
}
/*------------------------------------------------------------------------------
* Latency and latency history modes
*--------------------------------------------------------------------------- */
+static void latencyModePrint(long long min, long long max, double avg, long long count) {
+ if (config.output == OUTPUT_STANDARD) {
+ printf("min: %lld, max: %lld, avg: %.2f (%lld samples)",
+ min, max, avg, count);
+ fflush(stdout);
+ } else if (config.output == OUTPUT_CSV) {
+ printf("%lld,%lld,%.2f,%lld\n", min, max, avg, count);
+ } else if (config.output == OUTPUT_RAW) {
+ printf("%lld %lld %.2f %lld\n", min, max, avg, count);
+ }
+}
+
#define LATENCY_SAMPLE_RATE 10 /* milliseconds. */
#define LATENCY_HISTORY_DEFAULT_INTERVAL 15000 /* milliseconds. */
static void latencyMode(void) {
@@ -1074,10 +5343,18 @@ static void latencyMode(void) {
double avg;
long long history_start = mstime();
+ /* Set a default for the interval in case the --latency option is
+ * used with --raw, --csv, or when stdout is redirected to a non-tty. */
+ if (config.interval == 0) {
+ config.interval = 1000;
+ } else {
+ config.interval /= 1000; /* We need to convert to milliseconds. */
+ }
+
if (!context) exit(1);
while(1) {
start = mstime();
- reply = redisCommand(context,"PING");
+ reply = reconnectingRedisCommand(context,"PING");
if (reply == NULL) {
fprintf(stderr,"\nI/O error\n");
exit(1);
@@ -1094,9 +5371,19 @@ static void latencyMode(void) {
tot += latency;
avg = (double) tot/count;
}
- printf("\x1b[0G\x1b[2Kmin: %lld, max: %lld, avg: %.2f (%lld samples)",
- min, max, avg, count);
- fflush(stdout);
+
+ if (config.output == OUTPUT_STANDARD) {
+ printf("\x1b[0G\x1b[2K"); /* Clear the line. */
+ latencyModePrint(min,max,avg,count);
+ } else {
+ if (config.latency_history) {
+ latencyModePrint(min,max,avg,count);
+ } else if (mstime()-history_start > config.interval) {
+ latencyModePrint(min,max,avg,count);
+ exit(0);
+ }
+ }
+
if (config.latency_history && mstime()-history_start > history_interval)
{
printf(" -- %.2f seconds range\n", (float)(mstime()-history_start)/1000);
@@ -1108,6 +5395,148 @@ static void latencyMode(void) {
}
/*------------------------------------------------------------------------------
+ * Latency distribution mode -- requires a 256-color xterm
+ *--------------------------------------------------------------------------- */
+
+#define LATENCY_DIST_DEFAULT_INTERVAL 1000 /* milliseconds. */
+
+/* Structure to store samples distribution. */
+struct distsamples {
+ long long max; /* Max latency to fit into this interval (usec). */
+ long long count; /* Number of samples in this interval. */
+ int character; /* Associated character in visualization. */
+};
+
+/* Helper function for latencyDistMode(). Performs the spectrum visualization
+ * of the collected samples targeting a 256-color xterm terminal.
+ *
+ * Takes an array of distsamples structures, ordered from smaller to bigger
+ * 'max' value. The last sample's max must be 0: it holds all the samples
+ * greater than the previous one, and also acts as the stop sentinel.
+ *
+ * 'tot' is the total number of samples in the different buckets, that is,
+ * SUM(samples[i].count) for i from 0 up to the last sample.
+ *
+ * As a side effect the function resets every bucket count to 0. */
+void showLatencyDistSamples(struct distsamples *samples, long long tot) {
+ int j;
+
+ /* We convert each sample count into an index inside the palette
+ * proportional to the percentage a given bucket represents.
+ * This way the intensity of the different parts of the spectrum
+ * doesn't change relative to the number of requests, which avoids
+ * polluting the visualization with non-latency related info. */
+ printf("\033[38;5;0m"); /* Set foreground color to black. */
+ for (j = 0; ; j++) {
+ int coloridx =
+ ceil((float) samples[j].count / tot * (spectrum_palette_size-1));
+ int color = spectrum_palette[coloridx];
+ printf("\033[48;5;%dm%c", (int)color, samples[j].character);
+ samples[j].count = 0;
+ if (samples[j].max == 0) break; /* Last sample. */
+ }
+ printf("\033[0m\n");
+ fflush(stdout);
+}
+
+/* Show the legend: different buckets values and colors meaning, so
+ * that the spectrum is more easily readable. */
+void showLatencyDistLegend(void) {
+ int j;
+
+ printf("---------------------------------------------\n");
+ printf(". - * # .01 .125 .25 .5 milliseconds\n");
+ printf("1,2,3,...,9 from 1 to 9 milliseconds\n");
+ printf("A,B,C,D,E 10,20,30,40,50 milliseconds\n");
+ printf("F,G,H,I,J .1,.2,.3,.4,.5 seconds\n");
+ printf("K,L,M,N,O,P,Q,? 1,2,4,8,16,30,60,>60 seconds\n");
+ printf("From 0 to 100%%: ");
+ for (j = 0; j < spectrum_palette_size; j++) {
+ printf("\033[48;5;%dm ", spectrum_palette[j]);
+ }
+ printf("\033[0m\n");
+ printf("---------------------------------------------\n");
+}
+
+static void latencyDistMode(void) {
+ redisReply *reply;
+ long long start, latency, count = 0;
+ long long history_interval =
+ config.interval ? config.interval/1000 :
+ LATENCY_DIST_DEFAULT_INTERVAL;
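+ /* Note: config.interval is expressed in microseconds (as set by
+ * parseOptions() for the -i option), so dividing by 1000 yields the
+ * milliseconds compared against below. */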
+ long long history_start = ustime();
+ int j, outputs = 0;
+
+ struct distsamples samples[] = {
+ /* We use a mostly logarithmic scale, with certain linear intervals
+ * which are more interesting than others, like 1-10 milliseconds
+ * range. */
+ {10,0,'.'}, /* 0.01 ms */
+ {125,0,'-'}, /* 0.125 ms */
+ {250,0,'*'}, /* 0.25 ms */
+ {500,0,'#'}, /* 0.5 ms */
+ {1000,0,'1'}, /* 1 ms */
+ {2000,0,'2'}, /* 2 ms */
+ {3000,0,'3'}, /* 3 ms */
+ {4000,0,'4'}, /* 4 ms */
+ {5000,0,'5'}, /* 5 ms */
+ {6000,0,'6'}, /* 6 ms */
+ {7000,0,'7'}, /* 7 ms */
+ {8000,0,'8'}, /* 8 ms */
+ {9000,0,'9'}, /* 9 ms */
+ {10000,0,'A'}, /* 10 ms */
+ {20000,0,'B'}, /* 20 ms */
+ {30000,0,'C'}, /* 30 ms */
+ {40000,0,'D'}, /* 40 ms */
+ {50000,0,'E'}, /* 50 ms */
+ {100000,0,'F'}, /* 0.1 s */
+ {200000,0,'G'}, /* 0.2 s */
+ {300000,0,'H'}, /* 0.3 s */
+ {400000,0,'I'}, /* 0.4 s */
+ {500000,0,'J'}, /* 0.5 s */
+ {1000000,0,'K'}, /* 1 s */
+ {2000000,0,'L'}, /* 2 s */
+ {4000000,0,'M'}, /* 4 s */
+ {8000000,0,'N'}, /* 8 s */
+ {16000000,0,'O'}, /* 16 s */
+ {30000000,0,'P'}, /* 30 s */
+ {60000000,0,'Q'}, /* 1 minute */
+ {0,0,'?'}, /* > 1 minute */
+ };
+
+ if (!context) exit(1);
+ while(1) {
+ start = ustime();
+ reply = reconnectingRedisCommand(context,"PING");
+ if (reply == NULL) {
+ fprintf(stderr,"\nI/O error\n");
+ exit(1);
+ }
+ latency = ustime()-start;
+ freeReplyObject(reply);
+ count++;
+
+ /* Populate the relevant bucket. */
+ for (j = 0; ; j++) {
+ if (samples[j].max == 0 || latency <= samples[j].max) {
+ samples[j].count++;
+ break;
+ }
+ }
+
+ /* From time to time show the spectrum. */
+ if (count && (ustime()-history_start)/1000 > history_interval) {
+ if ((outputs++ % 20) == 0)
+ showLatencyDistLegend();
+ showLatencyDistSamples(samples,count);
+ history_start = ustime();
+ count = 0;
+ }
+ usleep(LATENCY_SAMPLE_RATE * 1000);
+ }
+}
+
+/*------------------------------------------------------------------------------
* Slave mode
*--------------------------------------------------------------------------- */
@@ -1212,13 +5641,14 @@ static void getRDB(void) {
nwritten = write(fd, buf, nread);
if (nwritten != nread) {
fprintf(stderr,"Error writing data to file: %s\n",
- strerror(errno));
+ (nwritten == -1) ? strerror(errno) : "short write");
exit(1);
}
payload -= nread;
}
close(s); /* Close the file descriptor ASAP as fsync() may take time. */
fsync(fd);
+ close(fd);
fprintf(stderr,"Transfer finished with success.\n");
exit(0);
}
@@ -1227,6 +5657,7 @@ static void getRDB(void) {
* Bulk import (pipe) mode
*--------------------------------------------------------------------------- */
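+/* Max number of bytes we write to the server per writable event in pipe
+ * mode: past this limit we break out of the write loop, so that the event
+ * loop also gets a chance to read replies back from the server. */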
+#define PIPEMODE_WRITE_LOOP_MAX_BYTES (128*1024)
static void pipeMode(void) {
int fd = context->fd;
long long errors = 0, replies = 0, obuf_len = 0, obuf_pos = 0;
@@ -1303,6 +5734,8 @@ static void pipeMode(void) {
/* Handle the writable state: we can send protocol to the server. */
if (mask & AE_WRITABLE) {
+ ssize_t loop_nwritten = 0;
+
while(1) {
/* Transfer current buffer to server. */
if (obuf_len != 0) {
@@ -1319,6 +5752,7 @@ static void pipeMode(void) {
}
obuf_len -= nwritten;
obuf_pos += nwritten;
+ loop_nwritten += nwritten;
if (obuf_len != 0) break; /* Can't accept more data. */
}
/* If buffer is empty, load from stdin. */
@@ -1354,7 +5788,8 @@ static void pipeMode(void) {
obuf_pos = 0;
}
}
- if (obuf_len == 0 && eof) break;
+ if ((obuf_len == 0 && eof) ||
+ loop_nwritten > PIPEMODE_WRITE_LOOP_MAX_BYTES) break;
}
}
@@ -1387,7 +5822,9 @@ static void pipeMode(void) {
#define TYPE_SET 2
#define TYPE_HASH 3
#define TYPE_ZSET 4
-#define TYPE_NONE 5
+#define TYPE_STREAM 5
+#define TYPE_NONE 6
+#define TYPE_COUNT 7
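+/* TYPE_COUNT must match the number of TYPE_* constants above: findBigKeys()
+ * sizes its per-type stats arrays with it, and its typename[] and typeunit[]
+ * arrays are indexed by the TYPE_* constants. */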
static redisReply *sendScan(unsigned long long *it) {
redisReply *reply = redisCommand(context, "SCAN %llu", *it);
@@ -1412,7 +5849,7 @@ static redisReply *sendScan(unsigned long long *it) {
assert(reply->element[1]->type == REDIS_REPLY_ARRAY);
/* Update iterator */
- *it = atoi(reply->element[0]->str);
+ *it = strtoull(reply->element[0]->str, NULL, 10);
return reply;
}
@@ -1446,6 +5883,8 @@ static int toIntType(char *key, char *type) {
return TYPE_HASH;
} else if(!strcmp(type, "zset")) {
return TYPE_ZSET;
+ } else if(!strcmp(type, "stream")) {
+ return TYPE_STREAM;
} else if(!strcmp(type, "none")) {
return TYPE_NONE;
} else {
@@ -1470,8 +5909,13 @@ static void getKeyTypes(redisReply *keys, int *types) {
keys->element[i]->str, context->err, context->errstr);
exit(1);
} else if(reply->type != REDIS_REPLY_STATUS) {
- fprintf(stderr, "Invalid reply type (%d) for TYPE on key '%s'!\n",
- reply->type, keys->element[i]->str);
+ if(reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr, "TYPE returned an error: %s\n", reply->str);
+ } else {
+ fprintf(stderr,
+ "Invalid reply type (%d) for TYPE on key '%s'!\n",
+ reply->type, keys->element[i]->str);
+ }
exit(1);
}
@@ -1497,15 +5941,15 @@ static void getKeySizes(redisReply *keys, int *types,
keys->element[i]->str);
}
- /* Retreive sizes */
+ /* Retrieve sizes */
for(i=0;i<keys->elements;i++) {
- /* Skip keys that dissapeared between SCAN and TYPE */
+ /* Skip keys that disappeared between SCAN and TYPE */
if(types[i] == TYPE_NONE) {
sizes[i] = 0;
continue;
}
- /* Retreive size */
+ /* Retrieve size */
if(redisGetReply(context, (void**)&reply)!=REDIS_OK) {
fprintf(stderr, "Error getting size for key '%s' (%d: %s)\n",
keys->element[i]->str, context->err, context->errstr);
@@ -1526,11 +5970,11 @@ static void getKeySizes(redisReply *keys, int *types,
}
static void findBigKeys(void) {
- unsigned long long biggest[5] = {0}, counts[5] = {0}, totalsize[5] = {0};
+ unsigned long long biggest[TYPE_COUNT] = {0}, counts[TYPE_COUNT] = {0}, totalsize[TYPE_COUNT] = {0};
unsigned long long sampled = 0, total_keys, totlen=0, *sizes=NULL, it=0;
- sds maxkeys[5] = {0};
- char *typename[] = {"string","list","set","hash","zset"};
- char *typeunit[] = {"bytes","items","members","fields","members"};
+ sds maxkeys[TYPE_COUNT] = {0};
+ char *typename[] = {"string","list","set","hash","zset","stream","none"};
+ char *typeunit[] = {"bytes","items","members","fields","members","entries",""};
redisReply *reply, *keys;
unsigned int arrsize=0, i;
int type, *types=NULL;
@@ -1575,7 +6019,7 @@ static void findBigKeys(void) {
arrsize = keys->elements;
}
- /* Retreive types and then sizes */
+ /* Retrieve types and then sizes */
getKeyTypes(keys, types);
getKeySizes(keys, types, sizes);
@@ -1656,6 +6100,129 @@ static void findBigKeys(void) {
exit(0);
}
+static void getKeyFreqs(redisReply *keys, unsigned long long *freqs) {
+ redisReply *reply;
+ unsigned int i;
+
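+ /* Note: OBJECT FREQ only works when the server is configured with an
+ * LFU maxmemory policy; with other policies it replies with an error,
+ * which is reported below. */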
+ /* Pipeline OBJECT freq commands */
+ for(i=0;i<keys->elements;i++) {
+ redisAppendCommand(context, "OBJECT freq %s", keys->element[i]->str);
+ }
+
+ /* Retrieve freqs */
+ for(i=0;i<keys->elements;i++) {
+ if(redisGetReply(context, (void**)&reply)!=REDIS_OK) {
+ fprintf(stderr, "Error getting freq for key '%s' (%d: %s)\n",
+ keys->element[i]->str, context->err, context->errstr);
+ exit(1);
+ } else if(reply->type != REDIS_REPLY_INTEGER) {
+ if(reply->type == REDIS_REPLY_ERROR) {
+ fprintf(stderr, "Error: %s\n", reply->str);
+ exit(1);
+ } else {
+ fprintf(stderr, "Warning: OBJECT freq on '%s' failed (may have been deleted)\n", keys->element[i]->str);
+ freqs[i] = 0;
+ }
+ } else {
+ freqs[i] = reply->integer;
+ }
+ freeReplyObject(reply);
+ }
+}
+
+#define HOTKEYS_SAMPLE 16
+static void findHotKeys(void) {
+ redisReply *keys, *reply;
+ unsigned long long counters[HOTKEYS_SAMPLE] = {0};
+ sds hotkeys[HOTKEYS_SAMPLE] = {NULL};
+ unsigned long long sampled = 0, total_keys, *freqs = NULL, it = 0;
+ unsigned int arrsize = 0, i, k;
+ double pct;
+
+ /* Total keys pre scanning */
+ total_keys = getDbSize();
+
+ /* Status message */
+ printf("\n# Scanning the entire keyspace to find hot keys as well as\n");
+ printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n");
+ printf("# per 100 SCAN commands (not usually needed).\n\n");
+
+ /* SCAN loop */
+ do {
+ /* Calculate approximate percentage completion */
+ pct = 100 * (double)sampled/total_keys;
+
+ /* Grab some keys and point to the keys array */
+ reply = sendScan(&it);
+ keys = reply->element[1];
+
+ /* Reallocate our freqs array if we need to */
+ if(keys->elements > arrsize) {
+ freqs = zrealloc(freqs, sizeof(unsigned long long)*keys->elements);
+
+ if(!freqs) {
+ fprintf(stderr, "Failed to allocate storage for keys!\n");
+ exit(1);
+ }
+
+ arrsize = keys->elements;
+ }
+
+ getKeyFreqs(keys, freqs);
+
+ /* Now update our stats */
+ for(i=0;i<keys->elements;i++) {
+ sampled++;
+ /* Update overall progress */
+ if(sampled % 1000000 == 0) {
+ printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled);
+ }
+
+ /* Insert into the hot keys pool: counters[] is kept sorted in
+ * ascending order (smallest counter at index 0), in the same
+ * spirit as the server's eviction pool. */
+ k = 0;
+ while (k < HOTKEYS_SAMPLE && freqs[i] > counters[k]) k++;
+ if (k == 0) continue;
+ k--;
+ if (k == 0 || counters[k] == 0) {
+ sdsfree(hotkeys[k]);
+ } else {
+ sdsfree(hotkeys[0]);
+ memmove(counters,counters+1,sizeof(counters[0])*k);
+ memmove(hotkeys,hotkeys+1,sizeof(hotkeys[0])*k);
+ }
+ counters[k] = freqs[i];
+ hotkeys[k] = sdsnew(keys->element[i]->str);
+ printf(
+ "[%05.2f%%] Hot key '%s' found so far with counter %llu\n",
+ pct, keys->element[i]->str, freqs[i]);
+ }
+
+ /* Sleep if we've been directed to do so */
+ if(sampled && (sampled %100) == 0 && config.interval) {
+ usleep(config.interval);
+ }
+
+ freeReplyObject(reply);
+ } while(it != 0);
+
+ if (freqs) zfree(freqs);
+
+ /* We're done */
+ printf("\n-------- summary -------\n\n");
+
+ printf("Sampled %llu keys in the keyspace!\n", sampled);
+
+ for (i=1; i<= HOTKEYS_SAMPLE; i++) {
+ k = HOTKEYS_SAMPLE - i;
+ if(counters[k]>0) {
+ printf("hot key found with counter: %llu\tkeyname: %s\n", counters[k], hotkeys[k]);
+ sdsfree(hotkeys[k]);
+ }
+ }
+
+ exit(0);
+}
+
/*------------------------------------------------------------------------------
* Stats mode
*--------------------------------------------------------------------------- */
@@ -1673,7 +6240,7 @@ static char *getInfoField(char *info, char *field) {
n1 = strchr(p,'\r');
n2 = strchr(p,',');
if (n2 && n2 < n1) n1 = n2;
- result = malloc(sizeof(char)*(n1-p)+1);
+ result = zmalloc(sizeof(char)*(n1-p)+1);
memcpy(result,p,(n1-p));
result[n1-p] = '\0';
return result;
@@ -1687,7 +6254,7 @@ static long getLongInfoField(char *info, char *field) {
if (!value) return LONG_MIN;
l = strtol(value,NULL,10);
- free(value);
+ zfree(value);
return l;
}
@@ -1703,7 +6270,7 @@ void bytesToHuman(char *s, long long n) {
}
if (n < 1024) {
/* Bytes */
- sprintf(s,"%lluB",n);
+ sprintf(s,"%lldB",n);
return;
} else if (n < (1024*1024)) {
d = (double)n/(1024);
@@ -1726,7 +6293,7 @@ static void statMode(void) {
char buf[64];
int j;
- reply = reconnectingInfo();
+ reply = reconnectingRedisCommand(context,"INFO");
if (reply->type == REDIS_REPLY_ERROR) {
printf("ERROR: %s\n", reply->str);
exit(1);
@@ -1766,7 +6333,7 @@ static void statMode(void) {
sprintf(buf,"%ld",aux);
printf("%-8s",buf);
- /* Requets */
+ /* Requests */
aux = getLongInfoField(reply->str,"total_commands_processed");
sprintf(buf,"%ld (+%ld)",aux,requests == 0 ? 0 : aux-requests);
printf("%-19s",buf);
@@ -1780,6 +6347,7 @@ static void statMode(void) {
/* Children */
aux = getLongInfoField(reply->str,"bgsave_in_progress");
aux |= getLongInfoField(reply->str,"aof_rewrite_in_progress") << 1;
+ aux |= getLongInfoField(reply->str,"loading") << 2;
switch(aux) {
case 0: break;
case 1:
@@ -1791,6 +6359,9 @@ static void statMode(void) {
case 3:
printf("SAVE+AOF");
break;
+ case 4:
+ printf("LOAD");
+ break;
}
printf("\n");
@@ -1833,6 +6404,97 @@ static void scanMode(void) {
}
/*------------------------------------------------------------------------------
+ * LRU test mode
+ *--------------------------------------------------------------------------- */
+
+/* Return an integer from min to max (both inclusive) using a power-law
+ * distribution, depending on the value of alpha: the greater the alpha
+ * the more bias towards lower values.
+ *
+ * With alpha = 6.2 the output follows the 80-20 rule where 20% of
+ * the returned numbers will account for 80% of the frequency. */
+long long powerLawRand(long long min, long long max, double alpha) {
+ double pl, r;
+
+ max += 1;
+ r = ((double)rand()) / RAND_MAX;
+ pl = pow(
+ ((pow(max,alpha+1) - pow(min,alpha+1))*r + pow(min,alpha+1)),
+ (1.0/(alpha+1)));
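+ /* pl is distributed in [min,max] with density proportional to x^alpha
+ * (inverse transform sampling); the return below mirrors it so that
+ * the low values are the frequent ones. */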
+ return (max-1-(long long)pl)+min;
+}
+
+/* Generates a key name among a set of lru_test_sample_size keys, using
+ * an 80-20 distribution. */
+void LRUTestGenKey(char *buf, size_t buflen) {
+ snprintf(buf, buflen, "lru:%lld",
+ powerLawRand(1, config.lru_test_sample_size, 6.2));
+}
+
+#define LRU_CYCLE_PERIOD 1000 /* 1000 milliseconds. */
+#define LRU_CYCLE_PIPELINE_SIZE 250
+static void LRUTestMode(void) {
+ redisReply *reply;
+ char key[128];
+ long long start_cycle;
+ int j;
+
+ srand(time(NULL)^getpid());
+ while(1) {
+ /* Perform cycles of 1 second with 50% writes and 50% reads.
+ * We use pipelining, batching LRU_CYCLE_PIPELINE_SIZE writes / reads
+ * per cycle, in order to fill the target instance quickly. */
+ start_cycle = mstime();
+ long long hits = 0, misses = 0;
+ while(mstime() - start_cycle < LRU_CYCLE_PERIOD) {
+ /* Write cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ char val[6];
+ val[5] = '\0';
+ for (int i = 0; i < 5; i++) val[i] = 'A'+rand()%('z'-'A');
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "SET %s %s",key,val);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++)
+ redisGetReply(context, (void**)&reply);
+
+ /* Read cycle. */
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ LRUTestGenKey(key,sizeof(key));
+ redisAppendCommand(context, "GET %s",key);
+ }
+ for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) {
+ if (redisGetReply(context, (void**)&reply) == REDIS_OK) {
+ switch(reply->type) {
+ case REDIS_REPLY_ERROR:
+ printf("%s\n", reply->str);
+ break;
+ case REDIS_REPLY_NIL:
+ misses++;
+ break;
+ default:
+ hits++;
+ break;
+ }
+ }
+ }
+
+ if (context->err) {
+ fprintf(stderr,"I/O error during LRU test\n");
+ exit(1);
+ }
+ }
+ /* Print stats. */
+ printf(
+ "%lld Gets/sec | Hits: %lld (%.2f%%) | Misses: %lld (%.2f%%)\n",
+ hits+misses,
+ hits, (double)hits/(hits+misses)*100,
+ misses, (double)misses/(hits+misses)*100);
+ }
+ exit(0);
+}
+
+/*------------------------------------------------------------------------------
 * Intrinsic latency mode.
*
* Measure max latency of a running process that does not result from
@@ -1864,7 +6526,7 @@ unsigned long compute_something_fast(void) {
}
static void intrinsicLatencyModeStop(int s) {
- REDIS_NOTUSED(s);
+ UNUSED(s);
force_cancel_loop = 1;
}
@@ -1892,7 +6554,7 @@ static void intrinsicLatencyMode(void) {
}
double avg_us = (double)run_time/runs;
- double avg_ns = avg_us * 10e3;
+ double avg_ns = avg_us * 1e3;
if (force_cancel_loop || end > test_end) {
printf("\n%lld total runs "
"(avg latency: "
@@ -1923,7 +6585,10 @@ int main(int argc, char **argv) {
config.monitor_mode = 0;
config.pubsub_mode = 0;
config.latency_mode = 0;
+ config.latency_dist_mode = 0;
config.latency_history = 0;
+ config.lru_test_mode = 0;
+ config.lru_test_sample_size = 0;
config.cluster_mode = 0;
config.slave_mode = 0;
config.getrdb_mode = 0;
@@ -1935,28 +6600,69 @@ int main(int argc, char **argv) {
config.pipe_mode = 0;
config.pipe_timeout = REDIS_CLI_DEFAULT_PIPE_TIMEOUT;
config.bigkeys = 0;
+ config.hotkeys = 0;
config.stdinarg = 0;
config.auth = NULL;
config.eval = NULL;
+ config.eval_ldb = 0;
+ config.eval_ldb_end = 0;
+ config.eval_ldb_sync = 0;
+ config.enable_ldb_on_eval = 0;
config.last_cmd_type = -1;
+ config.verbose = 0;
+ config.no_auth_warning = 0;
+ config.cluster_manager_command.name = NULL;
+ config.cluster_manager_command.argc = 0;
+ config.cluster_manager_command.argv = NULL;
+ config.cluster_manager_command.flags = 0;
+ config.cluster_manager_command.replicas = 0;
+ config.cluster_manager_command.from = NULL;
+ config.cluster_manager_command.to = NULL;
+ config.cluster_manager_command.weight = NULL;
+ config.cluster_manager_command.weight_argc = 0;
+ config.cluster_manager_command.slots = 0;
+ config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT;
+ config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE;
+ config.cluster_manager_command.threshold =
+ CLUSTER_MANAGER_REBALANCE_THRESHOLD;
+ pref.hints = 1;
+
+ spectrum_palette = spectrum_palette_color;
+ spectrum_palette_size = spectrum_palette_color_size;
if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL))
config.output = OUTPUT_RAW;
else
config.output = OUTPUT_STANDARD;
config.mb_delim = sdsnew("\n");
- cliInitHelp();
firstarg = parseOptions(argc,argv);
argc -= firstarg;
argv += firstarg;
+ /* Cluster Manager mode */
+ if (CLUSTER_MANAGER_MODE()) {
+ clusterManagerCommandProc *proc = validateClusterManagerCommand();
+ if (!proc) {
+ sdsfree(config.hostip);
+ sdsfree(config.mb_delim);
+ exit(1);
+ }
+ clusterManagerMode(proc);
+ }
+
/* Latency mode */
if (config.latency_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
latencyMode();
}
+ /* Latency distribution mode */
+ if (config.latency_dist_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ latencyDistMode();
+ }
+
/* Slave mode */
if (config.slave_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
@@ -1981,6 +6687,12 @@ int main(int argc, char **argv) {
findBigKeys();
}
+ /* Find hot keys */
+ if (config.hotkeys) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ findHotKeys();
+ }
+
/* Stat mode */
if (config.stat_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
@@ -1994,6 +6706,12 @@ int main(int argc, char **argv) {
scanMode();
}
+ /* LRU test mode */
+ if (config.lru_test_mode) {
+ if (cliConnect(0) == REDIS_ERR) exit(1);
+ LRUTestMode();
+ }
+
/* Intrinsic latency mode */
if (config.intrinsic_latency_mode) intrinsicLatencyMode();
diff --git a/src/redis-trib.rb b/src/redis-trib.rb
index 6002e4caa..b1af83069 100755
--- a/src/redis-trib.rb
+++ b/src/redis-trib.rb
@@ -1,1373 +1,129 @@
#!/usr/bin/env ruby
-# TODO (temporary here, we'll move this into the Github issues once
-# redis-trib initial implementation is completed).
-#
-# - Make sure that if the rehashing fails in the middle redis-trib will try
-# to recover.
-# - When redis-trib performs a cluster check, if it detects a slot move in
-# progress it should prompt the user to continue the move from where it
-# stopped.
-# - Gracefully handle Ctrl+C in move_slot to prompt the user if really stop
-# while rehashing, and performing the best cleanup possible if the user
-# forces the quit.
-# - When doing "fix" set a global Fix to true, and prompt the user to
-# fix the problem if automatically fixable every time there is something
-# to fix. For instance:
-# 1) If there is a node that pretend to receive a slot, or to migrate a
-# slot, but has no entries in that slot, fix it.
-# 2) If there is a node having keys in slots that are not owned by it
-# fix this condition moving the entries in the same node.
-# 3) Perform more possibly slow tests about the state of the cluster.
-# 4) When aborted slot migration is detected, fix it.
-
-require 'rubygems'
-require 'redis'
-
-ClusterHashSlots = 16384
-
-def xputs(s)
- case s[0..2]
- when ">>>"
- color="29;1"
- when "[ER"
- color="31;1"
- when "[OK"
- color="32"
- when "[FA","***"
- color="33"
- else
- color=nil
- end
-
- color = nil if ENV['TERM'] != "xterm"
- print "\033[#{color}m" if color
- print s
- print "\033[0m" if color
- print "\n"
+def colorized(str, color)
+ return str if !(ENV['TERM'] || '')["xterm"]
+ color_code = {
+ white: 29,
+ bold: '29;1',
+ black: 30,
+ red: 31,
+ green: 32,
+ yellow: 33,
+ blue: 34,
+ magenta: 35,
+ cyan: 36,
+ gray: 37
+ }[color]
+ return str if !color_code
+ "\033[#{color_code}m#{str}\033[0m"
end
-class ClusterNode
- def initialize(addr)
- s = addr.split(":")
- if s.length < 2
- puts "Invalid IP or Port (given as #{addr}) - use IP:Port format"
- exit 1
- end
- port = s.pop # removes port from split array
- ip = s.join(":") # if s.length > 1 here, it's IPv6, so restore address
- @r = nil
- @info = {}
- @info[:host] = ip
- @info[:port] = port
- @info[:slots] = {}
- @info[:migrating] = {}
- @info[:importing] = {}
- @info[:replicate] = false
- @dirty = false # True if we need to flush slots info into node.
- @friends = []
- end
-
- def friends
- @friends
- end
-
- def slots
- @info[:slots]
- end
-
- def has_flag?(flag)
- @info[:flags].index(flag)
- end
-
- def to_s
- "#{@info[:host]}:#{@info[:port]}"
- end
-
- def connect(o={})
- return if @r
- print "Connecting to node #{self}: "
- STDOUT.flush
- begin
- @r = Redis.new(:host => @info[:host], :port => @info[:port], :timeout => 60)
- @r.ping
- rescue
- xputs "[ERR] Sorry, can't connect to node #{self}"
- exit 1 if o[:abort]
- @r = nil
- end
- xputs "OK"
- end
-
- def assert_cluster
- info = @r.info
- if !info["cluster_enabled"] || info["cluster_enabled"].to_i == 0
- xputs "[ERR] Node #{self} is not configured as a cluster node."
- exit 1
- end
- end
-
- def assert_empty
- if !(@r.cluster("info").split("\r\n").index("cluster_known_nodes:1")) ||
- (@r.info['db0'])
- xputs "[ERR] Node #{self} is not empty. Either the node already knows other nodes (check with CLUSTER NODES) or contains some key in database 0."
- exit 1
- end
- end
-
- def load_info(o={})
- self.connect
- nodes = @r.cluster("nodes").split("\n")
- nodes.each{|n|
- # name addr flags role ping_sent ping_recv link_status slots
- split = n.split
- name,addr,flags,master_id,ping_sent,ping_recv,config_epoch,link_status = split[0..6]
- slots = split[8..-1]
- info = {
- :name => name,
- :addr => addr,
- :flags => flags.split(","),
- :replicate => master_id,
- :ping_sent => ping_sent.to_i,
- :ping_recv => ping_recv.to_i,
- :link_status => link_status
- }
- info[:replicate] = false if master_id == "-"
-
- if info[:flags].index("myself")
- @info = @info.merge(info)
- @info[:slots] = {}
- slots.each{|s|
- if s[0..0] == '['
- if s.index("->-") # Migrating
- slot,dst = s[1..-1].split("->-")
- @info[:migrating][slot.to_i] = dst
- elsif s.index("-<-") # Importing
- slot,src = s[1..-1].split("-<-")
- @info[:importing][slot.to_i] = src
- end
- elsif s.index("-")
- start,stop = s.split("-")
- self.add_slots((start.to_i)..(stop.to_i))
- else
- self.add_slots((s.to_i)..(s.to_i))
- end
- } if slots
- @dirty = false
- @r.cluster("info").split("\n").each{|e|
- k,v=e.split(":")
- k = k.to_sym
- v.chop!
- if k != :cluster_state
- @info[k] = v.to_i
- else
- @info[k] = v
- end
- }
- elsif o[:getfriends]
- @friends << info
- end
- }
- end
-
- def add_slots(slots)
- slots.each{|s|
- @info[:slots][s] = :new
- }
- @dirty = true
- end
-
- def set_as_replica(node_id)
- @info[:replicate] = node_id
- @dirty = true
- end
-
- def flush_node_config
- return if !@dirty
- if @info[:replicate]
- begin
- @r.cluster("replicate",@info[:replicate])
- rescue
- # If the cluster did not already joined it is possible that
- # the slave does not know the master node yet. So on errors
- # we return ASAP leaving the dirty flag set, to flush the
- # config later.
- return
- end
- else
- new = []
- @info[:slots].each{|s,val|
- if val == :new
- new << s
- @info[:slots][s] = true
- end
- }
- @r.cluster("addslots",*new)
- end
- @dirty = false
- end
-
- def info_string
- # We want to display the hash slots assigned to this node
- # as ranges, like in: "1-5,8-9,20-25,30"
- #
- # Note: this could be easily written without side effects,
- # we use 'slots' just to split the computation into steps.
-
- # First step: we want an increasing array of integers
- # for instance: [1,2,3,4,5,8,9,20,21,22,23,24,25,30]
- slots = @info[:slots].keys.sort
-
- # As we want to aggregate adjacent slots we convert all the
- # slot integers into ranges (with just one element)
- # So we have something like [1..1,2..2, ... and so forth.
- slots.map!{|x| x..x}
+class String
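+ # Define String#white, String#bold, etc., so that messages can be
+ # colorized with calls like "[OK] All slots covered.".green.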
- # Finally we group ranges with adjacent elements.
- slots = slots.reduce([]) {|a,b|
- if !a.empty? && b.first == (a[-1].last)+1
- a[0..-2] + [(a[-1].first)..(b.last)]
- else
- a + [b]
- end
+ %w(white bold black red green yellow blue magenta cyan gray).each{|color|
+ color = :"#{color}"
+ define_method(color){
+ colorized(self, color)
}
+ }
- # Now our task is easy, we just convert ranges with just one
- # element into a number, and a real range into a start-end format.
- # Finally we join the array using the comma as separator.
- slots = slots.map{|x|
- x.count == 1 ? x.first.to_s : "#{x.first}-#{x.last}"
- }.join(",")
-
- role = self.has_flag?("master") ? "M" : "S"
-
- if self.info[:replicate] and @dirty
- is = "S: #{self.info[:name]} #{self.to_s}"
- else
- is = "#{role}: #{self.info[:name]} #{self.to_s}\n"+
- " slots:#{slots} (#{self.slots.length} slots) "+
- "#{(self.info[:flags]-["myself"]).join(",")}"
- end
- if self.info[:replicate]
- is += "\n replicates #{info[:replicate]}"
- elsif self.has_flag?("master") && self.info[:replicas]
- is += "\n #{info[:replicas].length} additional replica(s)"
- end
- is
- end
-
- # Return a single string representing nodes and associated slots.
- # TODO: remove slaves from config when slaves will be handled
- # by Redis Cluster.
- def get_config_signature
- config = []
- @r.cluster("nodes").each_line{|l|
- s = l.split
- slots = s[8..-1].select {|x| x[0..0] != "["}
- next if slots.length == 0
- config << s[0]+":"+(slots.sort.join(","))
- }
- config.sort.join("|")
- end
-
- def info
- @info
- end
-
- def is_dirty?
- @dirty
- end
-
- def r
- @r
- end
end
-class RedisTrib
- def initialize
- @nodes = []
- @fix = false
- @errors = []
- end
-
- def check_arity(req_args, num_args)
- if ((req_args > 0 and num_args != req_args) ||
- (req_args < 0 and num_args < req_args.abs))
- xputs "[ERR] Wrong number of arguments for specified sub command"
- exit 1
- end
- end
-
- def add_node(node)
- @nodes << node
- end
-
- def cluster_error(msg)
- @errors << msg
- xputs msg
- end
-
- def get_node_by_name(name)
- @nodes.each{|n|
- return n if n.info[:name] == name.downcase
- }
- return nil
- end
-
- # This function returns the master that has the least number of replicas
- # in the cluster. If there are multiple masters with the same smaller
- # number of replicas, one at random is returned.
- def get_master_with_least_replicas
- masters = @nodes.select{|n| n.has_flag? "master"}
- sorted = masters.sort{|a,b|
- a.info[:replicas].length <=> b.info[:replicas].length
- }
- sorted[0]
- end
-
- def check_cluster
- xputs ">>> Performing Cluster Check (using node #{@nodes[0]})"
- show_nodes
- check_config_consistency
- check_open_slots
- check_slots_coverage
- end
-
- # Merge slots of every known node. If the resulting slots are equal
- # to ClusterHashSlots, then all slots are served.
- def covered_slots
- slots = {}
- @nodes.each{|n|
- slots = slots.merge(n.slots)
- }
- slots
- end
-
- def check_slots_coverage
- xputs ">>> Check slots coverage..."
- slots = covered_slots
- if slots.length == ClusterHashSlots
- xputs "[OK] All #{ClusterHashSlots} slots covered."
- else
- cluster_error \
- "[ERR] Not all #{ClusterHashSlots} slots are covered by nodes."
- fix_slots_coverage if @fix
- end
- end
-
- def check_open_slots
- xputs ">>> Check for open slots..."
- open_slots = []
- @nodes.each{|n|
- if n.info[:migrating].size > 0
- cluster_error \
- "[WARNING] Node #{n} has slots in migrating state (#{n.info[:migrating].keys.join(",")})."
- open_slots += n.info[:migrating].keys
- elsif n.info[:importing].size > 0
- cluster_error \
- "[WARNING] Node #{n} has slots in importing state (#{n.info[:importing].keys.join(",")})."
- open_slots += n.info[:importing].keys
- end
- }
- open_slots.uniq!
- if open_slots.length > 0
- xputs "[WARNING] The following slots are open: #{open_slots.join(",")}"
- end
- if @fix
- open_slots.each{|slot| fix_open_slot slot}
- end
- end
-
- def nodes_with_keys_in_slot(slot)
- nodes = []
- @nodes.each{|n|
- nodes << n if n.r.cluster("getkeysinslot",slot,1).length > 0
- }
- nodes
- end
-
- def fix_slots_coverage
- not_covered = (0...ClusterHashSlots).to_a - covered_slots.keys
- xputs ">>> Fixing slots coverage..."
- xputs "List of not covered slots: " + not_covered.join(",")
-
- # For every slot, take action depending on the actual condition:
- # 1) No node has keys for this slot.
- # 2) A single node has keys for this slot.
- # 3) Multiple nodes have keys for this slot.
- slots = {}
- not_covered.each{|slot|
- nodes = nodes_with_keys_in_slot(slot)
- slots[slot] = nodes
- xputs "Slot #{slot} has keys in #{nodes.length} nodes: #{nodes.join}"
- }
-
- none = slots.select {|k,v| v.length == 0}
- single = slots.select {|k,v| v.length == 1}
- multi = slots.select {|k,v| v.length > 1}
-
- # Handle case "1": keys in no node.
- if none.length > 0
- xputs "The folowing uncovered slots have no keys across the cluster:"
- xputs none.keys.join(",")
- yes_or_die "Fix these slots by covering with a random node?"
- none.each{|slot,nodes|
- node = @nodes.sample
- xputs ">>> Covering slot #{slot} with #{node}"
- node.r.cluster("addslots",slot)
- }
- end
-
- # Handle case "2": keys only in one node.
- if single.length > 0
- xputs "The folowing uncovered slots have keys in just one node:"
- puts single.keys.join(",")
- yes_or_die "Fix these slots by covering with those nodes?"
- single.each{|slot,nodes|
- xputs ">>> Covering slot #{slot} with #{nodes[0]}"
- nodes[0].r.cluster("addslots",slot)
- }
- end
-
- # Handle case "3": keys in multiple nodes.
- if multi.length > 0
- xputs "The folowing uncovered slots have keys in multiple nodes:"
- xputs multi.keys.join(",")
- yes_or_die "Fix these slots by moving keys into a single node?"
- multi.each{|slot,nodes|
- xputs ">>> Covering slot #{slot} moving keys to #{nodes[0]}"
- # TODO
- # 1) Set all nodes as "MIGRATING" for this slot, so that we
- # can access keys in the hash slot using ASKING.
- # 2) Move everything to node[0]
- # 3) Clear MIGRATING from nodes, and ADDSLOTS the slot to
- # node[0].
- raise "TODO: Work in progress"
- }
- end
- end
-
- # Return the owner of the specified slot
- def get_slot_owner(slot)
- @nodes.each{|n|
- n.slots.each{|s,_|
- return n if s == slot
- }
- }
- nil
- end
-
- # Slot 'slot' was found to be in importing or migrating state in one or
- # more nodes. This function fixes this condition by migrating keys where
- # it seems more sensible.
- def fix_open_slot(slot)
- puts ">>> Fixing open slot #{slot}"
-
- # Try to obtain the current slot owner, according to the current
- # nodes configuration.
- owner = get_slot_owner(slot)
-
- # If there is no slot owner, set as owner the slot with the biggest
- # number of keys, among the set of migrating / importing nodes.
- if !owner
- xputs "*** Fix me, some work to do here."
- # Select owner...
- # Use ADDSLOTS to assign the slot.
- exit 1
- end
-
- migrating = []
- importing = []
- @nodes.each{|n|
- next if n.has_flag? "slave"
- if n.info[:migrating][slot]
- migrating << n
- elsif n.info[:importing][slot]
- importing << n
- elsif n.r.cluster("countkeysinslot",slot) > 0 && n != owner
- xputs "*** Found keys about slot #{slot} in node #{n}!"
- importing << n
- end
- }
- puts "Set as migrating in: #{migrating.join(",")}"
- puts "Set as importing in: #{importing.join(",")}"
-
- # Case 1: The slot is in migrating state in one slot, and in
- # importing state in 1 slot. That's trivial to address.
- if migrating.length == 1 && importing.length == 1
- move_slot(migrating[0],importing[0],slot,:verbose=>true,:fix=>true)
- elsif migrating.length == 0 && importing.length > 0
- xputs ">>> Moving all the #{slot} slot keys to its owner #{owner}"
- importing.each {|node|
- next if node == owner
- move_slot(node,owner,slot,:verbose=>true,:fix=>true,:cold=>true)
- xputs ">>> Setting #{slot} as STABLE in #{node}"
- node.r.cluster("setslot",slot,"stable")
- }
- else
- xputs "[ERR] Sorry, Redis-trib can't fix this slot yet (work in progress)"
- end
- end
-
- # Check if all the nodes agree about the cluster configuration
- def check_config_consistency
- if !is_config_consistent?
- cluster_error "[ERR] Nodes don't agree about configuration!"
- else
- xputs "[OK] All nodes agree about slots configuration."
- end
- end
-
- def is_config_consistent?
- signatures=[]
- @nodes.each{|n|
- signatures << n.get_config_signature
- }
- return signatures.uniq.length == 1
- end
-
- def wait_cluster_join
- print "Waiting for the cluster to join"
- while !is_config_consistent?
- print "."
- STDOUT.flush
- sleep 1
- end
- print "\n"
- end
-
- def alloc_slots
- nodes_count = @nodes.length
- masters_count = @nodes.length / (@replicas+1)
- masters = []
-
- # The first step is to split instances by IP. This is useful as
- # we'll try to allocate master nodes in different physical machines
- # (as much as possible) and to allocate slaves of a given master in
- # different physical machines as well.
- #
- # This code assumes just that if the IP is different, than it is more
- # likely that the instance is running in a different physical host
- # or at least a different virtual machine.
- ips = {}
- @nodes.each{|n|
- ips[n.info[:host]] = [] if !ips[n.info[:host]]
- ips[n.info[:host]] << n
- }
-
- # Select master instances
- puts "Using #{masters_count} masters:"
- interleaved = []
- stop = false
- while not stop do
- # Take one node from each IP until we run out of nodes
- # across every IP.
- ips.each do |ip,nodes|
- if nodes.empty?
- # if this IP has no remaining nodes, check for termination
- if interleaved.length == nodes_count
- # stop when 'interleaved' has accumulated all nodes
- stop = true
- next
- end
- else
- # else, move one node from this IP to 'interleaved'
- interleaved.push nodes.shift
- end
- end
- end
-
- masters = interleaved.slice!(0, masters_count)
- nodes_count -= masters.length
-
- masters.each{|m| puts m}
-
- # Alloc slots on masters
- slots_per_node = ClusterHashSlots.to_f / masters_count
- first = 0
- cursor = 0.0
- masters.each_with_index{|n,masternum|
- last = (cursor+slots_per_node-1).round
- if last > ClusterHashSlots || masternum == masters.length-1
- last = ClusterHashSlots-1
- end
- last = first if last < first # Min step is 1.
- n.add_slots first..last
- first = last+1
- cursor += slots_per_node
- }
-
- # Select N replicas for every master.
- # We try to split the replicas among all the IPs with spare nodes
- # trying to avoid the host where the master is running, if possible.
- #
- # Note we loop two times. The first loop assigns the requested
- # number of replicas to each master. The second loop assigns any
- # remaining instances as extra replicas to masters. Some masters
- # may end up with more than their requested number of replicas, but
- # all nodes will be used.
- assignment_verbose = false
-
- [:requested,:unused].each do |assign|
- masters.each do |m|
- assigned_replicas = 0
- while assigned_replicas < @replicas
- break if nodes_count == 0
- if assignment_verbose
- if assign == :requested
- puts "Requesting total of #{@replicas} replicas " \
- "(#{assigned_replicas} replicas assigned " \
- "so far with #{nodes_count} total remaining)."
- elsif assign == :unused
- puts "Assigning extra instance to replication " \
- "role too (#{nodes_count} remaining)."
- end
- end
-
- # Return the first node not matching our current master
- node = interleaved.find{|n| n.info[:host] != m.info[:host]}
-
- # If we found a node, use it as a best-first match.
- # Otherwise, we didn't find a node on a different IP, so we
- # go ahead and use a same-IP replica.
- if node
- slave = node
- interleaved.delete node
- else
- slave = interleaved.shift
- end
- slave.set_as_replica(m.info[:name])
- nodes_count -= 1
- assigned_replicas += 1
- puts "Adding replica #{slave} to #{m}"
-
- # If we are in the "assign extra nodes" loop,
- # we want to assign one extra replica to each
- # master before repeating masters.
- # This break lets us assign extra replicas to masters
- # in a round-robin way.
- break if assign == :unused
- end
- end
- end
- end
-
- def flush_nodes_config
- @nodes.each{|n|
- n.flush_node_config
- }
- end
-
- def show_nodes
- @nodes.each{|n|
- xputs n.info_string
- }
- end
-
- # Redis Cluster config epoch collision resolution code is able to eventually
- # set a different epoch to each node after a new cluster is created, but
- # it is slow compared to assign a progressive config epoch to each node
- # before joining the cluster. However we do just a best-effort try here
- # since if we fail is not a problem.
- def assign_config_epoch
- config_epoch = 1
- @nodes.each{|n|
- begin
- n.r.cluster("set-config-epoch",config_epoch)
- rescue
- end
- config_epoch += 1
- }
- end
+COMMANDS = %w(create check info fix reshard rebalance add-node
+ del-node set-timeout call import help)
- def join_cluster
- # We use a brute force approach to make sure the node will meet
- # each other, that is, sending CLUSTER MEET messages to all the nodes
- # about the very same node.
- # Thanks to gossip this information should propagate across all the
- # cluster in a matter of seconds.
- first = false
- @nodes.each{|n|
- if !first then first = n.info; next; end # Skip the first node
- n.r.cluster("meet",first[:host],first[:port])
- }
- end
-
- def yes_or_die(msg)
- print "#{msg} (type 'yes' to accept): "
- STDOUT.flush
- if !(STDIN.gets.chomp.downcase == "yes")
- xputs "*** Aborting..."
- exit 1
- end
- end
-
- def load_cluster_info_from_node(nodeaddr)
- node = ClusterNode.new(nodeaddr)
- node.connect(:abort => true)
- node.assert_cluster
- node.load_info(:getfriends => true)
- add_node(node)
- node.friends.each{|f|
- next if f[:flags].index("noaddr") ||
- f[:flags].index("disconnected") ||
- f[:flags].index("fail")
- fnode = ClusterNode.new(f[:addr])
- fnode.connect()
- next if !fnode.r
- begin
- fnode.load_info()
- add_node(fnode)
- rescue => e
- xputs "[ERR] Unable to load info for node #{fnode}"
- end
- }
- populate_nodes_replicas_info
- end
-
- # This function is called by load_cluster_info_from_node in order to
- # add additional information to every node as a list of replicas.
- def populate_nodes_replicas_info
- # Start adding the new field to every node.
- @nodes.each{|n|
- n.info[:replicas] = []
- }
-
- # Populate the replicas field using the replicate field of slave
- # nodes.
- @nodes.each{|n|
- if n.info[:replicate]
- master = get_node_by_name(n.info[:replicate])
- if !master
- xputs "*** WARNING: #{n} claims to be slave of unknown node ID #{n.info[:replicate]}."
- else
- master.info[:replicas] << n
- end
- end
- }
- end
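+# Options accepted by each redis-trib subcommand. A value of false marks a
+# boolean flag, [] marks an option that may be repeated (its values are
+# collected into an array), and anything else means the option takes an
+# argument.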
+ALLOWED_OPTIONS={
+ "create" => {"replicas" => true},
+ "add-node" => {"slave" => false, "master-id" => true},
+ "import" => {"from" => :required, "copy" => false, "replace" => false},
+ "reshard" => {"from" => true, "to" => true, "slots" => true, "yes" => false, "timeout" => true, "pipeline" => true},
+ "rebalance" => {"weight" => [], "auto-weights" => false, "use-empty-masters" => false, "timeout" => true, "simulate" => false, "pipeline" => true, "threshold" => true},
+ "fix" => {"timeout" => 0},
+}
- # Given a list of source nodes return a "resharding plan"
- # with what slots to move in order to move "numslots" slots to another
- # instance.
- def compute_reshard_table(sources,numslots)
- moved = []
- # Sort from bigger to smaller instance, for two reasons:
- # 1) If we take less slots than instances it is better to start
- # getting from the biggest instances.
- # 2) We take one slot more from the first instance in the case of not
- # perfect divisibility. Like we have 3 nodes and need to get 10
- # slots, we take 4 from the first, and 3 from the rest. So the
- # biggest is always the first.
- sources = sources.sort{|a,b| b.slots.length <=> a.slots.length}
- source_tot_slots = sources.inject(0) {|sum,source|
- sum+source.slots.length
- }
- sources.each_with_index{|s,i|
- # Every node will provide a number of slots proportional to the
- # slots it has assigned.
- n = (numslots.to_f/source_tot_slots*s.slots.length)
- if i == 0
- n = n.ceil
+def parse_options(cmd)
+ cmd = cmd.downcase
+ idx = 0
+ options = {}
+ args = []
+ while (arg = ARGV.shift)
+ if arg[0..1] == "--"
+ option = arg[2..-1]
+
+ # --verbose is a global option
+ if option == "verbose"
+ options['verbose'] = true
+ next
+ end
+ if ALLOWED_OPTIONS[cmd] == nil ||
+ ALLOWED_OPTIONS[cmd][option] == nil
+ next
+ end
+ if ALLOWED_OPTIONS[cmd][option] != false
+ value = ARGV.shift
+ next if !value
else
- n = n.floor
- end
- s.slots.keys.sort[(0...n)].each{|slot|
- if moved.length < numslots
- moved << {:source => s, :slot => slot}
- end
- }
- }
- return moved
- end
-
- def show_reshard_table(table)
- table.each{|e|
- puts " Moving slot #{e[:slot]} from #{e[:source].info[:name]}"
- }
- end
-
- # Move slots between source and target nodes using MIGRATE.
- #
- # Options:
- # :verbose -- Print a dot for every moved key.
- # :fix -- We are moving in the context of a fix. Use REPLACE.
- # :cold -- Move keys without opening / reconfiguring the nodes.
- def move_slot(source,target,slot,o={})
- # We start marking the slot as importing in the destination node,
- # and the slot as migrating in the target host. Note that the order of
- # the operations is important, as otherwise a client may be redirected
- # to the target node that does not yet know it is importing this slot.
- print "Moving slot #{slot} from #{source} to #{target}: "; STDOUT.flush
- if !o[:cold]
- target.r.cluster("setslot",slot,"importing",source.info[:name])
- source.r.cluster("setslot",slot,"migrating",target.info[:name])
- end
- # Migrate all the keys from source to target using the MIGRATE command
- while true
- keys = source.r.cluster("getkeysinslot",slot,10)
- break if keys.length == 0
- keys.each{|key|
- begin
- source.r.client.call(["migrate",target.info[:host],target.info[:port],key,0,15000])
- rescue => e
- if o[:fix] && e.to_s =~ /BUSYKEY/
- xputs "*** Target key #{key} exists. Replace it for FIX."
- source.r.client.call(["migrate",target.info[:host],target.info[:port],key,0,15000,:replace])
- else
- puts ""
- xputs "[ERR] #{e}"
- exit 1
- end
- end
- print "." if o[:verbose]
- STDOUT.flush
- }
- end
-
- puts
- # Set the new node as the owner of the slot in all the known nodes.
- if !o[:cold]
- @nodes.each{|n|
- n.r.cluster("setslot",slot,"node",target.info[:name])
- }
- end
- end
-
- # redis-trib subcommands implementations
-
- def check_cluster_cmd(argv,opt)
- load_cluster_info_from_node(argv[0])
- check_cluster
- end
-
- def fix_cluster_cmd(argv,opt)
- @fix = true
- load_cluster_info_from_node(argv[0])
- check_cluster
- end
-
- def reshard_cluster_cmd(argv,opt)
- load_cluster_info_from_node(argv[0])
- check_cluster
- if @errors.length != 0
- puts "*** Please fix your cluster problems before resharding"
- exit 1
- end
-
- # Get number of slots
- if opt['slots']
- numslots = opt['slots'].to_i
- else
- numslots = 0
- while numslots <= 0 or numslots > ClusterHashSlots
- print "How many slots do you want to move (from 1 to #{ClusterHashSlots})? "
- numslots = STDIN.gets.to_i
- end
- end
-
- # Get the target instance
- if opt['to']
- target = get_node_by_name(opt['to'])
- if !target || target.has_flag?("slave")
- xputs "*** The specified node is not known or not a master, please retry."
- exit 1
+ value = true
end
- else
- target = nil
- while not target
- print "What is the receiving node ID? "
- target = get_node_by_name(STDIN.gets.chop)
- if !target || target.has_flag?("slave")
- xputs "*** The specified node is not known or not a master, please retry."
- target = nil
- end
- end
- end
-
- # Get the source instances
- sources = []
- if opt['from']
- opt['from'].split(',').each{|node_id|
- if node_id == "all"
- sources = "all"
- break
- end
- src = get_node_by_name(node_id)
- if !src || src.has_flag?("slave")
- xputs "*** The specified node is not known or is not a master, please retry."
- exit 1
- end
- sources << src
- }
- else
- xputs "Please enter all the source node IDs."
- xputs " Type 'all' to use all the nodes as source nodes for the hash slots."
- xputs " Type 'done' once you entered all the source nodes IDs."
- while true
- print "Source node ##{sources.length+1}:"
- line = STDIN.gets.chop
- src = get_node_by_name(line)
- if line == "done"
- break
- elsif line == "all"
- sources = "all"
- break
- elsif !src || src.has_flag?("slave")
- xputs "*** The specified node is not known or is not a master, please retry."
- elsif src.info[:name] == target.info[:name]
- xputs "*** It is not possible to use the target node as source node."
- else
- sources << src
- end
- end
- end
-
- if sources.length == 0
- puts "*** No source nodes given, operation aborted"
- exit 1
- end
-
- # Handle soures == all.
- if sources == "all"
- sources = []
- @nodes.each{|n|
- next if n.info[:name] == target.info[:name]
- next if n.has_flag?("slave")
- sources << n
- }
- end
-
- # Check if the destination node is the same of any source nodes.
- if sources.index(target)
- xputs "*** Target node is also listed among the source nodes!"
- exit 1
- end
-
- puts "\nReady to move #{numslots} slots."
- puts " Source nodes:"
- sources.each{|s| puts " "+s.info_string}
- puts " Destination node:"
- puts " #{target.info_string}"
- reshard_table = compute_reshard_table(sources,numslots)
- puts " Resharding plan:"
- show_reshard_table(reshard_table)
- if !opt['yes']
- print "Do you want to proceed with the proposed reshard plan (yes/no)? "
- yesno = STDIN.gets.chop
- exit(1) if (yesno != "yes")
- end
- reshard_table.each{|e|
- move_slot(e[:source],target,e[:slot],:verbose=>true)
- }
- end
-
- # This is an helper function for create_cluster_cmd that verifies if
- # the number of nodes and the specified replicas have a valid configuration
- # where there are at least three master nodes and enough replicas per node.
- def check_create_parameters
- masters = @nodes.length/(@replicas+1)
- if masters < 3
- puts "*** ERROR: Invalid configuration for cluster creation."
- puts "*** Redis Cluster requires at least 3 master nodes."
- puts "*** This is not possible with #{@nodes.length} nodes and #{@replicas} replicas per node."
- puts "*** At least #{3*(@replicas+1)} nodes are required."
- exit 1
- end
- end
-
- def create_cluster_cmd(argv,opt)
- opt = {'replicas' => 0}.merge(opt)
- @replicas = opt['replicas'].to_i
-
- xputs ">>> Creating cluster"
- argv[0..-1].each{|n|
- node = ClusterNode.new(n)
- node.connect(:abort => true)
- node.assert_cluster
- node.load_info
- node.assert_empty
- add_node(node)
- }
- check_create_parameters
- xputs ">>> Performing hash slots allocation on #{@nodes.length} nodes..."
- alloc_slots
- show_nodes
- yes_or_die "Can I set the above configuration?"
- flush_nodes_config
- xputs ">>> Nodes configuration updated"
- xputs ">>> Assign a different config epoch to each node"
- assign_config_epoch
- xputs ">>> Sending CLUSTER MEET messages to join the cluster"
- join_cluster
- # Give one second for the join to start, in order to avoid that
- # wait_cluster_join finds all the nodes already agreeing about the config
- # simply because they are still empty, with no slots assigned.
- sleep 1
- wait_cluster_join
- flush_nodes_config # Useful for the replicas
- check_cluster
- end
-
- def addnode_cluster_cmd(argv,opt)
- xputs ">>> Adding node #{argv[0]} to cluster #{argv[1]}"
- # Check the existing cluster
- load_cluster_info_from_node(argv[1])
- check_cluster
-
- # If --master-id was specified, try to resolve it now so that we
- # abort before starting with the node configuration.
- if opt['slave']
- if opt['master-id']
- master = get_node_by_name(opt['master-id'])
- if !master
- xputs "[ERR] No such master ID #{opt['master-id']}"
- exit 1
- end
+ # If the option is set to [], it's a multiple-arguments
+ # option. We just queue every new value into an array.
+ if ALLOWED_OPTIONS[cmd][option] == []
+ options[option] = [] if !options[option]
+ options[option] << value
else
- master = get_master_with_least_replicas
- xputs "Automatically selected master #{master}"
- end
- end
-
- # Add the new node
- new = ClusterNode.new(argv[0])
- new.connect(:abort => true)
- new.assert_cluster
- new.load_info
- new.assert_empty
- first = @nodes.first.info
- add_node(new)
-
- # Send CLUSTER MEET command to the new node
- xputs ">>> Send CLUSTER MEET to node #{new} to make it join the cluster."
- new.r.cluster("meet",first[:host],first[:port])
-
- # Additional configuration is needed if the node is added as
- # a slave.
- if opt['slave']
- wait_cluster_join
- xputs ">>> Configure node as replica of #{master}."
- new.r.cluster("replicate",master.info[:name])
- end
- xputs "[OK] New node added correctly."
- end
-
- def delnode_cluster_cmd(argv,opt)
- id = argv[1].downcase
- xputs ">>> Removing node #{id} from cluster #{argv[0]}"
-
- # Load cluster information
- load_cluster_info_from_node(argv[0])
-
- # Check if the node exists and is not empty
- node = get_node_by_name(id)
-
- if !node
- xputs "[ERR] No such node ID #{id}"
- exit 1
- end
-
- if node.slots.length != 0
- xputs "[ERR] Node #{node} is not empty! Reshard data away and try again."
- exit 1
- end
-
- # Send CLUSTER FORGET to all the nodes but the node to remove
- xputs ">>> Sending CLUSTER FORGET messages to the cluster..."
- @nodes.each{|n|
- next if n == node
- if n.info[:replicate] && n.info[:replicate].downcase == id
- # Reconfigure the slave to replicate with some other node
- master = get_master_with_least_replicas
- xputs ">>> #{n} as replica of #{master}"
- n.r.cluster("replicate",master.info[:name])
- end
- n.r.cluster("forget",argv[1])
- }
-
- # Finally shutdown the node
- xputs ">>> SHUTDOWN the node."
- node.r.shutdown
- end
-
- def set_timeout_cluster_cmd(argv,opt)
- timeout = argv[1].to_i
- if timeout < 100
- puts "Setting a node timeout of less than 100 milliseconds is a bad idea."
- exit 1
- end
-
- # Load cluster information
- load_cluster_info_from_node(argv[0])
- ok_count = 0
- err_count = 0
-
- # Send CONFIG SET cluster-node-timeout to all the nodes.
- xputs ">>> Reconfiguring node timeout in every cluster node..."
- @nodes.each{|n|
- begin
- n.r.config("set","cluster-node-timeout",timeout)
- n.r.config("rewrite")
- ok_count += 1
- xputs "*** New timeout set for #{n}"
- rescue => e
- puts "ERR setting node-timeot for #{n}: #{e}"
- err_count += 1
- end
- }
- xputs ">>> New node timeout set. #{ok_count} OK, #{err_count} ERR."
- end
-
- def call_cluster_cmd(argv,opt)
- cmd = argv[1..-1]
- cmd[0] = cmd[0].upcase
-
- # Load cluster information
- load_cluster_info_from_node(argv[0])
- xputs ">>> Calling #{cmd.join(" ")}"
- @nodes.each{|n|
- begin
- res = n.r.send(*cmd)
- puts "#{n}: #{res}"
- rescue => e
- puts "#{n}: #{e}"
- end
- }
- end
-
- def import_cluster_cmd(argv,opt)
- source_addr = opt['from']
- xputs ">>> Importing data from #{source_addr} to cluster #{argv[1]}"
-
- # Check the existing cluster.
- load_cluster_info_from_node(argv[0])
- check_cluster
-
- # Connect to the source node.
- xputs ">>> Connecting to the source Redis instance"
- src_host,src_port = source_addr.split(":")
- source = Redis.new(:host =>src_host, :port =>src_port)
- if source.info['cluster_enabled'].to_i == 1
- xputs "[ERR] The source node should not be a cluster node."
- exit 1
- end
- xputs "*** Importing #{source.dbsize} keys from DB 0"
-
- # Build a slot -> node map
- slots = {}
- @nodes.each{|n|
- n.slots.each{|s,_|
- slots[s] = n
- }
- }
-
- # Use SCAN to iterate over the keys, migrating to the
- # right node as needed.
- cursor = nil
- while cursor != 0
- cursor,keys = source.scan(cursor || 0, :count => 1000) # First call must not pass nil.
- cursor = cursor.to_i
- keys.each{|k|
- # Migrate keys using the MIGRATE command.
- slot = key_to_slot(k)
- target = slots[slot]
- print "Migrating #{k} to #{target}: "
- STDOUT.flush
- begin
- source.client.call(["migrate",target.info[:host],target.info[:port],k,0,15000])
- rescue => e
- puts e
- else
- puts "OK"
- end
- }
- end
- end
-
- def help_cluster_cmd(argv,opt)
- show_help
- exit 0
- end
-
- # Parse the options for the specific command "cmd".
- # Returns a hash populated with option => value pairs, and the index of
- # the first non-option argument in ARGV.
- def parse_options(cmd)
- idx = 1 # Current index into ARGV
- options={}
- while idx < ARGV.length && ARGV[idx][0..1] == '--'
- if ARGV[idx][0..1] == "--"
- option = ARGV[idx][2..-1]
- idx += 1
- if ALLOWED_OPTIONS[cmd] == nil || ALLOWED_OPTIONS[cmd][option] == nil
- puts "Unknown option '#{option}' for command '#{cmd}'"
- exit 1
- end
- if ALLOWED_OPTIONS[cmd][option]
- value = ARGV[idx]
- idx += 1
- else
- value = true
- end
options[option] = value
- else
- # Remaining arguments are not options.
- break
end
+ else
+ next if arg[0,1] == '-'
+ args << arg
end
-
- # Enforce mandatory options
- if ALLOWED_OPTIONS[cmd]
- ALLOWED_OPTIONS[cmd].each {|option,val|
- if !options[option] && val == :required
- puts "Option '--#{option}' is required "+ \
- "for subcommand '#{cmd}'"
- exit 1
- end
- }
- end
- return options,idx
end
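- # For instance (an illustrative walk-through, not part of the original
- # script): with ARGV = ["reshard", "--from", "all", "--slots", "100",
- # "127.0.0.1:7000"], parse_options("reshard") returns
- # {"from"=>"all", "slots"=>"100"} and idx = 5, the index of the first
- # non-option argument "127.0.0.1:7000".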
-end
-#################################################################################
-# Libraries
-#
-# We try not to depend on external libs since this is a critical part
-# of Redis Cluster.
-#################################################################################
-
-# This is the CRC16 algorithm used by Redis Cluster to hash keys.
-# Implementation according to CCITT standards.
-#
-# This is actually the XMODEM CRC 16 algorithm, using the
-# following parameters:
-#
-# Name : "XMODEM", also known as "ZMODEM", "CRC-16/ACORN"
-# Width : 16 bit
-# Poly : 1021 (That is actually x^16 + x^12 + x^5 + 1)
-# Initialization : 0000
-# Reflect Input byte : False
-# Reflect Output CRC : False
-# Xor constant to output CRC : 0000
-# Output for "123456789" : 31C3
-
-module RedisClusterCRC16
- def RedisClusterCRC16.crc16(bytes)
- crc = 0
- bytes.each_byte{|b|
- crc = ((crc<<8) & 0xffff) ^ XMODEMCRC16Lookup[((crc>>8)^b) & 0xff]
- }
- crc
- end
-
-private
- XMODEMCRC16Lookup = [
- 0x0000,0x1021,0x2042,0x3063,0x4084,0x50a5,0x60c6,0x70e7,
- 0x8108,0x9129,0xa14a,0xb16b,0xc18c,0xd1ad,0xe1ce,0xf1ef,
- 0x1231,0x0210,0x3273,0x2252,0x52b5,0x4294,0x72f7,0x62d6,
- 0x9339,0x8318,0xb37b,0xa35a,0xd3bd,0xc39c,0xf3ff,0xe3de,
- 0x2462,0x3443,0x0420,0x1401,0x64e6,0x74c7,0x44a4,0x5485,
- 0xa56a,0xb54b,0x8528,0x9509,0xe5ee,0xf5cf,0xc5ac,0xd58d,
- 0x3653,0x2672,0x1611,0x0630,0x76d7,0x66f6,0x5695,0x46b4,
- 0xb75b,0xa77a,0x9719,0x8738,0xf7df,0xe7fe,0xd79d,0xc7bc,
- 0x48c4,0x58e5,0x6886,0x78a7,0x0840,0x1861,0x2802,0x3823,
- 0xc9cc,0xd9ed,0xe98e,0xf9af,0x8948,0x9969,0xa90a,0xb92b,
- 0x5af5,0x4ad4,0x7ab7,0x6a96,0x1a71,0x0a50,0x3a33,0x2a12,
- 0xdbfd,0xcbdc,0xfbbf,0xeb9e,0x9b79,0x8b58,0xbb3b,0xab1a,
- 0x6ca6,0x7c87,0x4ce4,0x5cc5,0x2c22,0x3c03,0x0c60,0x1c41,
- 0xedae,0xfd8f,0xcdec,0xddcd,0xad2a,0xbd0b,0x8d68,0x9d49,
- 0x7e97,0x6eb6,0x5ed5,0x4ef4,0x3e13,0x2e32,0x1e51,0x0e70,
- 0xff9f,0xefbe,0xdfdd,0xcffc,0xbf1b,0xaf3a,0x9f59,0x8f78,
- 0x9188,0x81a9,0xb1ca,0xa1eb,0xd10c,0xc12d,0xf14e,0xe16f,
- 0x1080,0x00a1,0x30c2,0x20e3,0x5004,0x4025,0x7046,0x6067,
- 0x83b9,0x9398,0xa3fb,0xb3da,0xc33d,0xd31c,0xe37f,0xf35e,
- 0x02b1,0x1290,0x22f3,0x32d2,0x4235,0x5214,0x6277,0x7256,
- 0xb5ea,0xa5cb,0x95a8,0x8589,0xf56e,0xe54f,0xd52c,0xc50d,
- 0x34e2,0x24c3,0x14a0,0x0481,0x7466,0x6447,0x5424,0x4405,
- 0xa7db,0xb7fa,0x8799,0x97b8,0xe75f,0xf77e,0xc71d,0xd73c,
- 0x26d3,0x36f2,0x0691,0x16b0,0x6657,0x7676,0x4615,0x5634,
- 0xd94c,0xc96d,0xf90e,0xe92f,0x99c8,0x89e9,0xb98a,0xa9ab,
- 0x5844,0x4865,0x7806,0x6827,0x18c0,0x08e1,0x3882,0x28a3,
- 0xcb7d,0xdb5c,0xeb3f,0xfb1e,0x8bf9,0x9bd8,0xabbb,0xbb9a,
- 0x4a75,0x5a54,0x6a37,0x7a16,0x0af1,0x1ad0,0x2ab3,0x3a92,
- 0xfd2e,0xed0f,0xdd6c,0xcd4d,0xbdaa,0xad8b,0x9de8,0x8dc9,
- 0x7c26,0x6c07,0x5c64,0x4c45,0x3ca2,0x2c83,0x1ce0,0x0cc1,
- 0xef1f,0xff3e,0xcf5d,0xdf7c,0xaf9b,0xbfba,0x8fd9,0x9ff8,
- 0x6e17,0x7e36,0x4e55,0x5e74,0x2e93,0x3eb2,0x0ed1,0x1ef0
- ]
-end
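-
-# A minimal sanity check for the table-driven implementation above (an
-# illustrative note, not part of the original script): the test vector
-# quoted in the comment holds, i.e.
-# RedisClusterCRC16.crc16("123456789") #=> 0x31c3 (12739 decimal).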
-
-# Turn a key name into the corresponding Redis Cluster slot.
-def key_to_slot(key)
- # Only hash what is inside {...} if there is such a pattern in the key.
- # Note that the specification uses the content between the first { and
- # the first } that follows it. If we find {} with nothing in the middle,
- # the whole key is hashed as usual.
- s = key.index "{"
- if s
- e = key.index "}",s+1
- if e && e != s+1
- key = key[s+1..e-1]
- end
- end
- RedisClusterCRC16.crc16(key) % 16384
+ return options,args
end
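-# Hash tag examples for key_to_slot (an illustrative note, not part of the
-# original script): since crc16("123456789") is 0x31c3 = 12739,
-# key_to_slot("123456789") is 12739 % 16384 = 12739, and
-# key_to_slot("{123456789}.suffix") maps to the same slot, because only the
-# "123456789" inside {} is hashed; key_to_slot("foo{}bar") hashes the whole
-# key, since the braces are empty.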
-#################################################################################
-# Definition of commands
-#################################################################################
-
-COMMANDS={
- "create" => ["create_cluster_cmd", -2, "host1:port1 ... hostN:portN"],
- "check" => ["check_cluster_cmd", 2, "host:port"],
- "fix" => ["fix_cluster_cmd", 2, "host:port"],
- "reshard" => ["reshard_cluster_cmd", 2, "host:port"],
- "add-node" => ["addnode_cluster_cmd", 3, "new_host:new_port existing_host:existing_port"],
- "del-node" => ["delnode_cluster_cmd", 3, "host:port node_id"],
- "set-timeout" => ["set_timeout_cluster_cmd", 3, "host:port milliseconds"],
- "call" => ["call_cluster_cmd", -3, "host:port command arg arg .. arg"],
- "import" => ["import_cluster_cmd", 2, "host:port"],
- "help" => ["help_cluster_cmd", 1, "(show this help)"]
-}
-
-ALLOWED_OPTIONS={
- "create" => {"replicas" => true},
- "add-node" => {"slave" => false, "master-id" => true},
- "import" => {"from" => :required},
- "reshard" => {"from" => true, "to" => true, "slots" => true, "yes" => false}
-}
-
-def show_help
- puts "Usage: redis-trib <command> <options> <arguments ...>\n\n"
- COMMANDS.each{|k,v|
- o = ""
- puts " #{k.ljust(15)} #{v[2]}"
- if ALLOWED_OPTIONS[k]
- ALLOWED_OPTIONS[k].each{|optname,has_arg|
- puts " --#{optname}" + (has_arg ? " <arg>" : "")
- }
+def command_example(cmd, args, opts)
+ cmd = "redis-cli --cluster #{cmd}"
+ args.each{|a|
+ a = a.to_s
+ a = a.inspect if a[' ']
+ cmd << " #{a}"
+ }
+ opts.each{|opt, val|
+ opt = " --cluster-#{opt.downcase}"
+ if val != true
+ val = val.join(' ') if val.is_a? Array
+ opt << " #{val}"
end
+ cmd << opt
}
- puts "\nFor check, fix, reshard, del-node, set-timeout you can specify the host and port of any working node in the cluster.\n"
+ cmd
end
-# Sanity check
-if ARGV.length == 0
- show_help
- exit 1
+$command = ARGV.shift
+$opts, $args = parse_options($command) if $command
+
+puts "WARNING: redis-trib.rb is not longer available!".yellow
+puts "You should use #{'redis-cli'.bold} instead."
+puts ''
+puts "All commands and features belonging to redis-trib.rb "+
+ "have been moved\nto redis-cli."
+puts "In order to use them you should call redis-cli with the #{'--cluster'.bold}"
+puts "option followed by the subcommand name, arguments and options."
+puts ''
+puts "Use the following syntax:"
+puts "redis-cli --cluster SUBCOMMAND [ARGUMENTS] [OPTIONS]".bold
+puts ''
+puts "Example:"
+if $command
+ example = command_example $command, $args, $opts
+else
+ example = "redis-cli --cluster info 127.0.0.1:7000"
end
-
-rt = RedisTrib.new
-cmd_spec = COMMANDS[ARGV[0].downcase]
-if !cmd_spec
- puts "Unknown redis-trib subcommand '#{ARGV[0]}'"
- exit 1
-end
-
-# Parse options
-cmd_options,first_non_option = rt.parse_options(ARGV[0].downcase)
-rt.check_arity(cmd_spec[1],ARGV.length-(first_non_option-1))
-
-# Dispatch
-rt.send(cmd_spec[0],ARGV[first_non_option..-1],cmd_options)
+puts example.bold
+puts ''
+puts "To get help about all subcommands, type:"
+puts "redis-cli --cluster help".bold
+puts ''
+exit 1
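+#
+# For reference (an illustrative note, not part of the original script),
+# command_example("reshard", ["127.0.0.1:7000"], {"slots"=>"100", "yes"=>true})
+# builds the string:
+# redis-cli --cluster reshard 127.0.0.1:7000 --cluster-slots 100 --cluster-yes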
diff --git a/src/redis.h b/src/redis.h
deleted file mode 100644
index 0c191d06f..000000000
--- a/src/redis.h
+++ /dev/null
@@ -1,1583 +0,0 @@
-/*
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Redis nor the names of its contributors may be used
- * to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __REDIS_H
-#define __REDIS_H
-
-#include "fmacros.h"
-#include "config.h"
-#include "solarisfixes.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <limits.h>
-#include <unistd.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <syslog.h>
-#include <netinet/in.h>
-#include <lua.h>
-#include <signal.h>
-
-typedef long long mstime_t; /* millisecond time type. */
-
-#include "ae.h" /* Event driven programming library */
-#include "sds.h" /* Dynamic safe strings */
-#include "dict.h" /* Hash tables */
-#include "adlist.h" /* Linked lists */
-#include "zmalloc.h" /* total memory usage aware version of malloc/free */
-#include "anet.h" /* Networking the easy way */
-#include "ziplist.h" /* Compact list data structure */
-#include "intset.h" /* Compact integer set structure */
-#include "version.h" /* Version macro */
-#include "util.h" /* Misc functions useful in many places */
-#include "latency.h" /* Latency monitor API */
-#include "sparkline.h" /* ASII graphs API */
-#include "quicklist.h"
-
-/* Following includes allow test functions to be called from Redis main() */
-#include "zipmap.h"
-#include "sha1.h"
-#include "endianconv.h"
-#include "crc64.h"
-
-/* Error codes */
-#define REDIS_OK 0
-#define REDIS_ERR -1
-
-/* Static server configuration */
-#define REDIS_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
-#define REDIS_MIN_HZ 1
-#define REDIS_MAX_HZ 500
-#define REDIS_SERVERPORT 6379 /* TCP port */
-#define REDIS_TCP_BACKLOG 511 /* TCP listen backlog */
-#define REDIS_MAXIDLETIME 0 /* default client timeout: infinite */
-#define REDIS_DEFAULT_DBNUM 16
-#define REDIS_CONFIGLINE_MAX 1024
-#define REDIS_DBCRON_DBS_PER_CALL 16
-#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
-#define REDIS_SHARED_SELECT_CMDS 10
-#define REDIS_SHARED_INTEGERS 10000
-#define REDIS_SHARED_BULKHDR_LEN 32
-#define REDIS_MAX_LOGMSG_LEN 1024 /* Default maximum length of syslog messages */
-#define REDIS_AOF_REWRITE_PERC 100
-#define REDIS_AOF_REWRITE_MIN_SIZE (64*1024*1024)
-#define REDIS_AOF_REWRITE_ITEMS_PER_CMD 64
-#define REDIS_SLOWLOG_LOG_SLOWER_THAN 10000
-#define REDIS_SLOWLOG_MAX_LEN 128
-#define REDIS_MAX_CLIENTS 10000
-#define REDIS_AUTHPASS_MAX_LEN 512
-#define REDIS_DEFAULT_SLAVE_PRIORITY 100
-#define REDIS_REPL_TIMEOUT 60
-#define REDIS_REPL_PING_SLAVE_PERIOD 10
-#define REDIS_RUN_ID_SIZE 40
-#define REDIS_EOF_MARK_SIZE 40
-#define REDIS_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
-#define REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
-#define REDIS_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
-#define REDIS_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
-#define REDIS_DEFAULT_PID_FILE "/var/run/redis.pid"
-#define REDIS_DEFAULT_SYSLOG_IDENT "redis"
-#define REDIS_DEFAULT_CLUSTER_CONFIG_FILE "nodes.conf"
-#define REDIS_DEFAULT_DAEMONIZE 0
-#define REDIS_DEFAULT_UNIX_SOCKET_PERM 0
-#define REDIS_DEFAULT_TCP_KEEPALIVE 0
-#define REDIS_DEFAULT_LOGFILE ""
-#define REDIS_DEFAULT_SYSLOG_ENABLED 0
-#define REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR 1
-#define REDIS_DEFAULT_RDB_COMPRESSION 1
-#define REDIS_DEFAULT_RDB_CHECKSUM 1
-#define REDIS_DEFAULT_RDB_FILENAME "dump.rdb"
-#define REDIS_DEFAULT_REPL_DISKLESS_SYNC 0
-#define REDIS_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5
-#define REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA 1
-#define REDIS_DEFAULT_SLAVE_READ_ONLY 1
-#define REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY 0
-#define REDIS_DEFAULT_MAXMEMORY 0
-#define REDIS_DEFAULT_MAXMEMORY_SAMPLES 5
-#define REDIS_DEFAULT_AOF_FILENAME "appendonly.aof"
-#define REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0
-#define REDIS_DEFAULT_AOF_LOAD_TRUNCATED 1
-#define REDIS_DEFAULT_ACTIVE_REHASHING 1
-#define REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
-#define REDIS_DEFAULT_MIN_SLAVES_TO_WRITE 0
-#define REDIS_DEFAULT_MIN_SLAVES_MAX_LAG 10
-#define REDIS_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
-#define REDIS_PEER_ID_LEN (REDIS_IP_STR_LEN+32) /* Must be enough for ip:port */
-#define REDIS_BINDADDR_MAX 16
-#define REDIS_MIN_RESERVED_FDS 32
-#define REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD 0
-
-#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */
-#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
-#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
-#define ACTIVE_EXPIRE_CYCLE_SLOW 0
-#define ACTIVE_EXPIRE_CYCLE_FAST 1
-
-/* Instantaneous metrics tracking. */
-#define REDIS_METRIC_SAMPLES 16 /* Number of samples per metric. */
-#define REDIS_METRIC_COMMAND 0 /* Number of commands executed. */
-#define REDIS_METRIC_NET_INPUT 1 /* Bytes read from network. */
-#define REDIS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
-#define REDIS_METRIC_COUNT 3
-
-/* Protocol and I/O related defines */
-#define REDIS_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
-#define REDIS_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
-#define REDIS_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
-#define REDIS_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
-#define REDIS_MBULK_BIG_ARG (1024*32)
-#define REDIS_LONGSTR_SIZE 21 /* Bytes needed for long -> str */
-#define REDIS_AOF_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
-/* When configuring the Redis event loop, we set it up so that the total number
- * of file descriptors we can handle is server.maxclients + RESERVED_FDS + FDSET_INCR,
- * which is our safety margin. */
-#define REDIS_EVENTLOOP_FDSET_INCR (REDIS_MIN_RESERVED_FDS+96)
-
-/* Hash table parameters */
-#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
-
-/* Command flags. Please check the command table defined in the redis.c file
- * for more information about the meaning of every flag. */
-#define REDIS_CMD_WRITE 1 /* "w" flag */
-#define REDIS_CMD_READONLY 2 /* "r" flag */
-#define REDIS_CMD_DENYOOM 4 /* "m" flag */
-#define REDIS_CMD_NOT_USED_1 8 /* no longer used flag */
-#define REDIS_CMD_ADMIN 16 /* "a" flag */
-#define REDIS_CMD_PUBSUB 32 /* "p" flag */
-#define REDIS_CMD_NOSCRIPT 64 /* "s" flag */
-#define REDIS_CMD_RANDOM 128 /* "R" flag */
-#define REDIS_CMD_SORT_FOR_SCRIPT 256 /* "S" flag */
-#define REDIS_CMD_LOADING 512 /* "l" flag */
-#define REDIS_CMD_STALE 1024 /* "t" flag */
-#define REDIS_CMD_SKIP_MONITOR 2048 /* "M" flag */
-#define REDIS_CMD_ASKING 4096 /* "k" flag */
-#define REDIS_CMD_FAST 8192 /* "F" flag */
-
-/* Object types */
-#define REDIS_STRING 0
-#define REDIS_LIST 1
-#define REDIS_SET 2
-#define REDIS_ZSET 3
-#define REDIS_HASH 4
-
-/* Object encodings. Some kinds of objects, like Strings and Hashes, can be
- * internally represented in multiple ways. The 'encoding' field of the object
- * is set to one of these values for a given object. */
-#define REDIS_ENCODING_RAW 0 /* Raw representation */
-#define REDIS_ENCODING_INT 1 /* Encoded as integer */
-#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
-#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
-#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
-#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
-#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
-#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
-#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
-#define REDIS_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
-
-/* Defines related to the dump file format. Storing 32 bit lengths for short
- * keys requires a lot of space, so we check the most significant 2 bits of
- * the first byte to interpret the length:
- *
- * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
- * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
- * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
- * 11|000000 this means: a specially encoded object will follow. The six bit
- * number specifies the kind of object that follows.
- * See the REDIS_RDB_ENC_* defines.
- *
- * Lengths up to 63 are stored using a single byte; most DB keys, and many
- * values, will fit inside. */
-#define REDIS_RDB_6BITLEN 0
-#define REDIS_RDB_14BITLEN 1
-#define REDIS_RDB_32BITLEN 2
-#define REDIS_RDB_ENCVAL 3
-#define REDIS_RDB_LENERR UINT_MAX
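-
-/* Worked examples of the encoding above (an illustrative note, not part of
- * the original header): the single byte 00|001010 (0x0A) encodes the 6 bit
- * length 10; the pair 01|000001 00101100 (0x41 0x2C) encodes the 14 bit
- * length (1<<8)|44 = 300; a first byte of 10|000000 (0x80) means the next
- * 4 bytes hold a full 32 bit length. */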
-
-/* When a length of a string object stored on disk has the first two bits
- * set, the remaining two bits specify a special encoding for the object,
- * according to the following defines: */
-#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
-#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
-#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
-#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
-
-/* AOF states */
-#define REDIS_AOF_OFF 0 /* AOF is off */
-#define REDIS_AOF_ON 1 /* AOF is on */
-#define REDIS_AOF_WAIT_REWRITE 2 /* AOF waits for a rewrite to finish before appending */
-
-/* Client flags */
-#define REDIS_SLAVE (1<<0) /* This client is a slave server */
-#define REDIS_MASTER (1<<1) /* This client is a master server */
-#define REDIS_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
-#define REDIS_MULTI (1<<3) /* This client is in a MULTI context */
-#define REDIS_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
-#define REDIS_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
-#define REDIS_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
-#define REDIS_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
- server.unblocked_clients */
-#define REDIS_LUA_CLIENT (1<<8) /* This is a non connected client used by Lua */
-#define REDIS_ASKING (1<<9) /* Client issued the ASKING command */
-#define REDIS_CLOSE_ASAP (1<<10)/* Close this client ASAP */
-#define REDIS_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
-#define REDIS_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
-#define REDIS_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if it is a master */
-#define REDIS_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
-#define REDIS_FORCE_REPL (1<<15) /* Force replication of current cmd. */
-#define REDIS_PRE_PSYNC (1<<16) /* Instance doesn't understand PSYNC. */
-#define REDIS_READONLY (1<<17) /* Cluster client is in read-only state. */
-#define REDIS_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
-
-/* Client block type (btype field in client structure)
- * if REDIS_BLOCKED flag is set. */
-#define REDIS_BLOCKED_NONE 0 /* Not blocked, no REDIS_BLOCKED flag set. */
-#define REDIS_BLOCKED_LIST 1 /* BLPOP & co. */
-#define REDIS_BLOCKED_WAIT 2 /* WAIT for synchronous replication. */
-
-/* Client request types */
-#define REDIS_REQ_INLINE 1
-#define REDIS_REQ_MULTIBULK 2
-
-/* Client classes for client limits, currently used only for
- * the max-client-output-buffer limit implementation. */
-#define REDIS_CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
-#define REDIS_CLIENT_TYPE_SLAVE 1 /* Slaves. */
-#define REDIS_CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
-#define REDIS_CLIENT_TYPE_COUNT 3
-
-/* Slave replication state - from the point of view of the slave. */
-#define REDIS_REPL_NONE 0 /* No active replication */
-#define REDIS_REPL_CONNECT 1 /* Must connect to master */
-#define REDIS_REPL_CONNECTING 2 /* Connecting to master */
-#define REDIS_REPL_RECEIVE_PONG 3 /* Wait for PING reply */
-#define REDIS_REPL_TRANSFER 4 /* Receiving .rdb from master */
-#define REDIS_REPL_CONNECTED 5 /* Connected to master */
-
-/* Slave replication state - from the point of view of the master.
- * In the SEND_BULK and ONLINE states the slave receives new updates
- * in its output queue. In the WAIT_BGSAVE states, instead, the server is
- * waiting to start the next background save in order to send updates to it. */
-#define REDIS_REPL_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
-#define REDIS_REPL_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
-#define REDIS_REPL_SEND_BULK 8 /* Sending RDB file to slave. */
-#define REDIS_REPL_ONLINE 9 /* RDB file transmitted, sending just updates. */
-
-/* Synchronous read timeout - slave side */
-#define REDIS_REPL_SYNCIO_TIMEOUT 5
-
-/* List related stuff */
-#define REDIS_HEAD 0
-#define REDIS_TAIL 1
-
-/* Sort operations */
-#define REDIS_SORT_GET 0
-#define REDIS_SORT_ASC 1
-#define REDIS_SORT_DESC 2
-#define REDIS_SORTKEY_MAX 1024
-
-/* Log levels */
-#define REDIS_DEBUG 0
-#define REDIS_VERBOSE 1
-#define REDIS_NOTICE 2
-#define REDIS_WARNING 3
-#define REDIS_LOG_RAW (1<<10) /* Modifier to log without timestamp */
-#define REDIS_DEFAULT_VERBOSITY REDIS_NOTICE
-
-/* Supervision options */
-#define REDIS_SUPERVISED_NONE 0
-#define REDIS_SUPERVISED_AUTODETECT 1
-#define REDIS_SUPERVISED_SYSTEMD 2
-#define REDIS_SUPERVISED_UPSTART 3
-
-/* Anti-warning macro... */
-#define REDIS_NOTUSED(V) ((void) V)
-
-#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
-#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
-
-/* Append only defines */
-#define AOF_FSYNC_NO 0
-#define AOF_FSYNC_ALWAYS 1
-#define AOF_FSYNC_EVERYSEC 2
-#define REDIS_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
-
-/* Zip structure related defaults */
-#define REDIS_HASH_MAX_ZIPLIST_ENTRIES 512
-#define REDIS_HASH_MAX_ZIPLIST_VALUE 64
-#define REDIS_SET_MAX_INTSET_ENTRIES 512
-#define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128
-#define REDIS_ZSET_MAX_ZIPLIST_VALUE 64
-
-/* List defaults */
-#define REDIS_LIST_MAX_ZIPLIST_SIZE -2
-#define REDIS_LIST_COMPRESS_DEPTH 0
-
-/* HyperLogLog defines */
-#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
-
-/* Sets operations codes */
-#define REDIS_OP_UNION 0
-#define REDIS_OP_DIFF 1
-#define REDIS_OP_INTER 2
-
-/* Redis maxmemory strategies */
-#define REDIS_MAXMEMORY_VOLATILE_LRU 0
-#define REDIS_MAXMEMORY_VOLATILE_TTL 1
-#define REDIS_MAXMEMORY_VOLATILE_RANDOM 2
-#define REDIS_MAXMEMORY_ALLKEYS_LRU 3
-#define REDIS_MAXMEMORY_ALLKEYS_RANDOM 4
-#define REDIS_MAXMEMORY_NO_EVICTION 5
-#define REDIS_DEFAULT_MAXMEMORY_POLICY REDIS_MAXMEMORY_NO_EVICTION
-
-/* Scripting */
-#define REDIS_LUA_TIME_LIMIT 5000 /* milliseconds */
-
-/* Units */
-#define UNIT_SECONDS 0
-#define UNIT_MILLISECONDS 1
-
-/* SHUTDOWN flags */
-#define REDIS_SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
- points are configured. */
-#define REDIS_SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
-
-/* Command call flags, see call() function */
-#define REDIS_CALL_NONE 0
-#define REDIS_CALL_SLOWLOG 1
-#define REDIS_CALL_STATS 2
-#define REDIS_CALL_PROPAGATE 4
-#define REDIS_CALL_FULL (REDIS_CALL_SLOWLOG | REDIS_CALL_STATS | REDIS_CALL_PROPAGATE)
-
-/* Command propagation flags, see propagate() function */
-#define REDIS_PROPAGATE_NONE 0
-#define REDIS_PROPAGATE_AOF 1
-#define REDIS_PROPAGATE_REPL 2
-
-/* RDB active child save type. */
-#define REDIS_RDB_CHILD_TYPE_NONE 0
-#define REDIS_RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
-#define REDIS_RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
-
-/* Keyspace changes notification classes. Every class is associated with a
- * character for configuration purposes. */
-#define REDIS_NOTIFY_KEYSPACE (1<<0) /* K */
-#define REDIS_NOTIFY_KEYEVENT (1<<1) /* E */
-#define REDIS_NOTIFY_GENERIC (1<<2) /* g */
-#define REDIS_NOTIFY_STRING (1<<3) /* $ */
-#define REDIS_NOTIFY_LIST (1<<4) /* l */
-#define REDIS_NOTIFY_SET (1<<5) /* s */
-#define REDIS_NOTIFY_HASH (1<<6) /* h */
-#define REDIS_NOTIFY_ZSET (1<<7) /* z */
-#define REDIS_NOTIFY_EXPIRED (1<<8) /* x */
-#define REDIS_NOTIFY_EVICTED (1<<9) /* e */
-#define REDIS_NOTIFY_ALL (REDIS_NOTIFY_GENERIC | REDIS_NOTIFY_STRING | REDIS_NOTIFY_LIST | REDIS_NOTIFY_SET | REDIS_NOTIFY_HASH | REDIS_NOTIFY_ZSET | REDIS_NOTIFY_EXPIRED | REDIS_NOTIFY_EVICTED) /* A */
-
-/* Get the first bind addr or NULL */
-#define REDIS_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL)
-
-/* Using the following macro you can run code inside serverCron() with the
- * specified period, expressed in milliseconds.
- * The actual resolution depends on server.hz. */
-#define run_with_period(_ms_) if ((_ms_ <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
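-
-/* For example (an illustrative sketch, not part of the original header),
- * inside serverCron() one would write:
- *
- * run_with_period(100) {
- * sample_metrics(); // hypothetical helper
- * }
- *
- * to execute the body roughly every 100 milliseconds, subject to the
- * server.hz resolution noted above. */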
-
-/* We can print the stacktrace, so our assert is defined this way: */
-#define redisAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_redisAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
-#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
-#define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
-
-/*-----------------------------------------------------------------------------
- * Data types
- *----------------------------------------------------------------------------*/
-
-/* A redis object, that is a type able to hold a string / list / set */
-
-/* The actual Redis Object */
-#define REDIS_LRU_BITS 24
-#define REDIS_LRU_CLOCK_MAX ((1<<REDIS_LRU_BITS)-1) /* Max value of obj->lru */
-#define REDIS_LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
-typedef struct redisObject {
- unsigned type:4;
- unsigned encoding:4;
- unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock) */
- int refcount;
- void *ptr;
-} robj;
-
-/* Macro used to obtain the current LRU clock.
- * If the serverCron() period is lower than or equal to the LRU clock
- * resolution (as it should be in production servers) we return the
- * precomputed value, otherwise we need to resort to a function call. */
-#define LRU_CLOCK() ((1000/server.hz <= REDIS_LRU_CLOCK_RESOLUTION) ? server.lruclock : getLRUClock())
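-
-/* With the defaults above (REDIS_DEFAULT_HZ = 10, REDIS_LRU_CLOCK_RESOLUTION
- * = 1000 ms) the test is 1000/10 = 100 <= 1000, so the precomputed
- * server.lruclock is returned (an illustrative note, not original text). */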
-
-/* Macro used to initialize a Redis object allocated on the stack.
- * Note that this macro is kept near the structure definition to make sure
- * we'll update it when the structure is changed, to avoid bugs like
- * bug #85, introduced exactly in this way. */
-#define initStaticStringObject(_var,_ptr) do { \
- _var.refcount = 1; \
- _var.type = REDIS_STRING; \
- _var.encoding = REDIS_ENCODING_RAW; \
- _var.ptr = _ptr; \
-} while(0);
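-
-/* A usage sketch (illustrative only, not part of the original header):
- * given an sds string 'name',
- *
- * robj keyobj;
- * initStaticStringObject(keyobj, name);
- *
- * yields a stack-allocated string object that can be passed to read-only
- * lookup functions without a heap allocation. */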
-
-/* To improve the quality of the LRU approximation we take a set of keys
- * that are good candidates for eviction across freeMemoryIfNeeded() calls.
- *
- * Entries inside the eviction pool are kept ordered by idle time, putting
- * greater idle times to the right (ascending order).
- *
- * Empty entries have the key pointer set to NULL. */
-#define REDIS_EVICTION_POOL_SIZE 16
-struct evictionPoolEntry {
- unsigned long long idle; /* Object idle time. */
- sds key; /* Key name. */
-};
-
-/* Redis database representation. There are multiple databases identified
- * by integers from 0 (the default database) up to the max configured
- * database. The database number is the 'id' field in the structure. */
-typedef struct redisDb {
- dict *dict; /* The keyspace for this DB */
- dict *expires; /* Timeout of keys with a timeout set */
- dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
- dict *ready_keys; /* Blocked keys that received a PUSH */
- dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
- struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
- int id; /* Database ID */
- long long avg_ttl; /* Average TTL, just for stats */
-} redisDb;
-
-/* Client MULTI/EXEC state */
-typedef struct multiCmd {
- robj **argv;
- int argc;
- struct redisCommand *cmd;
-} multiCmd;
-
-typedef struct multiState {
- multiCmd *commands; /* Array of MULTI commands */
- int count; /* Total number of MULTI commands */
- int minreplicas; /* MINREPLICAS for synchronous replication */
- time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
-} multiState;
-
-/* This structure holds the blocking operation state for a client.
- * The fields used depend on client->btype. */
-typedef struct blockingState {
- /* Generic fields. */
- mstime_t timeout; /* Blocking operation timeout. If UNIX current time
- * is > timeout then the operation timed out. */
-
- /* REDIS_BLOCK_LIST */
- dict *keys; /* The keys we are waiting to terminate a blocking
- * operation such as BLPOP. Otherwise NULL. */
- robj *target; /* The key that should receive the element,
- * for BRPOPLPUSH. */
-
- /* REDIS_BLOCK_WAIT */
- int numreplicas; /* Number of replicas we are waiting for ACK. */
- long long reploffset; /* Replication offset to reach. */
-} blockingState;
-
-/* The following structure represents a node in the server.ready_keys list,
- * where we accumulate all the keys that had clients blocked with a blocking
- * operation such as B[LR]POP, but received new data in the context of the
- * last executed command.
- *
- * After the execution of every command or script, we scan this list to check
- * whether, as a result, we should serve data to blocked clients, unblocking
- * them. Note that server.ready_keys will not have duplicates, as there is a
- * dictionary, also called ready_keys, in every structure representing a Redis
- * database, where we make sure to remember whether a given key was already
- * added to the server.ready_keys list. */
-typedef struct readyList {
- redisDb *db;
- robj *key;
-} readyList;
-
-/* With multiplexing we need to keep per-client state.
- * Clients are stored in a linked list. */
-typedef struct redisClient {
- uint64_t id; /* Client incremental unique ID. */
- int fd;
- redisDb *db;
- int dictid;
- robj *name; /* As set by CLIENT SETNAME */
- sds querybuf;
- size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size */
- int argc;
- robj **argv;
- struct redisCommand *cmd, *lastcmd;
- int reqtype;
- int multibulklen; /* number of multi bulk arguments left to read */
- long bulklen; /* length of bulk argument in multi bulk request */
- list *reply;
- unsigned long reply_bytes; /* Tot bytes of objects in reply list */
- int sentlen; /* Amount of bytes already sent in the current
- buffer or object being sent. */
- time_t ctime; /* Client creation time */
- time_t lastinteraction; /* time of the last interaction, used for timeout */
- time_t obuf_soft_limit_reached_time;
- int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
- int authenticated; /* when requirepass is non-NULL */
- int replstate; /* replication state if this is a slave */
- int repl_put_online_on_ack; /* Install slave write handler on ACK. */
- int repldbfd; /* replication DB file descriptor */
- off_t repldboff; /* replication DB file offset */
- off_t repldbsize; /* replication DB file size */
- sds replpreamble; /* replication DB preamble. */
- long long reploff; /* replication offset if this is our master */
- long long repl_ack_off; /* replication ack offset, if this is a slave */
- long long repl_ack_time;/* replication ack time, if this is a slave */
- char replrunid[REDIS_RUN_ID_SIZE+1]; /* master run id if this is a master */
- int slave_listening_port; /* As configured with: SLAVECONF listening-port */
- multiState mstate; /* MULTI/EXEC state */
- int btype; /* Type of blocking op if REDIS_BLOCKED. */
- blockingState bpop; /* blocking state */
- long long woff; /* Last write global replication offset. */
- list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
- dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
- list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
- sds peerid; /* Cached peer ID. */
-
- /* Response buffer */
- int bufpos;
- char buf[REDIS_REPLY_CHUNK_BYTES];
-} redisClient;
-
-struct saveparam {
- time_t seconds;
- int changes;
-};
-
-struct sharedObjectsStruct {
- robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
- *colon, *nullbulk, *nullmultibulk, *queued,
- *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
- *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
- *masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
- *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
- *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *rpop, *lpop,
- *lpush, *emptyscan, *minstring, *maxstring,
- *select[REDIS_SHARED_SELECT_CMDS],
- *integers[REDIS_SHARED_INTEGERS],
- *mbulkhdr[REDIS_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
- *bulkhdr[REDIS_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
-};
-
-/* ZSETs use a specialized version of Skiplists */
-typedef struct zskiplistNode {
- robj *obj;
- double score;
- struct zskiplistNode *backward;
- struct zskiplistLevel {
- struct zskiplistNode *forward;
- unsigned int span;
- } level[];
-} zskiplistNode;
-
-typedef struct zskiplist {
- struct zskiplistNode *header, *tail;
- unsigned long length;
- int level;
-} zskiplist;
-
-typedef struct zset {
- dict *dict;
- zskiplist *zsl;
-} zset;
-
-typedef struct clientBufferLimitsConfig {
- unsigned long long hard_limit_bytes;
- unsigned long long soft_limit_bytes;
- time_t soft_limit_seconds;
-} clientBufferLimitsConfig;
-
-extern clientBufferLimitsConfig clientBufferLimitsDefaults[REDIS_CLIENT_TYPE_COUNT];
-
-/* The redisOp structure defines a Redis Operation, that is an instance of
- * a command with an argument vector, database ID, propagation target
- * (REDIS_PROPAGATE_*), and command pointer.
- *
- * Currently only used to additionally propagate more commands to AOF/Replication
- * after the propagation of the executed command. */
-typedef struct redisOp {
- robj **argv;
- int argc, dbid, target;
- struct redisCommand *cmd;
-} redisOp;
-
-/* Defines an array of Redis operations. There is an API to add to this
- * structure in an easy way.
- *
- * redisOpArrayInit();
- * redisOpArrayAppend();
- * redisOpArrayFree();
- */
-typedef struct redisOpArray {
- redisOp *ops;
- int numops;
-} redisOpArray;
-
-/*-----------------------------------------------------------------------------
- * Global server state
- *----------------------------------------------------------------------------*/
-
-struct clusterState;
-
-/* AIX defines hz to __hz; we don't use this define, and in order to allow
- * Redis to build on AIX we need to undef it. */
-#ifdef _AIX
-#undef hz
-#endif
-
-struct redisServer {
- /* General */
- pid_t pid; /* Main process pid. */
- char *configfile; /* Absolute config file path, or NULL */
- int hz; /* serverCron() calls frequency in hertz */
- redisDb *db;
- dict *commands; /* Command table */
- dict *orig_commands; /* Command table before command renaming. */
- aeEventLoop *el;
- unsigned lruclock:REDIS_LRU_BITS; /* Clock for LRU eviction */
- int shutdown_asap; /* SHUTDOWN needed ASAP */
- int activerehashing; /* Incremental rehash in serverCron() */
- char *requirepass; /* Pass for AUTH command, or NULL */
- char *pidfile; /* PID file path */
- int arch_bits; /* 32 or 64 depending on sizeof(long) */
- int cronloops; /* Number of times the cron function ran */
- char runid[REDIS_RUN_ID_SIZE+1]; /* ID always different at every exec. */
- int sentinel_mode; /* True if this instance is a Sentinel. */
- /* Networking */
- int port; /* TCP listening port */
- int tcp_backlog; /* TCP listen() backlog */
- char *bindaddr[REDIS_BINDADDR_MAX]; /* Addresses we should bind to */
- int bindaddr_count; /* Number of addresses in server.bindaddr[] */
- char *unixsocket; /* UNIX socket path */
- mode_t unixsocketperm; /* UNIX socket permission */
- int ipfd[REDIS_BINDADDR_MAX]; /* TCP socket file descriptors */
- int ipfd_count; /* Used slots in ipfd[] */
- int sofd; /* Unix socket file descriptor */
- int cfd[REDIS_BINDADDR_MAX];/* Cluster bus listening socket */
- int cfd_count; /* Used slots in cfd[] */
- list *clients; /* List of active clients */
- list *clients_to_close; /* Clients to close asynchronously */
- list *slaves, *monitors; /* List of slaves and MONITORs */
- redisClient *current_client; /* Current client, only used on crash report */
- int clients_paused; /* True if clients are currently paused */
- mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
- char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
- dict *migrate_cached_sockets;/* MIGRATE cached sockets */
- uint64_t next_client_id; /* Next client unique ID. Incremental. */
- /* RDB / AOF loading information */
- int loading; /* We are loading data from disk if true */
- off_t loading_total_bytes;
- off_t loading_loaded_bytes;
- time_t loading_start_time;
- off_t loading_process_events_interval_bytes;
- /* Fast pointers to often looked up command */
- struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
- *rpopCommand;
- /* Fields used only for stats */
- time_t stat_starttime; /* Server start time */
- long long stat_numcommands; /* Number of processed commands */
- long long stat_numconnections; /* Number of connections received */
- long long stat_expiredkeys; /* Number of expired keys */
- long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
- long long stat_keyspace_hits; /* Number of successful lookups of keys */
- long long stat_keyspace_misses; /* Number of failed lookups of keys */
- size_t stat_peak_memory; /* Max used memory record */
- long long stat_fork_time; /* Time needed to perform latest fork() */
- double stat_fork_rate; /* Fork rate in GB/sec. */
- long long stat_rejected_conn; /* Clients rejected because of maxclients */
- long long stat_sync_full; /* Number of full resyncs with slaves. */
- long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
- long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
- list *slowlog; /* SLOWLOG list of commands */
- long long slowlog_entry_id; /* SLOWLOG current entry ID */
- long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
- unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
- size_t resident_set_size; /* RSS sampled in serverCron(). */
- long long stat_net_input_bytes; /* Bytes read from network. */
- long long stat_net_output_bytes; /* Bytes written to network. */
- /* The following two are used to track instantaneous metrics, such as
- * the number of operations per second and network traffic. */
- struct {
- long long last_sample_time; /* Timestamp of last sample in ms */
- long long last_sample_count;/* Count in last sample */
- long long samples[REDIS_METRIC_SAMPLES];
- int idx;
- } inst_metric[REDIS_METRIC_COUNT];
- /* Configuration */
- int verbosity; /* Loglevel in redis.conf */
- int maxidletime; /* Client timeout in seconds */
- int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
- int active_expire_enabled; /* Can be disabled for testing purposes. */
- size_t client_max_querybuf_len; /* Limit for client query buffer length */
- int dbnum; /* Total number of configured DBs */
- int supervised; /* 1 if supervised, 0 otherwise. */
- int supervised_mode; /* See REDIS_SUPERVISED_* */
- int daemonize; /* True if running as a daemon */
- clientBufferLimitsConfig client_obuf_limits[REDIS_CLIENT_TYPE_COUNT];
- /* AOF persistence */
- int aof_state; /* REDIS_AOF_(ON|OFF|WAIT_REWRITE) */
- int aof_fsync; /* Kind of fsync() policy */
- char *aof_filename; /* Name of the AOF file */
- int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
- int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
- off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
- off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
- off_t aof_current_size; /* AOF current size. */
- int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
- pid_t aof_child_pid; /* PID of rewriting process */
- list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
- sds aof_buf; /* AOF buffer, written before entering the event loop */
- int aof_fd; /* File descriptor of currently selected AOF file */
- int aof_selected_db; /* Currently selected DB in AOF */
- time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
- time_t aof_last_fsync; /* UNIX time of last fsync() */
- time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
- time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
- int aof_lastbgrewrite_status; /* REDIS_OK or REDIS_ERR */
- unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
- int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
- int aof_last_write_status; /* REDIS_OK or REDIS_ERR */
- int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
- int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
- /* AOF pipes used to communicate between parent and child during rewrite. */
- int aof_pipe_write_data_to_child;
- int aof_pipe_read_data_from_parent;
- int aof_pipe_write_ack_to_parent;
- int aof_pipe_read_ack_from_child;
- int aof_pipe_write_ack_to_child;
- int aof_pipe_read_ack_from_parent;
- int aof_stop_sending_diff; /* If true stop sending accumulated diffs
- to child process. */
- sds aof_child_diff; /* AOF diff accumulator child side. */
- /* RDB persistence */
- long long dirty; /* Changes to DB from the last save */
- long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
- pid_t rdb_child_pid; /* PID of RDB saving child */
- struct saveparam *saveparams; /* Save points array for RDB */
- int saveparamslen; /* Number of saving points */
- char *rdb_filename; /* Name of RDB file */
- int rdb_compression; /* Use compression in RDB? */
- int rdb_checksum; /* Use RDB checksum? */
- time_t lastsave; /* Unix time of last successful save */
- time_t lastbgsave_try; /* Unix time of last attempted bgsave */
- time_t rdb_save_time_last; /* Time used by last RDB save run. */
- time_t rdb_save_time_start; /* Current RDB save start time. */
- int rdb_child_type; /* Type of save by active child. */
- int lastbgsave_status; /* REDIS_OK or REDIS_ERR */
- int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
- int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
- int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
- /* Propagation of commands in AOF / replication */
- redisOpArray also_propagate; /* Additional command to propagate. */
- /* Logging */
- char *logfile; /* Path of log file */
- int syslog_enabled; /* Is syslog enabled? */
- char *syslog_ident; /* Syslog ident */
- int syslog_facility; /* Syslog facility */
- /* Replication (master) */
- int slaveseldb; /* Last SELECTed DB in replication output */
- long long master_repl_offset; /* Global replication offset */
- int repl_ping_slave_period; /* Master pings the slave every N seconds */
- char *repl_backlog; /* Replication backlog for partial syncs */
- long long repl_backlog_size; /* Backlog circular buffer size */
- long long repl_backlog_histlen; /* Backlog actual data length */
- long long repl_backlog_idx; /* Backlog circular buffer current offset */
- long long repl_backlog_off; /* Replication offset of first byte in the
- backlog buffer. */
- time_t repl_backlog_time_limit; /* Time without slaves after which the
- backlog gets released. */
- time_t repl_no_slaves_since; /* We have no slaves since that time.
- Only valid if server.slaves len is 0. */
- int repl_min_slaves_to_write; /* Min number of slaves to write. */
- int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
- int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
- int repl_diskless_sync; /* Send RDB to slaves sockets directly. */
- int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
- /* Replication (slave) */
- char *masterauth; /* AUTH with this password with master */
- char *masterhost; /* Hostname of master */
- int masterport; /* Port of master */
- int repl_timeout; /* Timeout after N seconds of master idle */
- redisClient *master; /* Client that is master for this slave */
- redisClient *cached_master; /* Cached master to be reused for PSYNC. */
- int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
- int repl_state; /* Replication status if the instance is a slave */
- off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
- off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
- off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
- int repl_transfer_s; /* Slave -> Master SYNC socket */
- int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
- char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
- time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
- int repl_serve_stale_data; /* Serve stale data when link is down? */
- int repl_slave_ro; /* Slave is read only? */
- time_t repl_down_since; /* Unix time at which link with master went down */
- int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
- int slave_priority; /* Reported in INFO and used by Sentinel. */
- char repl_master_runid[REDIS_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */
- long long repl_master_initial_offset; /* Master PSYNC offset. */
- /* Replication script cache. */
- dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
- list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
- unsigned int repl_scriptcache_size; /* Max number of elements. */
- /* Synchronous replication. */
- list *clients_waiting_acks; /* Clients waiting in WAIT command. */
- int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
- /* Limits */
- unsigned int maxclients; /* Max number of simultaneous clients */
- unsigned long long maxmemory; /* Max number of memory bytes to use */
- int maxmemory_policy; /* Policy for key eviction */
- int maxmemory_samples; /* Precision of random sampling */
- /* Blocked clients */
- unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */
- list *unblocked_clients; /* list of clients to unblock before next loop */
- list *ready_keys; /* List of readyList structures for BLPOP & co */
- /* Sort parameters - qsort_r() is only available under BSD so we
- * have to make this state global, in order to pass it to sortCompare() */
- int sort_desc;
- int sort_alpha;
- int sort_bypattern;
- int sort_store;
- /* Zip structure config, see redis.conf for more information */
- size_t hash_max_ziplist_entries;
- size_t hash_max_ziplist_value;
- size_t set_max_intset_entries;
- size_t zset_max_ziplist_entries;
- size_t zset_max_ziplist_value;
- size_t hll_sparse_max_bytes;
- /* List parameters */
- int list_max_ziplist_size;
- int list_compress_depth;
- /* time cache */
- time_t unixtime; /* Unix time sampled every cron cycle. */
- long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
- /* Pubsub */
- dict *pubsub_channels; /* Map channels to list of subscribed clients */
- list *pubsub_patterns; /* A list of pubsub_patterns */
- int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
- OR of REDIS_NOTIFY... flags. */
- /* Cluster */
- int cluster_enabled; /* Is cluster enabled? */
- mstime_t cluster_node_timeout; /* Cluster node timeout. */
- char *cluster_configfile; /* Cluster auto-generated config file name. */
- struct clusterState *cluster; /* State of the cluster */
- int cluster_migration_barrier; /* Cluster replicas migration barrier. */
- int cluster_slave_validity_factor; /* Slave max data age for failover. */
- int cluster_require_full_coverage; /* If true, put the cluster down if
- there is at least one uncovered slot. */
- /* Scripting */
- lua_State *lua; /* The Lua interpreter. We use just one for all clients */
- redisClient *lua_client; /* The "fake client" to query Redis from Lua */
- redisClient *lua_caller; /* The client running EVAL right now, or NULL */
- dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
- mstime_t lua_time_limit; /* Script timeout in milliseconds */
- mstime_t lua_time_start; /* Start time of script, milliseconds time */
- int lua_write_dirty; /* True if a write command was called during the
- execution of the current script. */
- int lua_random_dirty; /* True if a random command was called during the
- execution of the current script. */
- int lua_timedout; /* True if we reached the time limit for script
- execution. */
- int lua_kill; /* Kill the script if true. */
- /* Latency monitor */
- long long latency_monitor_threshold;
- dict *latency_events;
- /* Assert & bug reporting */
- char *assert_failed;
- char *assert_file;
- int assert_line;
- int bug_report_start; /* True if bug report header was already logged. */
- int watchdog_period; /* Software watchdog period in ms. 0 = off */
- /* System hardware info */
- size_t system_memory_size; /* Total memory in system as reported by OS */
-};
-
-typedef struct pubsubPattern {
- redisClient *client;
- robj *pattern;
-} pubsubPattern;
-
-typedef void redisCommandProc(redisClient *c);
-typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-struct redisCommand {
- char *name;
- redisCommandProc *proc;
- int arity;
- char *sflags; /* Flags as string representation, one char per flag. */
- int flags; /* The actual flags, obtained from the 'sflags' field. */
- /* Use a function to determine key arguments in a command line.
- * Used for Redis Cluster redirect. */
- redisGetKeysProc *getkeys_proc;
- /* What keys should be loaded in background when calling this command? */
- int firstkey; /* The first argument that's a key (0 = no keys) */
- int lastkey; /* The last argument that's a key */
- int keystep; /* The step between first and last key */
- long long microseconds, calls;
-};
-
-struct redisFunctionSym {
- char *name;
- unsigned long pointer;
-};
-
-typedef struct _redisSortObject {
- robj *obj;
- union {
- double score;
- robj *cmpobj;
- } u;
-} redisSortObject;
-
-typedef struct _redisSortOperation {
- int type;
- robj *pattern;
-} redisSortOperation;
-
-/* Structure to hold list iteration abstraction. */
-typedef struct {
- robj *subject;
- unsigned char encoding;
- unsigned char direction; /* Iteration direction */
- quicklistIter *iter;
-} listTypeIterator;
-
-/* Structure for an entry while iterating over a list. */
-typedef struct {
- listTypeIterator *li;
- quicklistEntry entry; /* Entry in quicklist */
-} listTypeEntry;
-
-/* Structure to hold set iteration abstraction. */
-typedef struct {
- robj *subject;
- int encoding;
- int ii; /* intset iterator */
- dictIterator *di;
-} setTypeIterator;
-
-/* Structure to hold hash iteration abstraction. Note that iteration over
- * hashes involves both fields and values. Because it is possible that
- * not both are required, store pointers in the iterator to avoid
- * unnecessary memory allocation for fields/values. */
-typedef struct {
- robj *subject;
- int encoding;
-
- unsigned char *fptr, *vptr;
-
- dictIterator *di;
- dictEntry *de;
-} hashTypeIterator;
-
-#define REDIS_HASH_KEY 1
-#define REDIS_HASH_VALUE 2
-
-/*-----------------------------------------------------------------------------
- * Extern declarations
- *----------------------------------------------------------------------------*/
-
-extern struct redisServer server;
-extern struct sharedObjectsStruct shared;
-extern dictType setDictType;
-extern dictType zsetDictType;
-extern dictType clusterNodesDictType;
-extern dictType clusterNodesBlackListDictType;
-extern dictType dbDictType;
-extern dictType shaScriptObjectDictType;
-extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
-extern dictType hashDictType;
-extern dictType replScriptCacheDictType;
-
-/*-----------------------------------------------------------------------------
- * Function prototypes
- *----------------------------------------------------------------------------*/
-
-/* Utils */
-long long ustime(void);
-long long mstime(void);
-void getRandomHexChars(char *p, unsigned int len);
-uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
-void exitFromChild(int retcode);
-size_t redisPopcount(void *s, long count);
-void redisSetProcTitle(char *title);
-
-/* networking.c -- Networking and Client related operations */
-redisClient *createClient(int fd);
-void closeTimedoutClients(void);
-void freeClient(redisClient *c);
-void freeClientAsync(redisClient *c);
-void resetClient(redisClient *c);
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void *addDeferredMultiBulkLength(redisClient *c);
-void setDeferredMultiBulkLength(redisClient *c, void *node, long length);
-void processInputBuffer(redisClient *c);
-void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
-void addReplyBulk(redisClient *c, robj *obj);
-void addReplyBulkCString(redisClient *c, char *s);
-void addReplyBulkCBuffer(redisClient *c, void *p, size_t len);
-void addReplyBulkLongLong(redisClient *c, long long ll);
-void addReply(redisClient *c, robj *obj);
-void addReplySds(redisClient *c, sds s);
-void addReplyBulkSds(redisClient *c, sds s);
-void addReplyError(redisClient *c, char *err);
-void addReplyStatus(redisClient *c, char *status);
-void addReplyDouble(redisClient *c, double d);
-void addReplyLongLong(redisClient *c, long long ll);
-void addReplyMultiBulkLen(redisClient *c, long length);
-void copyClientOutputBuffer(redisClient *dst, redisClient *src);
-void *dupClientReplyValue(void *o);
-void getClientsMaxBuffers(unsigned long *longest_output_list,
- unsigned long *biggest_input_buffer);
-void formatPeerId(char *peerid, size_t peerid_len, char *ip, int port);
-char *getClientPeerId(redisClient *client);
-sds catClientInfoString(sds s, redisClient *client);
-sds getAllClientsInfoString(void);
-void rewriteClientCommandVector(redisClient *c, int argc, ...);
-void rewriteClientCommandArgument(redisClient *c, int i, robj *newval);
-unsigned long getClientOutputBufferMemoryUsage(redisClient *c);
-void freeClientsInAsyncFreeQueue(void);
-void asyncCloseClientOnOutputBufferLimitReached(redisClient *c);
-int getClientType(redisClient *c);
-int getClientTypeByName(char *name);
-char *getClientTypeName(int class);
-void flushSlavesOutputBuffers(void);
-void disconnectSlaves(void);
-int listenToPort(int port, int *fds, int *count);
-void pauseClients(mstime_t duration);
-int clientsArePaused(void);
-int processEventsWhileBlocked(void);
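Editor's note: addDeferredMultiBulkLength()/setDeferredMultiBulkLength() above exist for replies whose element count is unknown until after the elements are emitted. A minimal hedged sketch of the usual pattern (the command logic is hypothetical):

    void exampleDeferredReply(redisClient *c) {
        void *replylen = addDeferredMultiBulkLength(c); /* placeholder node */
        long count = 0;
        /* ... emit one bulk reply per produced item, e.g.:
         *     addReplyBulk(c, item); count++;            */
        setDeferredMultiBulkLength(c, replylen, count); /* patch the header */
    }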
-
-#ifdef __GNUC__
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-#else
-void addReplyErrorFormat(redisClient *c, const char *fmt, ...);
-void addReplyStatusFormat(redisClient *c, const char *fmt, ...);
-#endif
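Editor's note: the __attribute__((format(printf,2,3))) annotation above tells GCC that parameter 2 is a printf-style format string whose variadic arguments start at parameter 3, so mismatches are caught at compile time. A hedged illustration:

    /* With the attribute in effect, a call such as
     *     addReplyErrorFormat(c, "expected %d arguments", "three");
     * draws a compile-time warning (format '%d' expects int, got char *).
     * On non-GCC compilers the plain prototypes apply and no checking
     * is performed. */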
-
-/* List data type */
-void listTypeTryConversion(robj *subject, robj *value);
-void listTypePush(robj *subject, robj *value, int where);
-robj *listTypePop(robj *subject, int where);
-unsigned long listTypeLength(robj *subject);
-listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
-void listTypeReleaseIterator(listTypeIterator *li);
-int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
-robj *listTypeGet(listTypeEntry *entry);
-void listTypeInsert(listTypeEntry *entry, robj *value, int where);
-int listTypeEqual(listTypeEntry *entry, robj *o);
-void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
-void listTypeConvert(robj *subject, int enc);
-void unblockClientWaitingData(redisClient *c);
-void handleClientsBlockedOnLists(void);
-void popGenericCommand(redisClient *c, int where);
-void signalListAsReady(redisDb *db, robj *key);
-
-/* MULTI/EXEC/WATCH... */
-void unwatchAllKeys(redisClient *c);
-void initClientMultiState(redisClient *c);
-void freeClientMultiState(redisClient *c);
-void queueMultiCommand(redisClient *c);
-void touchWatchedKey(redisDb *db, robj *key);
-void touchWatchedKeysOnFlush(int dbid);
-void discardTransaction(redisClient *c);
-void flagTransaction(redisClient *c);
-
-/* Redis object implementation */
-void decrRefCount(robj *o);
-void decrRefCountVoid(void *o);
-void incrRefCount(robj *o);
-robj *resetRefCount(robj *obj);
-void freeStringObject(robj *o);
-void freeListObject(robj *o);
-void freeSetObject(robj *o);
-void freeZsetObject(robj *o);
-void freeHashObject(robj *o);
-robj *createObject(int type, void *ptr);
-robj *createStringObject(char *ptr, size_t len);
-robj *createRawStringObject(char *ptr, size_t len);
-robj *createEmbeddedStringObject(char *ptr, size_t len);
-robj *dupStringObject(robj *o);
-int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
-robj *tryObjectEncoding(robj *o);
-robj *getDecodedObject(robj *o);
-size_t stringObjectLen(robj *o);
-robj *createStringObjectFromLongLong(long long value);
-robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
-robj *createQuicklistObject(void);
-robj *createZiplistObject(void);
-robj *createSetObject(void);
-robj *createIntsetObject(void);
-robj *createHashObject(void);
-robj *createZsetObject(void);
-robj *createZsetZiplistObject(void);
-int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg);
-int checkType(redisClient *c, robj *o, int type);
-int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg);
-int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg);
-int getLongLongFromObject(robj *o, long long *target);
-int getLongDoubleFromObject(robj *o, long double *target);
-int getLongDoubleFromObjectOrReply(redisClient *c, robj *o, long double *target, const char *msg);
-char *strEncoding(int encoding);
-int compareStringObjects(robj *a, robj *b);
-int collateStringObjects(robj *a, robj *b);
-int equalStringObjects(robj *a, robj *b);
-unsigned long long estimateObjectIdleTime(robj *o);
-#define sdsEncodedObject(objptr) (objptr->encoding == REDIS_ENCODING_RAW || objptr->encoding == REDIS_ENCODING_EMBSTR)
-
-/* Synchronous I/O with timeout */
-ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
-ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
-ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
-
-/* Replication */
-void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
-void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc);
-void updateSlavesWaitingBgsave(int bgsaveerr, int type);
-void replicationCron(void);
-void replicationHandleMasterDisconnection(void);
-void replicationCacheMaster(redisClient *c);
-void resizeReplicationBacklog(long long newsize);
-void replicationSetMaster(char *ip, int port);
-void replicationUnsetMaster(void);
-void refreshGoodSlavesCount(void);
-void replicationScriptCacheInit(void);
-void replicationScriptCacheFlush(void);
-void replicationScriptCacheAdd(sds sha1);
-int replicationScriptCacheExists(sds sha1);
-void processClientsWaitingReplicas(void);
-void unblockClientWaitingReplicas(redisClient *c);
-int replicationCountAcksByOffset(long long offset);
-void replicationSendNewlineToMaster(void);
-long long replicationGetSlaveOffset(void);
-char *replicationGetSlaveName(redisClient *c);
-
-/* Generic persistence functions */
-void startLoading(FILE *fp);
-void loadingProgress(off_t pos);
-void stopLoading(void);
-
-/* RDB persistence */
-#include "rdb.h"
-
-/* AOF persistence */
-void flushAppendOnlyFile(int force);
-void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
-void aofRemoveTempFile(pid_t childpid);
-int rewriteAppendOnlyFileBackground(void);
-int loadAppendOnlyFile(char *filename);
-void stopAppendOnly(void);
-int startAppendOnly(void);
-void backgroundRewriteDoneHandler(int exitcode, int bysignal);
-void aofRewriteBufferReset(void);
-unsigned long aofRewriteBufferSize(void);
-
-/* Sorted sets data type */
-
-/* Struct to hold an inclusive/exclusive range spec by score comparison. */
-typedef struct {
- double min, max;
- int minex, maxex; /* are min or max exclusive? */
-} zrangespec;
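Editor's note: the minex/maxex flags mark each end of the interval as open. A hedged sketch of the membership test they imply (the real code performs this check inside the zset range helpers):

    static int exampleScoreInRange(double score, zrangespec *spec) {
        int gte_min = spec->minex ? (score > spec->min) : (score >= spec->min);
        int lte_max = spec->maxex ? (score < spec->max) : (score <= spec->max);
        return gte_min && lte_max;
    }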
-
-/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
-typedef struct {
- robj *min, *max; /* May be set to shared.(minstring|maxstring) */
- int minex, maxex; /* are min or max exclusive? */
-} zlexrangespec;
-
-zskiplist *zslCreate(void);
-void zslFree(zskiplist *zsl);
-zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);
-unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score);
-int zslDelete(zskiplist *zsl, double score, robj *obj);
-zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
-zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
-double zzlGetScore(unsigned char *sptr);
-void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
-void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
-unsigned int zsetLength(robj *zobj);
-void zsetConvert(robj *zobj, int encoding);
-unsigned long zslGetRank(zskiplist *zsl, double score, robj *o);
-
-/* Core functions */
-int freeMemoryIfNeeded(void);
-int processCommand(redisClient *c);
-void setupSignalHandlers(void);
-struct redisCommand *lookupCommand(sds name);
-struct redisCommand *lookupCommandByCString(char *s);
-struct redisCommand *lookupCommandOrOriginal(sds name);
-void call(redisClient *c, int flags);
-void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
-void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
-void forceCommandPropagation(redisClient *c, int flags);
-int prepareForShutdown();
-#ifdef __GNUC__
-void redisLog(int level, const char *fmt, ...)
- __attribute__((format(printf, 2, 3)));
-#else
-void redisLog(int level, const char *fmt, ...);
-#endif
-void redisLogRaw(int level, const char *msg);
-void redisLogFromHandler(int level, const char *msg);
-void usage(void);
-void updateDictResizePolicy(void);
-int htNeedsResize(dict *dict);
-void oom(const char *msg);
-void populateCommandTable(void);
-void resetCommandTableStats(void);
-void adjustOpenFilesLimit(void);
-void closeListeningSockets(int unlink_unix_socket);
-void updateCachedTime(void);
-void resetServerStats(void);
-unsigned int getLRUClock(void);
-char *maxmemoryToString(void);
-
-/* Set data type */
-robj *setTypeCreate(robj *value);
-int setTypeAdd(robj *subject, robj *value);
-int setTypeRemove(robj *subject, robj *value);
-int setTypeIsMember(robj *subject, robj *value);
-setTypeIterator *setTypeInitIterator(robj *subject);
-void setTypeReleaseIterator(setTypeIterator *si);
-int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele);
-robj *setTypeNextObject(setTypeIterator *si);
-int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele);
-unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
-unsigned long setTypeSize(robj *subject);
-void setTypeConvert(robj *subject, int enc);
-
-/* Hash data type */
-void hashTypeConvert(robj *o, int enc);
-void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
-void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
-robj *hashTypeGetObject(robj *o, robj *key);
-int hashTypeExists(robj *o, robj *key);
-int hashTypeSet(robj *o, robj *key, robj *value);
-int hashTypeDelete(robj *o, robj *key);
-unsigned long hashTypeLength(robj *o);
-hashTypeIterator *hashTypeInitIterator(robj *subject);
-void hashTypeReleaseIterator(hashTypeIterator *hi);
-int hashTypeNext(hashTypeIterator *hi);
-void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
- unsigned char **vstr,
- unsigned int *vlen,
- long long *vll);
-void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst);
-robj *hashTypeCurrentObject(hashTypeIterator *hi, int what);
-robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key);
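Editor's note: the hash iterator API above is typically driven in a simple loop. A hedged sketch, assuming hashTypeNext() returns REDIS_ERR when the iterator is exhausted and that hashTypeCurrentObject() hands back objects the caller must release:

    void exampleIterateHash(robj *o) {
        hashTypeIterator *hi = hashTypeInitIterator(o);
        while (hashTypeNext(hi) != REDIS_ERR) {
            robj *field = hashTypeCurrentObject(hi, REDIS_HASH_KEY);
            robj *value = hashTypeCurrentObject(hi, REDIS_HASH_VALUE);
            /* ... use field and value ... */
            decrRefCount(field);
            decrRefCount(value);
        }
        hashTypeReleaseIterator(hi);
    }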
-
-/* Pub / Sub */
-int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
-int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
-void freePubsubPattern(void *p);
-int listMatchPubsubPattern(void *a, void *b);
-int pubsubPublishMessage(robj *channel, robj *message);
-
-/* Keyspace events notification */
-void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
-int keyspaceEventsStringToFlags(char *classes);
-sds keyspaceEventsFlagsToString(int flags);
-
-/* Configuration */
-void loadServerConfig(char *filename, char *options);
-void appendServerSaveParams(time_t seconds, int changes);
-void resetServerSaveParams(void);
-struct rewriteConfigState; /* Forward declaration to export API. */
-void rewriteConfigRewriteLine(struct rewriteConfigState *state, char *option, sds line, int force);
-int rewriteConfig(char *path);
-
-/* db.c -- Keyspace access API */
-int removeExpire(redisDb *db, robj *key);
-void propagateExpire(redisDb *db, robj *key);
-int expireIfNeeded(redisDb *db, robj *key);
-long long getExpire(redisDb *db, robj *key);
-void setExpire(redisDb *db, robj *key, long long when);
-robj *lookupKey(redisDb *db, robj *key);
-robj *lookupKeyRead(redisDb *db, robj *key);
-robj *lookupKeyWrite(redisDb *db, robj *key);
-robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply);
-robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply);
-void dbAdd(redisDb *db, robj *key, robj *val);
-void dbOverwrite(redisDb *db, robj *key, robj *val);
-void setKey(redisDb *db, robj *key, robj *val);
-int dbExists(redisDb *db, robj *key);
-robj *dbRandomKey(redisDb *db);
-int dbDelete(redisDb *db, robj *key);
-robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
-long long emptyDb(void(callback)(void*));
-int selectDb(redisClient *c, int id);
-void signalModifiedKey(redisDb *db, robj *key);
-void signalFlushedDb(int dbid);
-unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
-unsigned int countKeysInSlot(unsigned int hashslot);
-unsigned int delKeysInSlot(unsigned int hashslot);
-int verifyClusterConfigWithData(void);
-void scanGenericCommand(redisClient *c, robj *o, unsigned long cursor);
-int parseScanCursorOrReply(redisClient *c, robj *o, unsigned long *cursor);
-
-/* API to get key arguments from commands */
-int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-void getKeysFreeResult(int *result);
-int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys);
-int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
-
-/* Cluster */
-void clusterInit(void);
-unsigned short crc16(const char *buf, int len);
-unsigned int keyHashSlot(char *key, int keylen);
-void clusterCron(void);
-void clusterPropagatePublish(robj *channel, robj *message);
-void migrateCloseTimedoutSockets(void);
-void clusterBeforeSleep(void);
-
-/* Sentinel */
-void initSentinelConfig(void);
-void initSentinel(void);
-void sentinelTimer(void);
-char *sentinelHandleConfiguration(char **argv, int argc);
-void sentinelIsRunning(void);
-
-/* Scripting */
-void scriptingInit(void);
-
-/* Blocked clients */
-void processUnblockedClients(void);
-void blockClient(redisClient *c, int btype);
-void unblockClient(redisClient *c);
-void replyToBlockedClientTimedOut(redisClient *c);
-int getTimeoutFromObjectOrReply(redisClient *c, robj *object, mstime_t *timeout, int unit);
-
-/* Git SHA1 */
-char *redisGitSHA1(void);
-char *redisGitDirty(void);
-uint64_t redisBuildId(void);
-
-/* Commands prototypes */
-void authCommand(redisClient *c);
-void pingCommand(redisClient *c);
-void echoCommand(redisClient *c);
-void commandCommand(redisClient *c);
-void setCommand(redisClient *c);
-void setnxCommand(redisClient *c);
-void setexCommand(redisClient *c);
-void psetexCommand(redisClient *c);
-void getCommand(redisClient *c);
-void delCommand(redisClient *c);
-void existsCommand(redisClient *c);
-void setbitCommand(redisClient *c);
-void getbitCommand(redisClient *c);
-void setrangeCommand(redisClient *c);
-void getrangeCommand(redisClient *c);
-void incrCommand(redisClient *c);
-void decrCommand(redisClient *c);
-void incrbyCommand(redisClient *c);
-void decrbyCommand(redisClient *c);
-void incrbyfloatCommand(redisClient *c);
-void selectCommand(redisClient *c);
-void randomkeyCommand(redisClient *c);
-void keysCommand(redisClient *c);
-void scanCommand(redisClient *c);
-void dbsizeCommand(redisClient *c);
-void lastsaveCommand(redisClient *c);
-void saveCommand(redisClient *c);
-void bgsaveCommand(redisClient *c);
-void bgrewriteaofCommand(redisClient *c);
-void shutdownCommand(redisClient *c);
-void moveCommand(redisClient *c);
-void renameCommand(redisClient *c);
-void renamenxCommand(redisClient *c);
-void lpushCommand(redisClient *c);
-void rpushCommand(redisClient *c);
-void lpushxCommand(redisClient *c);
-void rpushxCommand(redisClient *c);
-void linsertCommand(redisClient *c);
-void lpopCommand(redisClient *c);
-void rpopCommand(redisClient *c);
-void llenCommand(redisClient *c);
-void lindexCommand(redisClient *c);
-void lrangeCommand(redisClient *c);
-void ltrimCommand(redisClient *c);
-void typeCommand(redisClient *c);
-void lsetCommand(redisClient *c);
-void saddCommand(redisClient *c);
-void sremCommand(redisClient *c);
-void smoveCommand(redisClient *c);
-void sismemberCommand(redisClient *c);
-void scardCommand(redisClient *c);
-void spopCommand(redisClient *c);
-void srandmemberCommand(redisClient *c);
-void sinterCommand(redisClient *c);
-void sinterstoreCommand(redisClient *c);
-void sunionCommand(redisClient *c);
-void sunionstoreCommand(redisClient *c);
-void sdiffCommand(redisClient *c);
-void sdiffstoreCommand(redisClient *c);
-void sscanCommand(redisClient *c);
-void syncCommand(redisClient *c);
-void flushdbCommand(redisClient *c);
-void flushallCommand(redisClient *c);
-void sortCommand(redisClient *c);
-void lremCommand(redisClient *c);
-void rpoplpushCommand(redisClient *c);
-void infoCommand(redisClient *c);
-void mgetCommand(redisClient *c);
-void monitorCommand(redisClient *c);
-void expireCommand(redisClient *c);
-void expireatCommand(redisClient *c);
-void pexpireCommand(redisClient *c);
-void pexpireatCommand(redisClient *c);
-void getsetCommand(redisClient *c);
-void ttlCommand(redisClient *c);
-void pttlCommand(redisClient *c);
-void persistCommand(redisClient *c);
-void slaveofCommand(redisClient *c);
-void roleCommand(redisClient *c);
-void debugCommand(redisClient *c);
-void msetCommand(redisClient *c);
-void msetnxCommand(redisClient *c);
-void zaddCommand(redisClient *c);
-void zincrbyCommand(redisClient *c);
-void zrangeCommand(redisClient *c);
-void zrangebyscoreCommand(redisClient *c);
-void zrevrangebyscoreCommand(redisClient *c);
-void zrangebylexCommand(redisClient *c);
-void zrevrangebylexCommand(redisClient *c);
-void zcountCommand(redisClient *c);
-void zlexcountCommand(redisClient *c);
-void zrevrangeCommand(redisClient *c);
-void zcardCommand(redisClient *c);
-void zremCommand(redisClient *c);
-void zscoreCommand(redisClient *c);
-void zremrangebyscoreCommand(redisClient *c);
-void zremrangebylexCommand(redisClient *c);
-void multiCommand(redisClient *c);
-void execCommand(redisClient *c);
-void discardCommand(redisClient *c);
-void blpopCommand(redisClient *c);
-void brpopCommand(redisClient *c);
-void brpoplpushCommand(redisClient *c);
-void appendCommand(redisClient *c);
-void strlenCommand(redisClient *c);
-void zrankCommand(redisClient *c);
-void zrevrankCommand(redisClient *c);
-void hsetCommand(redisClient *c);
-void hsetnxCommand(redisClient *c);
-void hgetCommand(redisClient *c);
-void hmsetCommand(redisClient *c);
-void hmgetCommand(redisClient *c);
-void hdelCommand(redisClient *c);
-void hlenCommand(redisClient *c);
-void zremrangebyrankCommand(redisClient *c);
-void zunionstoreCommand(redisClient *c);
-void zinterstoreCommand(redisClient *c);
-void zscanCommand(redisClient *c);
-void hkeysCommand(redisClient *c);
-void hvalsCommand(redisClient *c);
-void hgetallCommand(redisClient *c);
-void hexistsCommand(redisClient *c);
-void hscanCommand(redisClient *c);
-void configCommand(redisClient *c);
-void hincrbyCommand(redisClient *c);
-void hincrbyfloatCommand(redisClient *c);
-void subscribeCommand(redisClient *c);
-void unsubscribeCommand(redisClient *c);
-void psubscribeCommand(redisClient *c);
-void punsubscribeCommand(redisClient *c);
-void publishCommand(redisClient *c);
-void pubsubCommand(redisClient *c);
-void watchCommand(redisClient *c);
-void unwatchCommand(redisClient *c);
-void clusterCommand(redisClient *c);
-void restoreCommand(redisClient *c);
-void migrateCommand(redisClient *c);
-void askingCommand(redisClient *c);
-void readonlyCommand(redisClient *c);
-void readwriteCommand(redisClient *c);
-void dumpCommand(redisClient *c);
-void objectCommand(redisClient *c);
-void clientCommand(redisClient *c);
-void evalCommand(redisClient *c);
-void evalShaCommand(redisClient *c);
-void scriptCommand(redisClient *c);
-void timeCommand(redisClient *c);
-void bitopCommand(redisClient *c);
-void bitcountCommand(redisClient *c);
-void bitposCommand(redisClient *c);
-void replconfCommand(redisClient *c);
-void waitCommand(redisClient *c);
-void pfselftestCommand(redisClient *c);
-void pfaddCommand(redisClient *c);
-void pfcountCommand(redisClient *c);
-void pfmergeCommand(redisClient *c);
-void pfdebugCommand(redisClient *c);
-void latencyCommand(redisClient *c);
-
-#if defined(__GNUC__)
-void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
-void free(void *ptr) __attribute__ ((deprecated));
-void *malloc(size_t size) __attribute__ ((deprecated));
-void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
-#endif
-
-/* Debugging stuff */
-void _redisAssertWithInfo(redisClient *c, robj *o, char *estr, char *file, int line);
-void _redisAssert(char *estr, char *file, int line);
-void _redisPanic(char *msg, char *file, int line);
-void bugReportStart(void);
-void redisLogObjectDebugInfo(robj *o);
-void sigsegvHandler(int sig, siginfo_t *info, void *secret);
-sds genRedisInfoString(char *section);
-void enableWatchdog(int period);
-void disableWatchdog(void);
-void watchdogScheduleSignal(int period);
-void redisLogHexDump(int level, char *descr, void *value, size_t len);
-
-#define redisDebug(fmt, ...) \
- printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
-#define redisDebugMark() \
- printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
-
-#endif
diff --git a/src/redisassert.h b/src/redisassert.h
index e5825c0f5..61ab35a14 100644
--- a/src/redisassert.h
+++ b/src/redisassert.h
@@ -1,4 +1,4 @@
-/* redisassert.h -- Drop in replacemnet assert.h that prints the stack trace
+/* redisassert.h -- Drop-in replacement assert.h that prints the stack trace
* in the Redis logs.
*
* This file should be included instead of "assert.h" inside libraries used by
@@ -40,8 +40,10 @@
#include <unistd.h> /* for _exit() */
-#define assert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define assert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define panic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1)
-void _redisAssert(char *estr, char *file, int line);
+void _serverAssert(char *estr, char *file, int line);
+void _serverPanic(const char *file, int line, const char *msg, ...);
#endif
diff --git a/src/redismodule.h b/src/redismodule.h
new file mode 100644
index 000000000..47ecb1308
--- /dev/null
+++ b/src/redismodule.h
@@ -0,0 +1,457 @@
+#ifndef REDISMODULE_H
+#define REDISMODULE_H
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+
+/* ---------------- Defines common between core and modules --------------- */
+
+/* Error status return values. */
+#define REDISMODULE_OK 0
+#define REDISMODULE_ERR 1
+
+/* API versions. */
+#define REDISMODULE_APIVER_1 1
+
+/* API flags and constants */
+#define REDISMODULE_READ (1<<0)
+#define REDISMODULE_WRITE (1<<1)
+
+#define REDISMODULE_LIST_HEAD 0
+#define REDISMODULE_LIST_TAIL 1
+
+/* Key types. */
+#define REDISMODULE_KEYTYPE_EMPTY 0
+#define REDISMODULE_KEYTYPE_STRING 1
+#define REDISMODULE_KEYTYPE_LIST 2
+#define REDISMODULE_KEYTYPE_HASH 3
+#define REDISMODULE_KEYTYPE_SET 4
+#define REDISMODULE_KEYTYPE_ZSET 5
+#define REDISMODULE_KEYTYPE_MODULE 6
+
+/* Reply types. */
+#define REDISMODULE_REPLY_UNKNOWN -1
+#define REDISMODULE_REPLY_STRING 0
+#define REDISMODULE_REPLY_ERROR 1
+#define REDISMODULE_REPLY_INTEGER 2
+#define REDISMODULE_REPLY_ARRAY 3
+#define REDISMODULE_REPLY_NULL 4
+
+/* Postponed array length. */
+#define REDISMODULE_POSTPONED_ARRAY_LEN -1
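Editor's note: REDISMODULE_POSTPONED_ARRAY_LEN lets a module open an array reply before the element count is known, then patch the length with RedisModule_ReplySetArrayLength(). A hedged sketch (the command itself is made up):

    int ExampleCmd(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        (void)argv; (void)argc;
        RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
        long emitted = 0;
        for (long long i = 0; i < 10; i++) {
            if (i % 2) continue;                   /* keep even numbers only */
            RedisModule_ReplyWithLongLong(ctx, i);
            emitted++;
        }
        RedisModule_ReplySetArrayLength(ctx, emitted); /* fix up the header */
        return REDISMODULE_OK;
    }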
+
+/* Expire */
+#define REDISMODULE_NO_EXPIRE -1
+
+/* Sorted set API flags. */
+#define REDISMODULE_ZADD_XX (1<<0)
+#define REDISMODULE_ZADD_NX (1<<1)
+#define REDISMODULE_ZADD_ADDED (1<<2)
+#define REDISMODULE_ZADD_UPDATED (1<<3)
+#define REDISMODULE_ZADD_NOP (1<<4)
+
+/* Hash API flags. */
+#define REDISMODULE_HASH_NONE 0
+#define REDISMODULE_HASH_NX (1<<0)
+#define REDISMODULE_HASH_XX (1<<1)
+#define REDISMODULE_HASH_CFIELDS (1<<2)
+#define REDISMODULE_HASH_EXISTS (1<<3)
+
+/* Context Flags: Info about the current context returned by
+ * RM_GetContextFlags(). */
+
+/* The command is running in the context of a Lua script */
+#define REDISMODULE_CTX_FLAGS_LUA (1<<0)
+/* The command is running inside a Redis transaction */
+#define REDISMODULE_CTX_FLAGS_MULTI (1<<1)
+/* The instance is a master */
+#define REDISMODULE_CTX_FLAGS_MASTER (1<<2)
+/* The instance is a slave */
+#define REDISMODULE_CTX_FLAGS_SLAVE (1<<3)
+/* The instance is read-only (usually meaning it's a slave as well) */
+#define REDISMODULE_CTX_FLAGS_READONLY (1<<4)
+/* The instance is running in cluster mode */
+#define REDISMODULE_CTX_FLAGS_CLUSTER (1<<5)
+/* The instance has AOF enabled */
+#define REDISMODULE_CTX_FLAGS_AOF (1<<6)
+/* The instance has RDB enabled */
+#define REDISMODULE_CTX_FLAGS_RDB (1<<7)
+/* The instance has Maxmemory set */
+#define REDISMODULE_CTX_FLAGS_MAXMEMORY (1<<8)
+/* Maxmemory is set and has an eviction policy that may delete keys */
+#define REDISMODULE_CTX_FLAGS_EVICT (1<<9)
+/* Redis is out of memory according to the maxmemory flag. */
+#define REDISMODULE_CTX_FLAGS_OOM (1<<10)
+/* Less than 25% of memory available according to maxmemory. */
+#define REDISMODULE_CTX_FLAGS_OOM_WARNING (1<<11)
+
+#define REDISMODULE_NOTIFY_GENERIC (1<<2) /* g */
+#define REDISMODULE_NOTIFY_STRING (1<<3) /* $ */
+#define REDISMODULE_NOTIFY_LIST (1<<4) /* l */
+#define REDISMODULE_NOTIFY_SET (1<<5) /* s */
+#define REDISMODULE_NOTIFY_HASH (1<<6) /* h */
+#define REDISMODULE_NOTIFY_ZSET (1<<7) /* z */
+#define REDISMODULE_NOTIFY_EXPIRED (1<<8) /* x */
+#define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */
+#define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */
+#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM) /* A */
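Editor's note: the NOTIFY flags above select event classes for RedisModule_SubscribeToKeyspaceEvents(), declared further down under the experimental API. A hedged sketch of a callback wired to expiration events:

    int ExampleOnEvent(RedisModuleCtx *ctx, int type, const char *event,
                       RedisModuleString *key) {
        (void)type; (void)key;
        RedisModule_Log(ctx, "notice", "keyspace event '%s' observed", event);
        return REDISMODULE_OK;
    }
    /* In RedisModule_OnLoad():
     *   RedisModule_SubscribeToKeyspaceEvents(ctx, REDISMODULE_NOTIFY_EXPIRED,
     *                                         ExampleOnEvent);              */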
+
+
+/* A special pointer that we can use between the core and the module to signal
+ * field deletion, and that can never be a valid pointer. */
+#define REDISMODULE_HASH_DELETE ((RedisModuleString*)(long)1)
+
+/* Error messages. */
+#define REDISMODULE_ERRORMSG_WRONGTYPE "WRONGTYPE Operation against a key holding the wrong kind of value"
+
+#define REDISMODULE_POSITIVE_INFINITE (1.0/0.0)
+#define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0)
+
+/* Cluster API defines. */
+#define REDISMODULE_NODE_ID_LEN 40
+#define REDISMODULE_NODE_MYSELF (1<<0)
+#define REDISMODULE_NODE_MASTER (1<<1)
+#define REDISMODULE_NODE_SLAVE (1<<2)
+#define REDISMODULE_NODE_PFAIL (1<<3)
+#define REDISMODULE_NODE_FAIL (1<<4)
+#define REDISMODULE_NODE_NOFAILOVER (1<<5)
+
+#define REDISMODULE_NOT_USED(V) ((void) V)
+
+/* This type represents a timer handle: it is returned when a timer is
+ * registered and is later used to invalidate that timer. It's just a 64 bit
+ * number, because this is how each timer is represented inside the radix tree
+ * of timers that are going to expire, sorted by expire time. */
+typedef uint64_t RedisModuleTimerID;
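Editor's note: a hedged sketch of how the handle is used with the timer API declared below (the period and payload are made up):

    void ExampleTimerFired(RedisModuleCtx *ctx, void *data) {
        RedisModule_Log(ctx, "notice", "timer fired, payload=%p", data);
    }
    /* In some command handler:
     *   RedisModuleTimerID tid =
     *       RedisModule_CreateTimer(ctx, 1000, ExampleTimerFired, payload);
     *   ...
     *   void *payload2;
     *   RedisModule_StopTimer(ctx, tid, &payload2); // cancel before expiry */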
+
+/* ------------------------- End of common defines ------------------------ */
+
+#ifndef REDISMODULE_CORE
+
+typedef long long mstime_t;
+
+/* Incomplete structures for compiler checks but opaque access. */
+typedef struct RedisModuleCtx RedisModuleCtx;
+typedef struct RedisModuleKey RedisModuleKey;
+typedef struct RedisModuleString RedisModuleString;
+typedef struct RedisModuleCallReply RedisModuleCallReply;
+typedef struct RedisModuleIO RedisModuleIO;
+typedef struct RedisModuleType RedisModuleType;
+typedef struct RedisModuleDigest RedisModuleDigest;
+typedef struct RedisModuleBlockedClient RedisModuleBlockedClient;
+typedef struct RedisModuleClusterInfo RedisModuleClusterInfo;
+
+typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc);
+typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc);
+typedef int (*RedisModuleNotificationFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key);
+typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver);
+typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value);
+typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value);
+typedef size_t (*RedisModuleTypeMemUsageFunc)(const void *value);
+typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value);
+typedef void (*RedisModuleTypeFreeFunc)(void *value);
+typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len);
+typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data);
+
+#define REDISMODULE_TYPE_METHOD_VERSION 1
+typedef struct RedisModuleTypeMethods {
+ uint64_t version;
+ RedisModuleTypeLoadFunc rdb_load;
+ RedisModuleTypeSaveFunc rdb_save;
+ RedisModuleTypeRewriteFunc aof_rewrite;
+ RedisModuleTypeMemUsageFunc mem_usage;
+ RedisModuleTypeDigestFunc digest;
+ RedisModuleTypeFreeFunc free;
+} RedisModuleTypeMethods;
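Editor's note: a hedged sketch of registering a native type with the methods table above. "exampletp" is a placeholder (type names are exactly nine characters in the real API), and the NULL callbacks keep the sketch self-contained; a real type would supply at least rdb_load/rdb_save:

    static RedisModuleType *ExampleType;

    int ExampleRegisterType(RedisModuleCtx *ctx) {
        RedisModuleTypeMethods tm = {
            .version = REDISMODULE_TYPE_METHOD_VERSION,
            .rdb_load = NULL, .rdb_save = NULL, .aof_rewrite = NULL,
            .mem_usage = NULL, .digest = NULL, .free = NULL
        };
        ExampleType = RedisModule_CreateDataType(ctx, "exampletp", 0, &tm);
        return (ExampleType == NULL) ? REDISMODULE_ERR : REDISMODULE_OK;
    }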
+
+#define REDISMODULE_GET_API(name) \
+ RedisModule_GetApi("RedisModule_" #name, ((void **)&RedisModule_ ## name))
+
+#define REDISMODULE_API_FUNC(x) (*x)
+
+
+void *REDISMODULE_API_FUNC(RedisModule_Alloc)(size_t bytes);
+void *REDISMODULE_API_FUNC(RedisModule_Realloc)(void *ptr, size_t bytes);
+void REDISMODULE_API_FUNC(RedisModule_Free)(void *ptr);
+void *REDISMODULE_API_FUNC(RedisModule_Calloc)(size_t nmemb, size_t size);
+char *REDISMODULE_API_FUNC(RedisModule_Strdup)(const char *str);
+int REDISMODULE_API_FUNC(RedisModule_GetApi)(const char *, void *);
+int REDISMODULE_API_FUNC(RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep);
+void REDISMODULE_API_FUNC(RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver);
+int REDISMODULE_API_FUNC(RedisModule_IsModuleNameBusy)(const char *name);
+int REDISMODULE_API_FUNC(RedisModule_WrongArity)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithLongLong)(RedisModuleCtx *ctx, long long ll);
+int REDISMODULE_API_FUNC(RedisModule_GetSelectedDb)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_SelectDb)(RedisModuleCtx *ctx, int newid);
+void *REDISMODULE_API_FUNC(RedisModule_OpenKey)(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode);
+void REDISMODULE_API_FUNC(RedisModule_CloseKey)(RedisModuleKey *kp);
+int REDISMODULE_API_FUNC(RedisModule_KeyType)(RedisModuleKey *kp);
+size_t REDISMODULE_API_FUNC(RedisModule_ValueLength)(RedisModuleKey *kp);
+int REDISMODULE_API_FUNC(RedisModule_ListPush)(RedisModuleKey *kp, int where, RedisModuleString *ele);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_ListPop)(RedisModuleKey *key, int where);
+RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_Call)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...);
+const char *REDISMODULE_API_FUNC(RedisModule_CallReplyProto)(RedisModuleCallReply *reply, size_t *len);
+void REDISMODULE_API_FUNC(RedisModule_FreeCallReply)(RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_CallReplyType)(RedisModuleCallReply *reply);
+long long REDISMODULE_API_FUNC(RedisModule_CallReplyInteger)(RedisModuleCallReply *reply);
+size_t REDISMODULE_API_FUNC(RedisModule_CallReplyLength)(RedisModuleCallReply *reply);
+RedisModuleCallReply *REDISMODULE_API_FUNC(RedisModule_CallReplyArrayElement)(RedisModuleCallReply *reply, size_t idx);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str);
+const char *REDISMODULE_API_FUNC(RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, const char *msg);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len);
+void REDISMODULE_API_FUNC(RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithNull)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d);
+int REDISMODULE_API_FUNC(RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll);
+int REDISMODULE_API_FUNC(RedisModule_StringToDouble)(const RedisModuleString *str, double *d);
+void REDISMODULE_API_FUNC(RedisModule_AutoMemory)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...);
+int REDISMODULE_API_FUNC(RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx);
+const char *REDISMODULE_API_FUNC(RedisModule_CallReplyStringPtr)(RedisModuleCallReply *reply, size_t *len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CreateStringFromCallReply)(RedisModuleCallReply *reply);
+int REDISMODULE_API_FUNC(RedisModule_DeleteKey)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_UnlinkKey)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_StringSet)(RedisModuleKey *key, RedisModuleString *str);
+char *REDISMODULE_API_FUNC(RedisModule_StringDMA)(RedisModuleKey *key, size_t *len, int mode);
+int REDISMODULE_API_FUNC(RedisModule_StringTruncate)(RedisModuleKey *key, size_t newlen);
+mstime_t REDISMODULE_API_FUNC(RedisModule_GetExpire)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_SetExpire)(RedisModuleKey *key, mstime_t expire);
+int REDISMODULE_API_FUNC(RedisModule_ZsetAdd)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr);
+int REDISMODULE_API_FUNC(RedisModule_ZsetIncrby)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore);
+int REDISMODULE_API_FUNC(RedisModule_ZsetScore)(RedisModuleKey *key, RedisModuleString *ele, double *score);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRem)(RedisModuleKey *key, RedisModuleString *ele, int *deleted);
+void REDISMODULE_API_FUNC(RedisModule_ZsetRangeStop)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetFirstInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex);
+int REDISMODULE_API_FUNC(RedisModule_ZsetLastInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex);
+int REDISMODULE_API_FUNC(RedisModule_ZsetFirstInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max);
+int REDISMODULE_API_FUNC(RedisModule_ZsetLastInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_ZsetRangeCurrentElement)(RedisModuleKey *key, double *score);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangeNext)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangePrev)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_ZsetRangeEndReached)(RedisModuleKey *key);
+int REDISMODULE_API_FUNC(RedisModule_HashSet)(RedisModuleKey *key, int flags, ...);
+int REDISMODULE_API_FUNC(RedisModule_HashGet)(RedisModuleKey *key, int flags, ...);
+int REDISMODULE_API_FUNC(RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos);
+unsigned long long REDISMODULE_API_FUNC(RedisModule_GetClientId)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_GetContextFlags)(RedisModuleCtx *ctx);
+void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes);
+RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods);
+int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value);
+RedisModuleType *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetType)(RedisModuleKey *key);
+void *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetValue)(RedisModuleKey *key);
+void REDISMODULE_API_FUNC(RedisModule_SaveUnsigned)(RedisModuleIO *io, uint64_t value);
+uint64_t REDISMODULE_API_FUNC(RedisModule_LoadUnsigned)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_SaveSigned)(RedisModuleIO *io, int64_t value);
+int64_t REDISMODULE_API_FUNC(RedisModule_LoadSigned)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_EmitAOF)(RedisModuleIO *io, const char *cmdname, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_SaveString)(RedisModuleIO *io, RedisModuleString *s);
+void REDISMODULE_API_FUNC(RedisModule_SaveStringBuffer)(RedisModuleIO *io, const char *str, size_t len);
+RedisModuleString *REDISMODULE_API_FUNC(RedisModule_LoadString)(RedisModuleIO *io);
+char *REDISMODULE_API_FUNC(RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr);
+void REDISMODULE_API_FUNC(RedisModule_SaveDouble)(RedisModuleIO *io, double value);
+double REDISMODULE_API_FUNC(RedisModule_LoadDouble)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value);
+float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io);
+void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...);
+void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...);
+int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str);
+int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b);
+RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io);
+long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void);
+void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele);
+void REDISMODULE_API_FUNC(RedisModule_DigestEndSequence)(RedisModuleDigest *md);
+
+/* Experimental APIs */
+#ifdef REDISMODULE_EXPERIMENTAL_API
+#define REDISMODULE_EXPERIMENTAL_API_VERSION 3
+RedisModuleBlockedClient *REDISMODULE_API_FUNC(RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms);
+int REDISMODULE_API_FUNC(RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata);
+int REDISMODULE_API_FUNC(RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx);
+void *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx);
+RedisModuleBlockedClient *REDISMODULE_API_FUNC(RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_AbortBlock)(RedisModuleBlockedClient *bc);
+RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc);
+void REDISMODULE_API_FUNC(RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx);
+int REDISMODULE_API_FUNC(RedisModule_SubscribeToKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb);
+int REDISMODULE_API_FUNC(RedisModule_BlockedClientDisconnected)(RedisModuleCtx *ctx);
+void REDISMODULE_API_FUNC(RedisModule_RegisterClusterMessageReceiver)(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback);
+int REDISMODULE_API_FUNC(RedisModule_SendClusterMessage)(RedisModuleCtx *ctx, char *target_id, uint8_t type, unsigned char *msg, uint32_t len);
+int REDISMODULE_API_FUNC(RedisModule_GetClusterNodeInfo)(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags);
+char **REDISMODULE_API_FUNC(RedisModule_GetClusterNodesList)(RedisModuleCtx *ctx, size_t *numnodes);
+void REDISMODULE_API_FUNC(RedisModule_FreeClusterNodesList)(char **ids);
+RedisModuleTimerID REDISMODULE_API_FUNC(RedisModule_CreateTimer)(RedisModuleCtx *ctx, mstime_t period, RedisModuleTimerProc callback, void *data);
+int REDISMODULE_API_FUNC(RedisModule_StopTimer)(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data);
+int REDISMODULE_API_FUNC(RedisModule_GetTimerInfo)(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data);
+const char *REDISMODULE_API_FUNC(RedisModule_GetMyClusterID)(void);
+size_t REDISMODULE_API_FUNC(RedisModule_GetClusterSize)(void);
+void REDISMODULE_API_FUNC(RedisModule_GetRandomBytes)(unsigned char *dst, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_GetRandomHexChars)(char *dst, size_t len);
+void REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback);
+#endif
+
+/* This is included inline inside each Redis module. */
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) __attribute__((unused));
+static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) {
+ void *getapifuncptr = ((void**)ctx)[0];
+ RedisModule_GetApi = (int (*)(const char *, void *)) (unsigned long)getapifuncptr;
+ REDISMODULE_GET_API(Alloc);
+ REDISMODULE_GET_API(Calloc);
+ REDISMODULE_GET_API(Free);
+ REDISMODULE_GET_API(Realloc);
+ REDISMODULE_GET_API(Strdup);
+ REDISMODULE_GET_API(CreateCommand);
+ REDISMODULE_GET_API(SetModuleAttribs);
+ REDISMODULE_GET_API(IsModuleNameBusy);
+ REDISMODULE_GET_API(WrongArity);
+ REDISMODULE_GET_API(ReplyWithLongLong);
+ REDISMODULE_GET_API(ReplyWithError);
+ REDISMODULE_GET_API(ReplyWithSimpleString);
+ REDISMODULE_GET_API(ReplyWithArray);
+ REDISMODULE_GET_API(ReplySetArrayLength);
+ REDISMODULE_GET_API(ReplyWithStringBuffer);
+ REDISMODULE_GET_API(ReplyWithString);
+ REDISMODULE_GET_API(ReplyWithNull);
+ REDISMODULE_GET_API(ReplyWithCallReply);
+ REDISMODULE_GET_API(ReplyWithDouble);
+ REDISMODULE_GET_API(GetSelectedDb);
+ REDISMODULE_GET_API(SelectDb);
+ REDISMODULE_GET_API(OpenKey);
+ REDISMODULE_GET_API(CloseKey);
+ REDISMODULE_GET_API(KeyType);
+ REDISMODULE_GET_API(ValueLength);
+ REDISMODULE_GET_API(ListPush);
+ REDISMODULE_GET_API(ListPop);
+ REDISMODULE_GET_API(StringToLongLong);
+ REDISMODULE_GET_API(StringToDouble);
+ REDISMODULE_GET_API(Call);
+ REDISMODULE_GET_API(CallReplyProto);
+ REDISMODULE_GET_API(FreeCallReply);
+ REDISMODULE_GET_API(CallReplyInteger);
+ REDISMODULE_GET_API(CallReplyType);
+ REDISMODULE_GET_API(CallReplyLength);
+ REDISMODULE_GET_API(CallReplyArrayElement);
+ REDISMODULE_GET_API(CallReplyStringPtr);
+ REDISMODULE_GET_API(CreateStringFromCallReply);
+ REDISMODULE_GET_API(CreateString);
+ REDISMODULE_GET_API(CreateStringFromLongLong);
+ REDISMODULE_GET_API(CreateStringFromString);
+ REDISMODULE_GET_API(CreateStringPrintf);
+ REDISMODULE_GET_API(FreeString);
+ REDISMODULE_GET_API(StringPtrLen);
+ REDISMODULE_GET_API(AutoMemory);
+ REDISMODULE_GET_API(Replicate);
+ REDISMODULE_GET_API(ReplicateVerbatim);
+ REDISMODULE_GET_API(DeleteKey);
+ REDISMODULE_GET_API(UnlinkKey);
+ REDISMODULE_GET_API(StringSet);
+ REDISMODULE_GET_API(StringDMA);
+ REDISMODULE_GET_API(StringTruncate);
+ REDISMODULE_GET_API(GetExpire);
+ REDISMODULE_GET_API(SetExpire);
+ REDISMODULE_GET_API(ZsetAdd);
+ REDISMODULE_GET_API(ZsetIncrby);
+ REDISMODULE_GET_API(ZsetScore);
+ REDISMODULE_GET_API(ZsetRem);
+ REDISMODULE_GET_API(ZsetRangeStop);
+ REDISMODULE_GET_API(ZsetFirstInScoreRange);
+ REDISMODULE_GET_API(ZsetLastInScoreRange);
+ REDISMODULE_GET_API(ZsetFirstInLexRange);
+ REDISMODULE_GET_API(ZsetLastInLexRange);
+ REDISMODULE_GET_API(ZsetRangeCurrentElement);
+ REDISMODULE_GET_API(ZsetRangeNext);
+ REDISMODULE_GET_API(ZsetRangePrev);
+ REDISMODULE_GET_API(ZsetRangeEndReached);
+ REDISMODULE_GET_API(HashSet);
+ REDISMODULE_GET_API(HashGet);
+ REDISMODULE_GET_API(IsKeysPositionRequest);
+ REDISMODULE_GET_API(KeyAtPos);
+ REDISMODULE_GET_API(GetClientId);
+ REDISMODULE_GET_API(GetContextFlags);
+ REDISMODULE_GET_API(PoolAlloc);
+ REDISMODULE_GET_API(CreateDataType);
+ REDISMODULE_GET_API(ModuleTypeSetValue);
+ REDISMODULE_GET_API(ModuleTypeGetType);
+ REDISMODULE_GET_API(ModuleTypeGetValue);
+ REDISMODULE_GET_API(SaveUnsigned);
+ REDISMODULE_GET_API(LoadUnsigned);
+ REDISMODULE_GET_API(SaveSigned);
+ REDISMODULE_GET_API(LoadSigned);
+ REDISMODULE_GET_API(SaveString);
+ REDISMODULE_GET_API(SaveStringBuffer);
+ REDISMODULE_GET_API(LoadString);
+ REDISMODULE_GET_API(LoadStringBuffer);
+ REDISMODULE_GET_API(SaveDouble);
+ REDISMODULE_GET_API(LoadDouble);
+ REDISMODULE_GET_API(SaveFloat);
+ REDISMODULE_GET_API(LoadFloat);
+ REDISMODULE_GET_API(EmitAOF);
+ REDISMODULE_GET_API(Log);
+ REDISMODULE_GET_API(LogIOError);
+ REDISMODULE_GET_API(StringAppendBuffer);
+ REDISMODULE_GET_API(RetainString);
+ REDISMODULE_GET_API(StringCompare);
+ REDISMODULE_GET_API(GetContextFromIO);
+ REDISMODULE_GET_API(Milliseconds);
+ REDISMODULE_GET_API(DigestAddStringBuffer);
+ REDISMODULE_GET_API(DigestAddLongLong);
+ REDISMODULE_GET_API(DigestEndSequence);
+
+#ifdef REDISMODULE_EXPERIMENTAL_API
+ REDISMODULE_GET_API(GetThreadSafeContext);
+ REDISMODULE_GET_API(FreeThreadSafeContext);
+ REDISMODULE_GET_API(ThreadSafeContextLock);
+ REDISMODULE_GET_API(ThreadSafeContextUnlock);
+ REDISMODULE_GET_API(BlockClient);
+ REDISMODULE_GET_API(UnblockClient);
+ REDISMODULE_GET_API(IsBlockedReplyRequest);
+ REDISMODULE_GET_API(IsBlockedTimeoutRequest);
+ REDISMODULE_GET_API(GetBlockedClientPrivateData);
+ REDISMODULE_GET_API(GetBlockedClientHandle);
+ REDISMODULE_GET_API(AbortBlock);
+ REDISMODULE_GET_API(SetDisconnectCallback);
+ REDISMODULE_GET_API(SubscribeToKeyspaceEvents);
+ REDISMODULE_GET_API(BlockedClientDisconnected);
+ REDISMODULE_GET_API(RegisterClusterMessageReceiver);
+ REDISMODULE_GET_API(SendClusterMessage);
+ REDISMODULE_GET_API(GetClusterNodeInfo);
+ REDISMODULE_GET_API(GetClusterNodesList);
+ REDISMODULE_GET_API(FreeClusterNodesList);
+ REDISMODULE_GET_API(CreateTimer);
+ REDISMODULE_GET_API(StopTimer);
+ REDISMODULE_GET_API(GetTimerInfo);
+ REDISMODULE_GET_API(GetMyClusterID);
+ REDISMODULE_GET_API(GetClusterSize);
+ REDISMODULE_GET_API(GetRandomBytes);
+ REDISMODULE_GET_API(GetRandomHexChars);
+#endif
+
+ if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR;
+ RedisModule_SetModuleAttribs(ctx,name,ver,apiver);
+ return REDISMODULE_OK;
+}
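Editor's note: a hedged sketch of the canonical entry point built on the helper above ("example", "example.hello", and HelloCommand are made-up names):

    int HelloCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        (void)argv;
        if (argc != 1) return RedisModule_WrongArity(ctx);
        return RedisModule_ReplyWithSimpleString(ctx, "hello");
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        (void)argv; (void)argc;
        if (RedisModule_Init(ctx, "example", 1, REDISMODULE_APIVER_1)
            == REDISMODULE_ERR) return REDISMODULE_ERR;
        if (RedisModule_CreateCommand(ctx, "example.hello", HelloCommand,
                                      "readonly", 0, 0, 0) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }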
+
+#else
+
+/* Things only defined for the modules core, not exported to modules
+ * including this file. */
+#define RedisModuleString robj
+
+#endif /* REDISMODULE_CORE */
+#endif /* REDISMODULE_H */
diff --git a/src/replication.c b/src/replication.c
index 7e36c3e99..6d589c012 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -29,7 +29,7 @@
*/
-#include "redis.h"
+#include "server.h"
#include <sys/time.h>
#include <unistd.h>
@@ -40,21 +40,27 @@
void replicationDiscardCachedMaster(void);
void replicationResurrectCachedMaster(int newfd);
void replicationSendAck(void);
-void putSlaveOnline(redisClient *slave);
+void putSlaveOnline(client *slave);
+int cancelReplicationHandshake(void);
/* --------------------------- Utility functions ---------------------------- */
/* Return the pointer to a string representing the slave ip:listening_port
* pair. Mostly useful for logging, since we want to log a slave using its
- * IP address and it's listening port which is more clear for the user, for
+ * IP address and its listening port, which is clearer for the user, for
* example: "Closing connection with slave 10.1.2.3:6380". */
-char *replicationGetSlaveName(redisClient *c) {
- static char buf[REDIS_PEER_ID_LEN];
- char ip[REDIS_IP_STR_LEN];
+char *replicationGetSlaveName(client *c) {
+ static char buf[NET_PEER_ID_LEN];
+ char ip[NET_IP_STR_LEN];
ip[0] = '\0';
buf[0] = '\0';
- if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) {
+ if (c->slave_ip[0] != '\0' ||
+ anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1)
+ {
+ /* Note that the 'ip' buffer is always larger than 'c->slave_ip' */
+ if (c->slave_ip[0] != '\0') memcpy(ip,c->slave_ip,sizeof(c->slave_ip));
+
if (c->slave_listening_port)
anetFormatAddr(buf,sizeof(buf),ip,c->slave_listening_port);
else
@@ -69,15 +75,10 @@ char *replicationGetSlaveName(redisClient *c) {
/* ---------------------------------- MASTER -------------------------------- */
void createReplicationBacklog(void) {
- redisAssert(server.repl_backlog == NULL);
+ serverAssert(server.repl_backlog == NULL);
server.repl_backlog = zmalloc(server.repl_backlog_size);
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
- /* When a new backlog buffer is created, we increment the replication
- * offset by one to make sure we'll not be able to PSYNC with any
- * previous slave. This is needed because we avoid incrementing the
- * master_repl_offset if no backlog exists nor slaves are attached. */
- server.master_repl_offset++;
/* We don't have any data inside our buffer, but virtually the first
* byte we have is the next byte that will be generated for the
@@ -92,8 +93,8 @@ void createReplicationBacklog(void) {
* the most recent bytes, or the same data and more free space in case the
* buffer is enlarged). */
void resizeReplicationBacklog(long long newsize) {
- if (newsize < REDIS_REPL_BACKLOG_MIN_SIZE)
- newsize = REDIS_REPL_BACKLOG_MIN_SIZE;
+ if (newsize < CONFIG_REPL_BACKLOG_MIN_SIZE)
+ newsize = CONFIG_REPL_BACKLOG_MIN_SIZE;
if (server.repl_backlog_size == newsize) return;
server.repl_backlog_size = newsize;
@@ -113,7 +114,7 @@ void resizeReplicationBacklog(long long newsize) {
}
void freeReplicationBacklog(void) {
- redisAssert(listLength(server.slaves) == 0);
+ serverAssert(listLength(server.slaves) == 0);
zfree(server.repl_backlog);
server.repl_backlog = NULL;
}
@@ -121,7 +122,7 @@ void freeReplicationBacklog(void) {
/* Add data to the replication backlog.
* This function also increments the global replication offset stored at
* server.master_repl_offset, because there is no case where we want to feed
- * the backlog without incrementing the buffer. */
+ * the backlog without incrementing the offset. */
void feedReplicationBacklog(void *ptr, size_t len) {
unsigned char *p = ptr;
@@ -150,11 +151,11 @@ void feedReplicationBacklog(void *ptr, size_t len) {
/* Wrapper for feedReplicationBacklog() that takes Redis string objects
* as input. */
void feedReplicationBacklogWithObject(robj *o) {
- char llstr[REDIS_LONGSTR_SIZE];
+ char llstr[LONG_STR_SIZE];
void *p;
size_t len;
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
len = ll2string(llstr,sizeof(llstr),(long)o->ptr);
p = llstr;
} else {
@@ -164,31 +165,43 @@ void feedReplicationBacklogWithObject(robj *o) {
feedReplicationBacklog(p,len);
}
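Editor's note: for context, a hedged sketch (not the actual implementation) of the circular write feedReplicationBacklog() performs — the global offset and history length advance, and the write index wraps at the end of the buffer:

    void exampleBacklogWrite(unsigned char *p, size_t len) {
        server.master_repl_offset += len;
        server.repl_backlog_histlen += len;
        if (server.repl_backlog_histlen > server.repl_backlog_size)
            server.repl_backlog_histlen = server.repl_backlog_size;
        while (len) {
            size_t thislen = server.repl_backlog_size - server.repl_backlog_idx;
            if (thislen > len) thislen = len;
            memcpy(server.repl_backlog + server.repl_backlog_idx, p, thislen);
            server.repl_backlog_idx += thislen;
            if (server.repl_backlog_idx == (size_t)server.repl_backlog_size)
                server.repl_backlog_idx = 0;
            p += thislen;
            len -= thislen;
        }
    }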
+/* Propagate write commands to slaves, and populate the replication backlog
+ * as well. This function is used if the instance is a master: we use
+ * the commands received by our clients in order to create the replication
+ * stream. If instead the instance is a slave and has sub-slaves attached,
+ * we use replicationFeedSlavesFromMasterStream(). */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int j, len;
- char llstr[REDIS_LONGSTR_SIZE];
+ char llstr[LONG_STR_SIZE];
+
+ /* If the instance is not a top level master, return ASAP: we'll just proxy
+ * the stream of data we receive from our master instead, in order to
+ * propagate an *identical* replication stream. In this way this slave can
+ * advertise the same replication ID as the master (since it shares the
+ * master replication history and has the same backlog and offsets). */
+ if (server.masterhost != NULL) return;
/* If there aren't slaves, and there is no backlog buffer to populate,
* we can return ASAP. */
if (server.repl_backlog == NULL && listLength(slaves) == 0) return;
/* We can't have slaves attached and no backlog. */
- redisAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
+ serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
/* Send SELECT command to every slave if needed. */
if (server.slaveseldb != dictid) {
robj *selectcmd;
/* For a few DBs we have pre-computed SELECT command. */
- if (dictid >= 0 && dictid < REDIS_SHARED_SELECT_CMDS) {
+ if (dictid >= 0 && dictid < PROTO_SHARED_SELECT_CMDS) {
selectcmd = shared.select[dictid];
} else {
int dictid_len;
dictid_len = ll2string(llstr,sizeof(llstr),dictid);
- selectcmd = createObject(REDIS_STRING,
+ selectcmd = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),
"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
dictid_len, llstr));
@@ -200,18 +213,19 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* Send it to slaves. */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
addReply(slave,selectcmd);
}
- if (dictid < 0 || dictid >= REDIS_SHARED_SELECT_CMDS)
+ if (dictid < 0 || dictid >= PROTO_SHARED_SELECT_CMDS)
decrRefCount(selectcmd);
}
server.slaveseldb = dictid;
/* Write the command to the replication backlog if any. */
if (server.repl_backlog) {
- char aux[REDIS_LONGSTR_SIZE+3];
+ char aux[LONG_STR_SIZE+3];
/* Add the multi bulk reply length. */
aux[0] = '*';
@@ -237,12 +251,12 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
}
/* Write the command to every slave. */
- listRewind(server.slaves,&li);
+ listRewind(slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start */
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
/* Feed slaves that are waiting for the initial SYNC (so these commands
* are queued in the output buffer until the initial SYNC completes),
@@ -258,7 +272,35 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
}
}
-void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **argv, int argc) {
+/* This function is used in order to proxy what we receive from our master
+ * to our sub-slaves. */
+#include <ctype.h>
+void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen) {
+ listNode *ln;
+ listIter li;
+
+ /* Debugging: this is handy to see the stream sent from master
+ * to slaves. Disabled with if(0). */
+ if (0) {
+ printf("%zu:",buflen);
+ for (size_t j = 0; j < buflen; j++) {
+ printf("%c", isprint(buf[j]) ? buf[j] : '.');
+ }
+ printf("\n");
+ }
+
+ if (server.repl_backlog) feedReplicationBacklog(buf,buflen);
+ listRewind(slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ /* Don't feed slaves that are still waiting for BGSAVE to start */
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
+ addReplyString(slave,buf,buflen);
+ }
+}
+
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc) {
listNode *ln;
listIter li;
int j;
@@ -268,16 +310,16 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
gettimeofday(&tv,NULL);
cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec);
- if (c->flags & REDIS_LUA_CLIENT) {
+ if (c->flags & CLIENT_LUA) {
cmdrepr = sdscatprintf(cmdrepr,"[%d lua] ",dictid);
- } else if (c->flags & REDIS_UNIX_SOCKET) {
+ } else if (c->flags & CLIENT_UNIX_SOCKET) {
cmdrepr = sdscatprintf(cmdrepr,"[%d unix:%s] ",dictid,server.unixsocket);
} else {
cmdrepr = sdscatprintf(cmdrepr,"[%d %s] ",dictid,getClientPeerId(c));
}
for (j = 0; j < argc; j++) {
- if (argv[j]->encoding == REDIS_ENCODING_INT) {
+ if (argv[j]->encoding == OBJ_ENCODING_INT) {
cmdrepr = sdscatprintf(cmdrepr, "\"%ld\"", (long)argv[j]->ptr);
} else {
cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
@@ -287,11 +329,11 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
cmdrepr = sdscatlen(cmdrepr," ",1);
}
cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
- cmdobj = createObject(REDIS_STRING,cmdrepr);
+ cmdobj = createObject(OBJ_STRING,cmdrepr);
listRewind(monitors,&li);
while((ln = listNext(&li))) {
- redisClient *monitor = ln->value;
+ client *monitor = ln->value;
addReply(monitor,cmdobj);
}
decrRefCount(cmdobj);
@@ -299,35 +341,35 @@ void replicationFeedMonitors(redisClient *c, list *monitors, int dictid, robj **
/* Feed the slave 'c' with the replication backlog starting from the
* specified 'offset' up to the end of the backlog. */
-long long addReplyReplicationBacklog(redisClient *c, long long offset) {
+long long addReplyReplicationBacklog(client *c, long long offset) {
long long j, skip, len;
- redisLog(REDIS_DEBUG, "[PSYNC] Slave request offset: %lld", offset);
+ serverLog(LL_DEBUG, "[PSYNC] Slave request offset: %lld", offset);
if (server.repl_backlog_histlen == 0) {
- redisLog(REDIS_DEBUG, "[PSYNC] Backlog history len is zero");
+ serverLog(LL_DEBUG, "[PSYNC] Backlog history len is zero");
return 0;
}
- redisLog(REDIS_DEBUG, "[PSYNC] Backlog size: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] Backlog size: %lld",
server.repl_backlog_size);
- redisLog(REDIS_DEBUG, "[PSYNC] First byte: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] First byte: %lld",
server.repl_backlog_off);
- redisLog(REDIS_DEBUG, "[PSYNC] History len: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] History len: %lld",
server.repl_backlog_histlen);
- redisLog(REDIS_DEBUG, "[PSYNC] Current index: %lld",
+ serverLog(LL_DEBUG, "[PSYNC] Current index: %lld",
server.repl_backlog_idx);
/* Compute the amount of bytes we need to discard. */
skip = offset - server.repl_backlog_off;
- redisLog(REDIS_DEBUG, "[PSYNC] Skipping: %lld", skip);
+ serverLog(LL_DEBUG, "[PSYNC] Skipping: %lld", skip);
- /* Point j to the oldest byte, that is actaully our
+ /* Point j to the oldest byte, that is actually our
* server.repl_backlog_off byte. */
j = (server.repl_backlog_idx +
(server.repl_backlog_size-server.repl_backlog_histlen)) %
server.repl_backlog_size;
- redisLog(REDIS_DEBUG, "[PSYNC] Index of first byte: %lld", j);
+ serverLog(LL_DEBUG, "[PSYNC] Index of first byte: %lld", j);
/* Discard the amount of data to seek to the specified 'offset'. */
j = (j + skip) % server.repl_backlog_size;
@@ -335,13 +377,13 @@ long long addReplyReplicationBacklog(redisClient *c, long long offset) {
/* Feed slave with data. Since it is a circular buffer we have to
* split the reply in two parts if we are cross-boundary. */
len = server.repl_backlog_histlen - skip;
- redisLog(REDIS_DEBUG, "[PSYNC] Reply total length: %lld", len);
+ serverLog(LL_DEBUG, "[PSYNC] Reply total length: %lld", len);
while(len) {
long long thislen =
((server.repl_backlog_size - j) < len) ?
(server.repl_backlog_size - j) : len;
- redisLog(REDIS_DEBUG, "[PSYNC] addReply() length: %lld", thislen);
+ serverLog(LL_DEBUG, "[PSYNC] addReply() length: %lld", thislen);
addReplySds(c,sdsnewlen(server.repl_backlog + j, thislen));
len -= thislen;
j = 0;
@@ -349,44 +391,111 @@ long long addReplyReplicationBacklog(redisClient *c, long long offset) {
return server.repl_backlog_histlen - skip;
}
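
A worked example of the index arithmetic above: assume repl_backlog_size is 100, repl_backlog_histlen is 60, repl_backlog_idx is 30 and repl_backlog_off is 941 (so master_repl_offset is 1000). The oldest byte lives at j = (30 + (100-60)) % 100 = 70. A slave asking for offset 951 gives skip = 10, so j becomes 80 and len is 50; since only 100-80 = 20 bytes remain before the wrap, the reply is sent as a 20 byte chunk from index 80 followed by a 30 byte chunk from index 0.
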
+/* Return the offset to provide as reply to the PSYNC command received
+ * from the slave. The returned value is only valid immediately after
+ * the BGSAVE process started and before executing any other command
+ * from clients. */
+long long getPsyncInitialOffset(void) {
+ return server.master_repl_offset;
+}
+
+/* Send a FULLRESYNC reply in the specific case of a full resynchronization,
+ * and as a side effect set up the slave for a full sync in different ways:
+ *
+ * 1) Remember, into the slave client structure, the replication offset
+ * we sent here, so that if new slaves will later attach to the same
+ * background RDB saving process (by duplicating this client output
+ * buffer), we can get the right offset from this slave.
+ * 2) Set the replication state of the slave to WAIT_BGSAVE_END so that
+ * we start accumulating differences from this point.
+ * 3) Force the replication stream to re-emit a SELECT statement so
+ * the new slave incremental differences will start selecting the
+ * right database number.
+ *
+ * Normally this function should be called immediately after a successful
+ * BGSAVE for replication was started, or when there is one already in
+ * progress that we attached our slave to. */
+int replicationSetupSlaveForFullResync(client *slave, long long offset) {
+ char buf[128];
+ int buflen;
+
+ slave->psync_initial_offset = offset;
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_END;
+ /* We are going to accumulate the incremental changes for this
+ * slave as well. Set slaveseldb to -1 in order to force to re-emit
+ * a SELECT statement in the replication stream. */
+ server.slaveseldb = -1;
+
+ /* Don't send this reply to slaves that approached us with
+ * the old SYNC command. */
+ if (!(slave->flags & CLIENT_PRE_PSYNC)) {
+ buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
+ server.replid,offset);
+ if (write(slave->fd,buf,buflen) != buflen) {
+ freeClientAsync(slave);
+ return C_ERR;
+ }
+ }
+ return C_OK;
+}
+
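
The reply written above is a single line of the form "+FULLRESYNC <replid> <offset>\r\n", for example (with a made-up 40 character ID) "+FULLRESYNC 9f4bd49bf25a24b619b9f574e5f9fbdd4e048bb6 3477\r\n"; the slave stores both values so it can attempt a partial resynchronization against this master later.
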
/* This function handles the PSYNC command from the point of view of a
* master receiving a request for partial resynchronization.
*
- * On success return REDIS_OK, otherwise REDIS_ERR is returned and we proceed
+ * On success return C_OK, otherwise C_ERR is returned and we proceed
* with the usual full resync. */
-int masterTryPartialResynchronization(redisClient *c) {
+int masterTryPartialResynchronization(client *c) {
long long psync_offset, psync_len;
- char *master_runid = c->argv[1]->ptr;
+ char *master_replid = c->argv[1]->ptr;
char buf[128];
int buflen;
- /* Is the runid of this master the same advertised by the wannabe slave
- * via PSYNC? If runid changed this master is a different instance and
- * there is no way to continue. */
- if (strcasecmp(master_runid, server.runid)) {
+    /* Parse the replication offset requested by the slave. Go to full sync
+     * on parse error: this should never happen but we try to handle
+     * it robustly rather than aborting. */
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
+ C_OK) goto need_full_resync;
+
+ /* Is the replication ID of this master the same advertised by the wannabe
+ * slave via PSYNC? If the replication ID changed this master has a
+ * different replication history, and there is no way to continue.
+ *
+ * Note that there are two potentially valid replication IDs: the ID1
+ * and the ID2. The ID2 however is only valid up to a specific offset. */
+ if (strcasecmp(master_replid, server.replid) &&
+ (strcasecmp(master_replid, server.replid2) ||
+ psync_offset > server.second_replid_offset))
+ {
        /* Replid "?" is used by slaves that want to force a full resync. */
- if (master_runid[0] != '?') {
- redisLog(REDIS_NOTICE,"Partial resynchronization not accepted: "
- "Runid mismatch (Client asked for runid '%s', my runid is '%s')",
- master_runid, server.runid);
+ if (master_replid[0] != '?') {
+ if (strcasecmp(master_replid, server.replid) &&
+ strcasecmp(master_replid, server.replid2))
+ {
+ serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
+ "Replication ID mismatch (Slave asked for '%s', my "
+ "replication IDs are '%s' and '%s')",
+ master_replid, server.replid, server.replid2);
+ } else {
+ serverLog(LL_NOTICE,"Partial resynchronization not accepted: "
+ "Requested offset for second ID was %lld, but I can reply "
+ "up to %lld", psync_offset, server.second_replid_offset);
+ }
} else {
- redisLog(REDIS_NOTICE,"Full resync requested by slave %s",
+ serverLog(LL_NOTICE,"Full resync requested by slave %s",
replicationGetSlaveName(c));
}
goto need_full_resync;
}
/* We still have the data our slave is asking for? */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&psync_offset,NULL) !=
- REDIS_OK) goto need_full_resync;
if (!server.repl_backlog ||
psync_offset < server.repl_backlog_off ||
psync_offset > (server.repl_backlog_off + server.repl_backlog_histlen))
{
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Unable to partial resync with slave %s for lack of backlog (Slave request was: %lld).", replicationGetSlaveName(c), psync_offset);
if (psync_offset > server.master_repl_offset) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Warning: slave %s tried to PSYNC with an offset that is greater than the master replication offset.", replicationGetSlaveName(c));
}
goto need_full_resync;
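
To make the two-ID check above concrete: suppose this master was recently promoted from a slave, so shiftReplicationId() (defined later in this diff) moved the old ID into server.replid2 with second_replid_offset 1001. A PSYNC carrying the old ID with offset 900 passes the ID check (the histories agree up to offset 1000), one carrying the old ID with offset 1200 does not, and one carrying the current replid is acceptable at any offset the backlog can still serve.
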
@@ -396,21 +505,25 @@ int masterTryPartialResynchronization(redisClient *c) {
* 1) Set client state to make it a slave.
* 2) Inform the client we can continue with +CONTINUE
* 3) Send the backlog data (from the offset to the end) to the slave. */
- c->flags |= REDIS_SLAVE;
- c->replstate = REDIS_REPL_ONLINE;
+ c->flags |= CLIENT_SLAVE;
+ c->replstate = SLAVE_STATE_ONLINE;
c->repl_ack_time = server.unixtime;
c->repl_put_online_on_ack = 0;
listAddNodeTail(server.slaves,c);
/* We can't use the connection buffers since they are used to accumulate
* new commands at this stage. But we are sure the socket send buffer is
     * empty, so in practice this write will never fail. */
- buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
+ if (c->slave_capa & SLAVE_CAPA_PSYNC2) {
+ buflen = snprintf(buf,sizeof(buf),"+CONTINUE %s\r\n", server.replid);
+ } else {
+ buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
+ }
if (write(c->fd,buf,buflen) != buflen) {
freeClientAsync(c);
- return REDIS_OK;
+ return C_OK;
}
psync_len = addReplyReplicationBacklog(c,psync_offset);
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Partial resynchronization request from %s accepted. Sending %lld bytes of backlog starting from offset %lld.",
replicationGetSlaveName(c),
psync_len, psync_offset);
@@ -419,55 +532,106 @@ int masterTryPartialResynchronization(redisClient *c) {
* has this state from the previous connection with the master. */
refreshGoodSlavesCount();
- return REDIS_OK; /* The caller can return, no full resync needed. */
+ return C_OK; /* The caller can return, no full resync needed. */
need_full_resync:
- /* We need a full resync for some reason... notify the client. */
- psync_offset = server.master_repl_offset;
- /* Add 1 to psync_offset if it the replication backlog does not exists
- * as when it will be created later we'll increment the offset by one. */
- if (server.repl_backlog == NULL) psync_offset++;
- /* Again, we can't use the connection buffers (see above). */
- buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
- server.runid,psync_offset);
- if (write(c->fd,buf,buflen) != buflen) {
- freeClientAsync(c);
- return REDIS_OK;
- }
- return REDIS_ERR;
+ /* We need a full resync for some reason... Note that we can't
+ * reply to PSYNC right now if a full SYNC is needed. The reply
+ * must include the master offset at the time the RDB file we transfer
+ * is generated, so we need to delay the reply to that moment. */
+ return C_ERR;
}
/* Start a BGSAVE for replication goals, that is, selecting the disk or
 * socket target depending on the configuration, and making sure that
 * the script cache is flushed before starting.
*
- * Returns REDIS_OK on success or REDIS_ERR otherwise. */
-int startBgsaveForReplication(void) {
+ * The mincapa argument is the bitwise AND among the capabilities of all
+ * the slaves waiting for this BGSAVE, so it represents the capabilities
+ * that every such slave supports. Can be tested via SLAVE_CAPA_* macros.
+ *
+ * Side effects, other than starting a BGSAVE:
+ *
+ * 1) Handle the slaves in WAIT_START state, by preparing them for a full
+ * sync if the BGSAVE was successfully started, or sending them an error
+ * and dropping them from the list of slaves.
+ *
+ * 2) Flush the Lua scripting script cache if the BGSAVE was actually
+ * started.
+ *
+ * Returns C_OK on success or C_ERR otherwise. */
+int startBgsaveForReplication(int mincapa) {
int retval;
+ int socket_target = server.repl_diskless_sync && (mincapa & SLAVE_CAPA_EOF);
+ listIter li;
+ listNode *ln;
+
+ serverLog(LL_NOTICE,"Starting BGSAVE for SYNC with target: %s",
+ socket_target ? "slaves sockets" : "disk");
+
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ /* Only do rdbSave* when rsiptr is not NULL,
+ * otherwise slave will miss repl-stream-db. */
+ if (rsiptr) {
+ if (socket_target)
+ retval = rdbSaveToSlavesSockets(rsiptr);
+ else
+ retval = rdbSaveBackground(server.rdb_filename,rsiptr);
+ } else {
+ serverLog(LL_WARNING,"BGSAVE for replication: replication information not available, can't generate the RDB file right now. Try later.");
+ retval = C_ERR;
+ }
- redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC with target: %s",
- server.repl_diskless_sync ? "slaves sockets" : "disk");
+    /* If we failed to BGSAVE, remove the slaves waiting for a full
+     * resynchronization from the list of slaves, inform them with
+     * an error about what happened, and close the connection ASAP. */
+ if (retval == C_ERR) {
+ serverLog(LL_WARNING,"BGSAVE for replication failed");
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ slave->flags &= ~CLIENT_SLAVE;
+ listDelNode(server.slaves,ln);
+ addReplyError(slave,
+ "BGSAVE failed, replication can't continue");
+ slave->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ }
+ }
+ return retval;
+ }
- if (server.repl_diskless_sync)
- retval = rdbSaveToSlavesSockets();
- else
- retval = rdbSaveBackground(server.rdb_filename);
+    /* If the target is socket, rdbSaveToSlavesSockets() already set up
+     * the slaves for a full resync. Otherwise for disk target do it now. */
+ if (!socket_target) {
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
+
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
+ replicationSetupSlaveForFullResync(slave,
+ getPsyncInitialOffset());
+ }
+ }
+ }
/* Flush the script cache, since we need that slave differences are
* accumulated without requiring slaves to match our cached scripts. */
- if (retval == REDIS_OK) replicationScriptCacheFlush();
+ if (retval == C_OK) replicationScriptCacheFlush();
return retval;
}
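
The SLAVE_CAPA_* values tested above are single-bit flags declared in the server header rather than in this file; plausible definitions, consistent with how they are OR-ed and AND-ed here, would be:

    #define SLAVE_CAPA_NONE    0
    #define SLAVE_CAPA_EOF    (1<<0)  /* Slave can parse the RDB EOF streaming format. */
    #define SLAVE_CAPA_PSYNC2 (1<<1)  /* Slave understands +CONTINUE <replid>. */

With such values, the mincapa of one slave advertising eof and psync2 and another advertising only eof is just eof, so the diskless (socket) target is selected only when every waiting slave can parse the streamed format.
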
/* SYNC and PSYNC command implementation. */
-void syncCommand(redisClient *c) {
+void syncCommand(client *c) {
/* ignore SYNC if already slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
+ if (c->flags & CLIENT_SLAVE) return;
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
- if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {
- addReplyError(c,"Can't SYNC while not connected with my master");
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
+ addReplySds(c,sdsnew("-NOMASTERLINK Can't SYNC while not connected with my master\r\n"));
return;
}
@@ -475,12 +639,12 @@ void syncCommand(redisClient *c) {
* the client about already issued commands. We need a fresh reply
* buffer registering the differences between the BGSAVE and the current
* dataset, so that we can copy to other slaves if needed. */
- if (listLength(c->reply) != 0 || c->bufpos != 0) {
+ if (clientHasPendingReplies(c)) {
addReplyError(c,"SYNC and PSYNC are invalid with pending output");
return;
}
- redisLog(REDIS_NOTICE,"Slave %s asks for synchronization",
+ serverLog(LL_NOTICE,"Slave %s asks for synchronization",
replicationGetSlaveName(c));
/* Try a partial resynchronization if this is a PSYNC command.
@@ -488,97 +652,112 @@ void syncCommand(redisClient *c) {
* when this happens masterTryPartialResynchronization() already
* replied with:
*
- * +FULLRESYNC <runid> <offset>
+ * +FULLRESYNC <replid> <offset>
*
- * So the slave knows the new runid and offset to try a PSYNC later
+ * So the slave knows the new replid and offset to try a PSYNC later
* if the connection with the master is lost. */
if (!strcasecmp(c->argv[0]->ptr,"psync")) {
- if (masterTryPartialResynchronization(c) == REDIS_OK) {
+ if (masterTryPartialResynchronization(c) == C_OK) {
server.stat_sync_partial_ok++;
return; /* No full resync needed, return. */
} else {
- char *master_runid = c->argv[1]->ptr;
+ char *master_replid = c->argv[1]->ptr;
/* Increment stats for failed PSYNCs, but only if the
- * runid is not "?", as this is used by slaves to force a full
+ * replid is not "?", as this is used by slaves to force a full
             * resync on purpose when they are not able to partially
* resync. */
- if (master_runid[0] != '?') server.stat_sync_partial_err++;
+ if (master_replid[0] != '?') server.stat_sync_partial_err++;
}
} else {
/* If a slave uses SYNC, we are dealing with an old implementation
* of the replication protocol (like redis-cli --slave). Flag the client
         * so that we don't expect to receive REPLCONF ACK feedback. */
- c->flags |= REDIS_PRE_PSYNC;
+ c->flags |= CLIENT_PRE_PSYNC;
}
/* Full resynchronization. */
server.stat_sync_full++;
- /* Here we need to check if there is a background saving operation
- * in progress, or if it is required to start one */
+    /* Set up the slave as one waiting for BGSAVE to start. The following code
+ * paths will change the state if we handle the slave differently. */
+ c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
+ if (server.repl_disable_tcp_nodelay)
+ anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
+ c->repldbfd = -1;
+ c->flags |= CLIENT_SLAVE;
+ listAddNodeTail(server.slaves,c);
+
+ /* Create the replication backlog if needed. */
+ if (listLength(server.slaves) == 1 && server.repl_backlog == NULL) {
+ /* When we create the backlog from scratch, we always use a new
+ * replication ID and clear the ID2, since there is no valid
+ * past history. */
+ changeReplicationId();
+ clearReplicationId2();
+ createReplicationBacklog();
+ }
+
+ /* CASE 1: BGSAVE is in progress, with disk target. */
if (server.rdb_child_pid != -1 &&
- server.rdb_child_type == REDIS_RDB_CHILD_TYPE_DISK)
+ server.rdb_child_type == RDB_CHILD_TYPE_DISK)
{
/* Ok a background save is in progress. Let's check if it is a good
* one for replication, i.e. if there is another slave that is
* registering differences since the server forked to save. */
- redisClient *slave;
+ client *slave;
listNode *ln;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break;
}
- if (ln) {
+ /* To attach this slave, we check that it has at least all the
+ * capabilities of the slave that triggered the current BGSAVE. */
+ if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) {
/* Perfect, the server is already registering differences for
* another slave. Set the right state, and copy the buffer. */
copyClientOutputBuffer(c,slave);
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
+ replicationSetupSlaveForFullResync(c,slave->psync_initial_offset);
+ serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
/* No way, we need to wait for the next BGSAVE in order to
* register differences. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
- redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
+ serverLog(LL_NOTICE,"Can't attach the slave to the current BGSAVE. Waiting for next BGSAVE for SYNC");
}
+
+ /* CASE 2: BGSAVE is in progress, with socket target. */
} else if (server.rdb_child_pid != -1 &&
- server.rdb_child_type == REDIS_RDB_CHILD_TYPE_SOCKET)
+ server.rdb_child_type == RDB_CHILD_TYPE_SOCKET)
{
/* There is an RDB child process but it is writing directly to
         * the slaves' sockets. We need to wait for the next BGSAVE
* in order to synchronize. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
- redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
+ serverLog(LL_NOTICE,"Current BGSAVE has socket target. Waiting for next BGSAVE for SYNC");
+
+    /* CASE 3: There is no BGSAVE in progress. */
} else {
- if (server.repl_diskless_sync) {
+ if (server.repl_diskless_sync && (c->slave_capa & SLAVE_CAPA_EOF)) {
/* Diskless replication RDB child is created inside
* replicationCron() since we want to delay its start a
* few seconds to wait for more slaves to arrive. */
- c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
if (server.repl_diskless_sync_delay)
- redisLog(REDIS_NOTICE,"Delay next BGSAVE for SYNC");
+ serverLog(LL_NOTICE,"Delay next BGSAVE for diskless SYNC");
} else {
- /* Ok we don't have a BGSAVE in progress, let's start one. */
- if (startBgsaveForReplication() != REDIS_OK) {
- redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
- addReplyError(c,"Unable to perform background save");
- return;
+ /* Target is disk (or the slave is not capable of supporting
+ * diskless replication) and we don't have a BGSAVE in progress,
+ * let's start one. */
+ if (server.aof_child_pid == -1) {
+ startBgsaveForReplication(c->slave_capa);
+ } else {
+ serverLog(LL_NOTICE,
+ "No BGSAVE in progress, but an AOF rewrite is active. "
+ "BGSAVE for replication delayed");
}
- c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
}
}
-
- if (server.repl_disable_tcp_nodelay)
- anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
- c->repldbfd = -1;
- c->flags |= REDIS_SLAVE;
- server.slaveseldb = -1; /* Force to re-emit the SELECT command. */
- listAddNodeTail(server.slaves,c);
- if (listLength(server.slaves) == 1 && server.repl_backlog == NULL)
- createReplicationBacklog();
return;
}
@@ -594,7 +773,7 @@ void syncCommand(redisClient *c) {
* In the future the same command can be used in order to configure
* the replication to initiate an incremental replication instead of a
* full resync. */
-void replconfCommand(redisClient *c) {
+void replconfCommand(client *c) {
int j;
if ((c->argc % 2) == 0) {
@@ -610,17 +789,32 @@ void replconfCommand(redisClient *c) {
long port;
if ((getLongFromObjectOrReply(c,c->argv[j+1],
- &port,NULL) != REDIS_OK))
+ &port,NULL) != C_OK))
return;
c->slave_listening_port = port;
+ } else if (!strcasecmp(c->argv[j]->ptr,"ip-address")) {
+ sds ip = c->argv[j+1]->ptr;
+ if (sdslen(ip) < sizeof(c->slave_ip)) {
+ memcpy(c->slave_ip,ip,sdslen(ip)+1);
+ } else {
+ addReplyErrorFormat(c,"REPLCONF ip-address provided by "
+ "slave instance is too long: %zd bytes", sdslen(ip));
+ return;
+ }
+ } else if (!strcasecmp(c->argv[j]->ptr,"capa")) {
+ /* Ignore capabilities not understood by this master. */
+ if (!strcasecmp(c->argv[j+1]->ptr,"eof"))
+ c->slave_capa |= SLAVE_CAPA_EOF;
+ else if (!strcasecmp(c->argv[j+1]->ptr,"psync2"))
+ c->slave_capa |= SLAVE_CAPA_PSYNC2;
} else if (!strcasecmp(c->argv[j]->ptr,"ack")) {
/* REPLCONF ACK is used by slave to inform the master the amount
* of replication stream that it processed so far. It is an
* internal only command that normal clients should never use. */
long long offset;
- if (!(c->flags & REDIS_SLAVE)) return;
- if ((getLongLongFromObject(c->argv[j+1], &offset) != REDIS_OK))
+ if (!(c->flags & CLIENT_SLAVE)) return;
+ if ((getLongLongFromObject(c->argv[j+1], &offset) != C_OK))
return;
if (offset > c->repl_ack_off)
c->repl_ack_off = offset;
@@ -628,7 +822,7 @@ void replconfCommand(redisClient *c) {
/* If this was a diskless replication, we need to really put
* the slave online when the first ACK is received (which
         * confirms the slave is online and ready to get more data). */
- if (c->repl_put_online_on_ack && c->replstate == REDIS_REPL_ONLINE)
+ if (c->repl_put_online_on_ack && c->replstate == SLAVE_STATE_ONLINE)
putSlaveOnline(c);
/* Note: this command does not reply anything! */
return;
@@ -636,7 +830,7 @@ void replconfCommand(redisClient *c) {
/* REPLCONF GETACK is used in order to request an ACK ASAP
* to the slave. */
if (server.masterhost && server.master) replicationSendAck();
- /* Note: this command does not reply anything! */
+ return;
} else {
addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
(char*)c->argv[j]->ptr);
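
Concretely, a replicating slave drives this parser during the handshake with commands such as (example values):

    REPLCONF listening-port 6380
    REPLCONF capa eof capa psync2
    REPLCONF ack 16384

The option/value pairs are consumed one per iteration of the loop above; ACK deliberately produces no reply, while the other options are acknowledged after the loop completes (outside the lines shown here).
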
@@ -652,31 +846,32 @@ void replconfCommand(redisClient *c) {
*
* It does a few things:
*
- * 1) Put the slave in ONLINE state.
+ * 1) Put the slave in ONLINE state (a no-op when the function is called
+ *    with the state already ONLINE and repl_put_online_on_ack set).
* 2) Make sure the writable event is re-installed, since calling the SYNC
* command disables it, so that we can accumulate output buffer without
* sending it to the slave.
* 3) Update the count of good slaves. */
-void putSlaveOnline(redisClient *slave) {
- slave->replstate = REDIS_REPL_ONLINE;
+void putSlaveOnline(client *slave) {
+ slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 0;
- slave->repl_ack_time = server.unixtime;
+ slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
sendReplyToClient, slave) == AE_ERR) {
- redisLog(REDIS_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno));
+ serverLog(LL_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno));
freeClient(slave);
return;
}
refreshGoodSlavesCount();
- redisLog(REDIS_NOTICE,"Synchronization with slave %s succeeded",
+ serverLog(LL_NOTICE,"Synchronization with slave %s succeeded",
replicationGetSlaveName(slave));
}
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
- redisClient *slave = privdata;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(mask);
- char buf[REDIS_IOBUF_LEN];
+ client *slave = privdata;
+ UNUSED(el);
+ UNUSED(mask);
+ char buf[PROTO_IOBUF_LEN];
ssize_t nwritten, buflen;
/* Before sending the RDB file, we send the preamble as configured by the
@@ -685,7 +880,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
if (slave->replpreamble) {
nwritten = write(fd,slave->replpreamble,sdslen(slave->replpreamble));
if (nwritten == -1) {
- redisLog(REDIS_VERBOSE,"Write error sending RDB preamble to slave: %s",
+ serverLog(LL_VERBOSE,"Write error sending RDB preamble to slave: %s",
strerror(errno));
freeClient(slave);
return;
@@ -701,18 +896,18 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
}
}
- /* If the preamble was already transfered, send the RDB bulk data. */
+ /* If the preamble was already transferred, send the RDB bulk data. */
lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
- buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
+ buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN);
if (buflen <= 0) {
- redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
+ serverLog(LL_WARNING,"Read error sending DB to slave: %s",
(buflen == 0) ? "premature EOF" : strerror(errno));
freeClient(slave);
return;
}
if ((nwritten = write(fd,buf,buflen)) == -1) {
if (errno != EAGAIN) {
- redisLog(REDIS_WARNING,"Write error sending DB to slave: %s",
+ serverLog(LL_WARNING,"Write error sending DB to slave: %s",
strerror(errno));
freeClient(slave);
}
@@ -738,23 +933,25 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
* BGSAVE was in progress, but it was not a good one for replication (no
* other slave was accumulating differences).
*
- * The argument bgsaveerr is REDIS_OK if the background saving succeeded
- * otherwise REDIS_ERR is passed to the function.
+ * The argument bgsaveerr is C_OK if the background saving succeeded
+ * otherwise C_ERR is passed to the function.
* The 'type' argument is the type of the child that terminated
* (if it had a disk or socket target). */
void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
listNode *ln;
int startbgsave = 0;
+ int mincapa = -1;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
startbgsave = 1;
- slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
+ mincapa = (mincapa == -1) ? slave->slave_capa :
+ (mincapa & slave->slave_capa);
+ } else if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
struct redis_stat buf;
/* If this was an RDB on disk save, we have to prepare to send
@@ -762,32 +959,33 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
* already an RDB -> Slaves socket transfer, used in the case of
* diskless replication, our work is trivial, we can just put
* the slave online. */
- if (type == REDIS_RDB_CHILD_TYPE_SOCKET) {
- redisLog(REDIS_NOTICE,
+ if (type == RDB_CHILD_TYPE_SOCKET) {
+ serverLog(LL_NOTICE,
"Streamed RDB transfer with slave %s succeeded (socket). Waiting for REPLCONF ACK from slave to enable streaming",
replicationGetSlaveName(slave));
/* Note: we wait for a REPLCONF ACK message from slave in
* order to really put it online (install the write handler
- * so that the accumulated data can be transfered). However
+ * so that the accumulated data can be transferred). However
* we change the replication state ASAP, since our slave
* is technically online now. */
- slave->replstate = REDIS_REPL_ONLINE;
+ slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 1;
+ slave->repl_ack_time = server.unixtime; /* Timeout otherwise. */
} else {
- if (bgsaveerr != REDIS_OK) {
+ if (bgsaveerr != C_OK) {
freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
+ serverLog(LL_WARNING,"SYNC failed. BGSAVE child returned an error");
continue;
}
if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
redis_fstat(slave->repldbfd,&buf) == -1) {
freeClient(slave);
- redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
+ serverLog(LL_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
continue;
}
slave->repldboff = 0;
slave->repldbsize = buf.st_size;
- slave->replstate = REDIS_REPL_SEND_BULK;
+ slave->replstate = SLAVE_STATE_SEND_BULK;
slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
(unsigned long long) slave->repldbsize);
@@ -799,39 +997,58 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
}
}
}
- if (startbgsave) {
- if (startBgsaveForReplication() != REDIS_OK) {
- listIter li;
+ if (startbgsave) startBgsaveForReplication(mincapa);
+}
- listRewind(server.slaves,&li);
- redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+/* Change the current instance replication ID with a new, random one.
+ * This will prevent successful PSYNCs between this master and other
+ * slaves, so the function should be called when something happens that
+ * alters the current history of the dataset. */
+void changeReplicationId(void) {
+ getRandomHexChars(server.replid,CONFIG_RUN_ID_SIZE);
+ server.replid[CONFIG_RUN_ID_SIZE] = '\0';
+}
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
- freeClient(slave);
- }
- }
- }
+/* Clear (invalidate) the secondary replication ID. This happens, for
+ * example, after a full resynchronization, when we start a new replication
+ * history. */
+void clearReplicationId2(void) {
+ memset(server.replid2,'0',sizeof(server.replid));
+ server.replid2[CONFIG_RUN_ID_SIZE] = '\0';
+ server.second_replid_offset = -1;
}
-/* ----------------------------------- SLAVE -------------------------------- */
+/* Use the current replication ID / offset as secondary replication
+ * ID, and change the current one in order to start a new history.
+ * This should be used when an instance is switched from slave to master
+ * so that it can serve PSYNC requests performed using the master
+ * replication ID. */
+void shiftReplicationId(void) {
+ memcpy(server.replid2,server.replid,sizeof(server.replid));
+ /* We set the second replid offset to the master offset + 1, since
+ * the slave will ask for the first byte it has not yet received, so
+ * we need to add one to the offset: for example if, as a slave, we are
+ * sure we have the same history as the master for 50 bytes, after we
+ * are turned into a master, we can accept a PSYNC request with offset
+ * 51, since the slave asking has the same history up to the 50th
+ * byte, and is asking for the new bytes starting at offset 51. */
+ server.second_replid_offset = server.master_repl_offset+1;
+ changeReplicationId();
+ serverLog(LL_WARNING,"Setting secondary replication ID to %s, valid up to offset: %lld. New replication ID is %s", server.replid2, server.second_replid_offset, server.replid);
+}
-/* Abort the async download of the bulk dataset while SYNC-ing with master */
-void replicationAbortSyncTransfer(void) {
- redisAssert(server.repl_state == REDIS_REPL_TRANSFER);
+/* ----------------------------------- SLAVE -------------------------------- */
- aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
- close(server.repl_transfer_s);
- close(server.repl_transfer_fd);
- unlink(server.repl_transfer_tmpfile);
- zfree(server.repl_transfer_tmpfile);
- server.repl_state = REDIS_REPL_CONNECT;
+/* Returns 1 if the current replication state is a handshake state,
+ * 0 otherwise. */
+int slaveIsInHandshakeState(void) {
+ return server.repl_state >= REPL_STATE_RECEIVE_PONG &&
+ server.repl_state <= REPL_STATE_RECEIVE_PSYNC;
}
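
The range check above relies on the handshake states being declared consecutively in the replication state enum, which lives in the server header. A sketch of the expected ordering, with only the two endpoint names taken from their use in this file and the rest assumed, is:

    /* Sketch of the handshake portion of the repl_state enum. */
    REPL_STATE_RECEIVE_PONG,   /* Wait for PING reply */
    REPL_STATE_SEND_AUTH,      /* Send AUTH to master */
    REPL_STATE_RECEIVE_AUTH,   /* Wait for AUTH reply */
    REPL_STATE_SEND_PORT,      /* Send REPLCONF listening-port */
    REPL_STATE_RECEIVE_PORT,   /* Wait for REPLCONF reply */
    REPL_STATE_SEND_IP,        /* Send REPLCONF ip-address */
    REPL_STATE_RECEIVE_IP,     /* Wait for REPLCONF reply */
    REPL_STATE_SEND_CAPA,      /* Send REPLCONF capa */
    REPL_STATE_RECEIVE_CAPA,   /* Wait for REPLCONF reply */
    REPL_STATE_SEND_PSYNC,     /* Send PSYNC */
    REPL_STATE_RECEIVE_PSYNC,  /* Wait for PSYNC reply */

Any state declared between RECEIVE_PONG and RECEIVE_PSYNC is automatically treated as part of the handshake by this check.
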
/* Prevent the master from detecting that the slave is timing out while
 * loading the RDB file in initial synchronization. We send a single newline character
- * that is valid protocol but is guaranteed to either be sent entierly or
+ * that is valid protocol but is guaranteed to either be sent entirely or
* not, since the byte is indivisible.
*
* The function is called in two contexts: while we flush the current
@@ -850,38 +1067,68 @@ void replicationSendNewlineToMaster(void) {
/* Callback used by emptyDb() while flushing away old data to load
* the new dataset received by the master. */
void replicationEmptyDbCallback(void *privdata) {
- REDIS_NOTUSED(privdata);
+ UNUSED(privdata);
replicationSendNewlineToMaster();
}
+/* Once we have a link with the master and the synchronization was
+ * performed, this function materializes the master client we store
+ * at server.master, starting from the specified file descriptor. */
+void replicationCreateMasterClient(int fd, int dbid) {
+ server.master = createClient(fd);
+ server.master->flags |= CLIENT_MASTER;
+ server.master->authenticated = 1;
+ server.master->reploff = server.master_initial_offset;
+ server.master->read_reploff = server.master->reploff;
+ memcpy(server.master->replid, server.master_replid,
+ sizeof(server.master_replid));
+ /* If master offset is set to -1, this master is old and is not
+ * PSYNC capable, so we flag it accordingly. */
+ if (server.master->reploff == -1)
+ server.master->flags |= CLIENT_PRE_PSYNC;
+ if (dbid != -1) selectDb(server.master,dbid);
+}
+
+void restartAOF() {
+ int retry = 10;
+ while (retry-- && startAppendOnly() == C_ERR) {
+ serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second.");
+ sleep(1);
+ }
+ if (!retry) {
+ serverLog(LL_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.");
+ exit(1);
+ }
+}
+
/* Asynchronously read the SYNC payload we receive from a master */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
char buf[4096];
- ssize_t nread, readlen;
+ ssize_t nread, readlen, nwritten;
off_t left;
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
/* Static vars used to hold the EOF mark, and the last bytes received
     * from the server: when they match, we reached the end of the transfer. */
- static char eofmark[REDIS_RUN_ID_SIZE];
- static char lastbytes[REDIS_RUN_ID_SIZE];
+ static char eofmark[CONFIG_RUN_ID_SIZE];
+ static char lastbytes[CONFIG_RUN_ID_SIZE];
static int usemark = 0;
/* If repl_transfer_size == -1 we still have to read the bulk length
* from the master reply. */
if (server.repl_transfer_size == -1) {
if (syncReadLine(fd,buf,1024,server.repl_syncio_timeout*1000) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"I/O error reading bulk count from MASTER: %s",
strerror(errno));
goto error;
}
if (buf[0] == '-') {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"MASTER aborted replication with an error: %s",
buf+1);
goto error;
@@ -892,7 +1139,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
server.repl_transfer_lastio = server.unixtime;
return;
} else if (buf[0] != '$') {
- redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
+ serverLog(LL_WARNING,"Bad protocol from MASTER, the first byte is not '$' (we received '%s'), are you sure the host and port are right?", buf);
goto error;
}
@@ -906,19 +1153,19 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
* At the end of the file the announced delimiter is transmitted. The
* delimiter is long and random enough that the probability of a
* collision with the actual file content can be ignored. */
- if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= REDIS_RUN_ID_SIZE) {
+ if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= CONFIG_RUN_ID_SIZE) {
usemark = 1;
- memcpy(eofmark,buf+5,REDIS_RUN_ID_SIZE);
- memset(lastbytes,0,REDIS_RUN_ID_SIZE);
+ memcpy(eofmark,buf+5,CONFIG_RUN_ID_SIZE);
+ memset(lastbytes,0,CONFIG_RUN_ID_SIZE);
/* Set any repl_transfer_size to avoid entering this code path
* at the next call. */
server.repl_transfer_size = 0;
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"MASTER <-> SLAVE sync: receiving streamed RDB from master");
} else {
usemark = 0;
server.repl_transfer_size = strtol(buf+1,NULL,10);
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"MASTER <-> SLAVE sync: receiving %lld bytes from master",
(long long) server.repl_transfer_size);
}
@@ -935,9 +1182,9 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
nread = read(fd,buf,readlen);
if (nread <= 0) {
- redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
+ serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
(nread == -1) ? strerror(errno) : "connection lost");
- replicationAbortSyncTransfer();
+ cancelReplicationHandshake();
return;
}
server.stat_net_input_bytes += nread;
@@ -948,19 +1195,20 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
if (usemark) {
/* Update the last bytes array, and check if it matches our delimiter.*/
- if (nread >= REDIS_RUN_ID_SIZE) {
- memcpy(lastbytes,buf+nread-REDIS_RUN_ID_SIZE,REDIS_RUN_ID_SIZE);
+ if (nread >= CONFIG_RUN_ID_SIZE) {
+ memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE);
} else {
- int rem = REDIS_RUN_ID_SIZE-nread;
+ int rem = CONFIG_RUN_ID_SIZE-nread;
memmove(lastbytes,lastbytes+nread,rem);
memcpy(lastbytes+rem,buf,nread);
}
- if (memcmp(lastbytes,eofmark,REDIS_RUN_ID_SIZE) == 0) eof_reached = 1;
+ if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1;
}
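
For example, with the 40 byte mark and a 10 byte read, the oldest 10 bytes of lastbytes are shifted out with memmove() and the 10 new bytes appended, so lastbytes always holds the last 40 bytes received and can be compared against eofmark after every read.
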
server.repl_transfer_lastio = server.unixtime;
- if (write(server.repl_transfer_fd,buf,nread) != nread) {
- redisLog(REDIS_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) {
+ serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> SLAVE synchronization: %s",
+ (nwritten == -1) ? strerror(errno) : "short write");
goto error;
}
server.repl_transfer_read += nread;
@@ -968,9 +1216,9 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Delete the last 40 bytes from the file if we reached EOF. */
if (usemark && eof_reached) {
if (ftruncate(server.repl_transfer_fd,
- server.repl_transfer_read - REDIS_RUN_ID_SIZE) == -1)
+ server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1)
{
- redisLog(REDIS_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno));
goto error;
}
}
@@ -995,62 +1243,65 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
}
if (eof_reached) {
+ int aof_is_enabled = server.aof_state != AOF_OFF;
+
if (rename(server.repl_transfer_tmpfile,server.rdb_filename) == -1) {
- redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
- replicationAbortSyncTransfer();
+ serverLog(LL_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
+ cancelReplicationHandshake();
return;
}
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Flushing old data");
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Flushing old data");
+    /* We need to stop any AOFRW fork before flushing and parsing
+     * the RDB, otherwise we'll create a copy-on-write disaster. */
+ if(aof_is_enabled) stopAppendOnly();
signalFlushedDb(-1);
- emptyDb(replicationEmptyDbCallback);
+ emptyDb(
+ -1,
+ server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS,
+ replicationEmptyDbCallback);
/* Before loading the DB into memory we need to delete the readable
* handler, otherwise it will get called recursively since
* rdbLoad() will call the event loop to process events from time to
* time for non blocking loading. */
aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory");
- if (rdbLoad(server.rdb_filename) != REDIS_OK) {
- redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
- replicationAbortSyncTransfer();
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Loading DB in memory");
+ rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ if (rdbLoad(server.rdb_filename,&rsi) != C_OK) {
+ serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
+ cancelReplicationHandshake();
+ /* Re-enable the AOF if we disabled it earlier, in order to restore
+ * the original configuration. */
+ if (aof_is_enabled) restartAOF();
return;
}
/* Final setup of the connected slave <- master link */
zfree(server.repl_transfer_tmpfile);
close(server.repl_transfer_fd);
- server.master = createClient(server.repl_transfer_s);
- server.master->flags |= REDIS_MASTER;
- server.master->authenticated = 1;
- server.repl_state = REDIS_REPL_CONNECTED;
- server.master->reploff = server.repl_master_initial_offset;
- memcpy(server.master->replrunid, server.repl_master_runid,
- sizeof(server.repl_master_runid));
- /* If master offset is set to -1, this master is old and is not
- * PSYNC capable, so we flag it accordingly. */
- if (server.master->reploff == -1)
- server.master->flags |= REDIS_PRE_PSYNC;
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
+ replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db);
+ server.repl_state = REPL_STATE_CONNECTED;
+ server.repl_down_since = 0;
+    /* After a full resynchronization we use the replication ID and
+ * offset of the master. The secondary ID / offset are cleared since
+ * we are starting a new history. */
+ memcpy(server.replid,server.master->replid,sizeof(server.replid));
+ server.master_repl_offset = server.master->reploff;
+ clearReplicationId2();
+ /* Let's create the replication backlog if needed. Slaves need to
+     * accumulate the backlog whether or not they have sub-slaves,
+     * in order to behave correctly if they are promoted to
+ * masters after a failover. */
+ if (server.repl_backlog == NULL) createReplicationBacklog();
+
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Finished with success");
/* Restart the AOF subsystem now that we finished the sync. This
* will trigger an AOF rewrite, and when done will start appending
* to the new file. */
- if (server.aof_state != REDIS_AOF_OFF) {
- int retry = 10;
-
- stopAppendOnly();
- while (retry-- && startAppendOnly() == REDIS_ERR) {
- redisLog(REDIS_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second.");
- sleep(1);
- }
- if (!retry) {
- redisLog(REDIS_WARNING,"FATAL: this slave instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now.");
- exit(1);
- }
- }
+ if (aof_is_enabled) restartAOF();
}
-
return;
error:
- replicationAbortSyncTransfer();
+ cancelReplicationHandshake();
return;
}
@@ -1060,38 +1311,61 @@ error:
* The command returns an sds string representing the result of the
* operation. On error the first byte is a "-".
*/
-char *sendSynchronousCommand(int fd, ...) {
- va_list ap;
- sds cmd = sdsempty();
- char *arg, buf[256];
+#define SYNC_CMD_READ (1<<0)
+#define SYNC_CMD_WRITE (1<<1)
+#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
+char *sendSynchronousCommand(int flags, int fd, ...) {
+
+    /* Create the command to send to the master. We use the Redis binary
+     * protocol to make sure the arguments are sent correctly. This function
+     * is not safe for all binary data. */
+ if (flags & SYNC_CMD_WRITE) {
+ char *arg;
+ va_list ap;
+ sds cmd = sdsempty();
+ sds cmdargs = sdsempty();
+ size_t argslen = 0;
+ va_start(ap,fd);
+
+ while(1) {
+ arg = va_arg(ap, char*);
+ if (arg == NULL) break;
+
+ cmdargs = sdscatprintf(cmdargs,"$%zu\r\n%s\r\n",strlen(arg),arg);
+ argslen++;
+ }
- /* Create the command to send to the master, we use simple inline
- * protocol for simplicity as currently we only send simple strings. */
- va_start(ap,fd);
- while(1) {
- arg = va_arg(ap, char*);
- if (arg == NULL) break;
+ va_end(ap);
- if (sdslen(cmd) != 0) cmd = sdscatlen(cmd," ",1);
- cmd = sdscat(cmd,arg);
- }
- cmd = sdscatlen(cmd,"\r\n",2);
+ cmd = sdscatprintf(cmd,"*%zu\r\n",argslen);
+ cmd = sdscatsds(cmd,cmdargs);
+ sdsfree(cmdargs);
- /* Transfer command to the server. */
- if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000) == -1) {
+ /* Transfer command to the server. */
+ if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
+ == -1)
+ {
+ sdsfree(cmd);
+ return sdscatprintf(sdsempty(),"-Writing to master: %s",
+ strerror(errno));
+ }
sdsfree(cmd);
- return sdscatprintf(sdsempty(),"-Writing to master: %s",
- strerror(errno));
}
- sdsfree(cmd);
/* Read the reply from the server. */
- if (syncReadLine(fd,buf,sizeof(buf),server.repl_syncio_timeout*1000) == -1)
- {
- return sdscatprintf(sdsempty(),"-Reading from master: %s",
- strerror(errno));
+ if (flags & SYNC_CMD_READ) {
+ char buf[256];
+
+ if (syncReadLine(fd,buf,sizeof(buf),server.repl_syncio_timeout*1000)
+ == -1)
+ {
+ return sdscatprintf(sdsempty(),"-Reading from master: %s",
+ strerror(errno));
+ }
+ server.repl_transfer_lastio = server.unixtime;
+ return sdsnew(buf);
}
- return sdsnew(buf);
+ return NULL;
}
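
For example, sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC","?","-1",NULL) now writes "*3\r\n$5\r\nPSYNC\r\n$1\r\n?\r\n$2\r\n-1\r\n" to the master, where the old implementation would have sent the inline form "PSYNC ? -1\r\n".
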
/* Try a partial resynchronization with the master if we are about to reconnect.
@@ -1108,70 +1382,118 @@ char *sendSynchronousCommand(int fd, ...) {
* of successful partial resynchronization, the function will reuse
* 'fd' as file descriptor of the server.master client structure.
*
+ * The function is split in two halves: if read_reply is 0, the function
+ * writes the PSYNC command on the socket, and a new function call is
+ * needed, with read_reply set to 1, in order to read the reply of the
+ * command. This is useful in order to support non blocking operations, so
+ * that we write, return to the event loop, and read when there is data.
+ *
+ * When read_reply is 0 the function returns PSYNC_WRITE_ERROR if there
+ * was a write error, or PSYNC_WAIT_REPLY to signal we need another call
+ * with read_reply set to 1. However even when read_reply is set to 1
+ * the function may return PSYNC_WAIT_REPLY again to signal there was
+ * insufficient data to read to complete its work. We should re-enter
+ * the event loop and wait in such a case.
+ *
* The function returns:
*
- * PSYNC_CONTINUE: If the PSYNC command succeded and we can continue.
+ * PSYNC_CONTINUE: If the PSYNC command succeeded and we can continue.
* PSYNC_FULLRESYNC: If PSYNC is supported but a full resync is needed.
* In this case the master run_id and global replication
 *                   offset are saved.
* PSYNC_NOT_SUPPORTED: If the server does not understand PSYNC at all and
* the caller should fall back to SYNC.
+ * PSYNC_WRITE_ERROR: There was an error writing the command to the socket.
+ * PSYNC_WAIT_REPLY: Call the function again with read_reply set to 1.
+ * PSYNC_TRY_LATER: Master is currently in a transient error condition.
+ *
+ * Notable side effects:
+ *
+ * 1) As a side effect of the call, the function removes the readable
+ * event handler from "fd", unless the return value is PSYNC_WAIT_REPLY.
+ * 2) server.master_initial_offset is set to the right value according
+ * to the master reply. This will be used to populate the 'server.master'
+ * structure replication offset.
*/
-#define PSYNC_CONTINUE 0
-#define PSYNC_FULLRESYNC 1
-#define PSYNC_NOT_SUPPORTED 2
-int slaveTryPartialResynchronization(int fd) {
- char *psync_runid;
+#define PSYNC_WRITE_ERROR 0
+#define PSYNC_WAIT_REPLY 1
+#define PSYNC_CONTINUE 2
+#define PSYNC_FULLRESYNC 3
+#define PSYNC_NOT_SUPPORTED 4
+#define PSYNC_TRY_LATER 5
+int slaveTryPartialResynchronization(int fd, int read_reply) {
+ char *psync_replid;
char psync_offset[32];
sds reply;
- /* Initially set repl_master_initial_offset to -1 to mark the current
- * master run_id and offset as not valid. Later if we'll be able to do
- * a FULL resync using the PSYNC command we'll set the offset at the
- * right value, so that this information will be propagated to the
- * client structure representing the master into server.master. */
- server.repl_master_initial_offset = -1;
-
- if (server.cached_master) {
- psync_runid = server.cached_master->replrunid;
- snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
- redisLog(REDIS_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_runid, psync_offset);
- } else {
- redisLog(REDIS_NOTICE,"Partial resynchronization not possible (no cached master)");
- psync_runid = "?";
- memcpy(psync_offset,"-1",3);
+ /* Writing half */
+ if (!read_reply) {
+ /* Initially set master_initial_offset to -1 to mark the current
+ * master run_id and offset as not valid. Later if we'll be able to do
+ * a FULL resync using the PSYNC command we'll set the offset at the
+ * right value, so that this information will be propagated to the
+ * client structure representing the master into server.master. */
+ server.master_initial_offset = -1;
+
+ if (server.cached_master) {
+ psync_replid = server.cached_master->replid;
+ snprintf(psync_offset,sizeof(psync_offset),"%lld", server.cached_master->reploff+1);
+ serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
+ } else {
+ serverLog(LL_NOTICE,"Partial resynchronization not possible (no cached master)");
+ psync_replid = "?";
+ memcpy(psync_offset,"-1",3);
+ }
+
+ /* Issue the PSYNC command */
+ reply = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC",psync_replid,psync_offset,NULL);
+ if (reply != NULL) {
+ serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
+ sdsfree(reply);
+ aeDeleteFileEvent(server.el,fd,AE_READABLE);
+ return PSYNC_WRITE_ERROR;
+ }
+ return PSYNC_WAIT_REPLY;
+ }
+
+ /* Reading half */
+ reply = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ if (sdslen(reply) == 0) {
+ /* The master may send empty newlines after it receives PSYNC
+     * and before replying, just to keep the connection alive. */
+ sdsfree(reply);
+ return PSYNC_WAIT_REPLY;
}
- /* Issue the PSYNC command */
- reply = sendSynchronousCommand(fd,"PSYNC",psync_runid,psync_offset,NULL);
+ aeDeleteFileEvent(server.el,fd,AE_READABLE);
if (!strncmp(reply,"+FULLRESYNC",11)) {
- char *runid = NULL, *offset = NULL;
+ char *replid = NULL, *offset = NULL;
/* FULL RESYNC, parse the reply in order to extract the run id
* and the replication offset. */
- runid = strchr(reply,' ');
- if (runid) {
- runid++;
- offset = strchr(runid,' ');
+ replid = strchr(reply,' ');
+ if (replid) {
+ replid++;
+ offset = strchr(replid,' ');
if (offset) offset++;
}
- if (!runid || !offset || (offset-runid-1) != REDIS_RUN_ID_SIZE) {
- redisLog(REDIS_WARNING,
+ if (!replid || !offset || (offset-replid-1) != CONFIG_RUN_ID_SIZE) {
+ serverLog(LL_WARNING,
"Master replied with wrong +FULLRESYNC syntax.");
/* This is an unexpected condition, actually the +FULLRESYNC
* reply means that the master supports PSYNC, but the reply
* format seems wrong. To stay safe we blank the master
- * runid to make sure next PSYNCs will fail. */
- memset(server.repl_master_runid,0,REDIS_RUN_ID_SIZE+1);
+ * replid to make sure next PSYNCs will fail. */
+ memset(server.master_replid,0,CONFIG_RUN_ID_SIZE+1);
} else {
- memcpy(server.repl_master_runid, runid, offset-runid-1);
- server.repl_master_runid[REDIS_RUN_ID_SIZE] = '\0';
- server.repl_master_initial_offset = strtoll(offset,NULL,10);
- redisLog(REDIS_NOTICE,"Full resync from master: %s:%lld",
- server.repl_master_runid,
- server.repl_master_initial_offset);
+ memcpy(server.master_replid, replid, offset-replid-1);
+ server.master_replid[CONFIG_RUN_ID_SIZE] = '\0';
+ server.master_initial_offset = strtoll(offset,NULL,10);
+ serverLog(LL_NOTICE,"Full resync from master: %s:%lld",
+ server.master_replid,
+ server.master_initial_offset);
}
/* We are going to full resync, discard the cached master structure. */
replicationDiscardCachedMaster();
@@ -1180,24 +1502,76 @@ int slaveTryPartialResynchronization(int fd) {
}
if (!strncmp(reply,"+CONTINUE",9)) {
- /* Partial resync was accepted, set the replication state accordingly */
- redisLog(REDIS_NOTICE,
+ /* Partial resync was accepted. */
+ serverLog(LL_NOTICE,
"Successful partial resynchronization with master.");
+
+ /* Check the new replication ID advertised by the master. If it
+     * changed, we need to set the new ID as primary ID, and set our
+     * secondary ID as the old master ID up to the current offset, so
+ * that our sub-slaves will be able to PSYNC with us after a
+ * disconnection. */
+ char *start = reply+10;
+ char *end = reply+9;
+ while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
+ if (end-start == CONFIG_RUN_ID_SIZE) {
+ char new[CONFIG_RUN_ID_SIZE+1];
+ memcpy(new,start,CONFIG_RUN_ID_SIZE);
+ new[CONFIG_RUN_ID_SIZE] = '\0';
+
+ if (strcmp(new,server.cached_master->replid)) {
+ /* Master ID changed. */
+ serverLog(LL_WARNING,"Master replication ID changed to %s",new);
+
+ /* Set the old ID as our ID2, up to the current offset+1. */
+ memcpy(server.replid2,server.cached_master->replid,
+ sizeof(server.replid2));
+ server.second_replid_offset = server.master_repl_offset+1;
+
+ /* Update the cached master ID and our own primary ID to the
+ * new one. */
+ memcpy(server.replid,new,sizeof(server.replid));
+ memcpy(server.cached_master->replid,new,sizeof(server.replid));
+
+ /* Disconnect all the sub-slaves: they need to be notified. */
+ disconnectSlaves();
+ }
+ }
+
+ /* Setup the replication to continue. */
sdsfree(reply);
replicationResurrectCachedMaster(fd);
+
+ /* If this instance was restarted and we read the metadata to
+ * PSYNC from the persistence file, our replication backlog could
+         * still be uninitialized. Create it. */
+ if (server.repl_backlog == NULL) createReplicationBacklog();
return PSYNC_CONTINUE;
}
- /* If we reach this point we receied either an error since the master does
- * not understand PSYNC, or an unexpected reply from the master.
- * Return PSYNC_NOT_SUPPORTED to the caller in both cases. */
+ /* If we reach this point we received either an error (since the master does
+ * not understand PSYNC or because it is in a special state and cannot
+ * serve our request), or an unexpected reply from the master.
+ *
+ * Return PSYNC_NOT_SUPPORTED on errors we don't understand, otherwise
+ * return PSYNC_TRY_LATER if we believe this is a transient error. */
+
+ if (!strncmp(reply,"-NOMASTERLINK",13) ||
+ !strncmp(reply,"-LOADING",8))
+ {
+ serverLog(LL_NOTICE,
+ "Master is currently unable to PSYNC "
+ "but should be in the future: %s", reply);
+ sdsfree(reply);
+ return PSYNC_TRY_LATER;
+ }
if (strncmp(reply,"-ERR",4)) {
/* If it's not an error, log the unexpected event. */
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Unexpected reply to PSYNC from master: %s", reply);
} else {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Master does not support PSYNC or is in "
"error state (reply: %s)", reply);
}
@@ -1206,108 +1580,180 @@ int slaveTryPartialResynchronization(int fd) {
return PSYNC_NOT_SUPPORTED;
}
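
For reference, the classification that the reading half above performs can be condensed into a self-contained sketch; the PSYNC_* names match the patch, while the helper itself is hypothetical:

    #include <string.h>

    enum { PSYNC_FULLRESYNC, PSYNC_CONTINUE, PSYNC_TRY_LATER,
           PSYNC_NOT_SUPPORTED };

    /* Hypothetical helper: map a raw master reply to one of the
     * result codes returned by slaveTryPartialResynchronization(). */
    static int classifyPsyncReply(const char *reply) {
        if (!strncmp(reply,"+FULLRESYNC",11)) return PSYNC_FULLRESYNC;
        if (!strncmp(reply,"+CONTINUE",9)) return PSYNC_CONTINUE;
        if (!strncmp(reply,"-NOMASTERLINK",13) ||
            !strncmp(reply,"-LOADING",8)) return PSYNC_TRY_LATER;
        /* Any -ERR (old master) or unexpected reply lands here. */
        return PSYNC_NOT_SUPPORTED;
    }
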
+/* This handler fires when the non blocking connect was able to
+ * establish a connection with the master. */
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
- char tmpfile[256], *err;
- int dfd, maxtries = 5;
+ char tmpfile[256], *err = NULL;
+ int dfd = -1, maxtries = 5;
int sockerr = 0, psync_result;
socklen_t errlen = sizeof(sockerr);
- REDIS_NOTUSED(el);
- REDIS_NOTUSED(privdata);
- REDIS_NOTUSED(mask);
+ UNUSED(el);
+ UNUSED(privdata);
+ UNUSED(mask);
/* If this event fired after the user turned the instance into a master
* with SLAVEOF NO ONE we must just return ASAP. */
- if (server.repl_state == REDIS_REPL_NONE) {
+ if (server.repl_state == REPL_STATE_NONE) {
close(fd);
return;
}
- /* Check for errors in the socket. */
+ /* Check for errors in the socket: after a non blocking connect() we
+ * may find that the socket is in error state. */
if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
sockerr = errno;
if (sockerr) {
- aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
- redisLog(REDIS_WARNING,"Error condition on socket for SYNC: %s",
+ serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
strerror(sockerr));
goto error;
}
- /* If we were connecting, it's time to send a non blocking PING, we want to
- * make sure the master is able to reply before going into the actual
- * replication process where we have long timeouts in the order of
- * seconds (in the meantime the slave would block). */
- if (server.repl_state == REDIS_REPL_CONNECTING) {
- redisLog(REDIS_NOTICE,"Non blocking connect for SYNC fired the event.");
+ /* Send a PING to check the master is able to reply without errors. */
+ if (server.repl_state == REPL_STATE_CONNECTING) {
+ serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
- server.repl_state = REDIS_REPL_RECEIVE_PONG;
+ server.repl_state = REPL_STATE_RECEIVE_PONG;
/* Send the PING, don't check for errors at all, we have the timeout
         * that will take care of this. */
- syncWrite(fd,"PING\r\n",6,100);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PING",NULL);
+ if (err) goto write_error;
return;
}
/* Receive the PONG command. */
- if (server.repl_state == REDIS_REPL_RECEIVE_PONG) {
- char buf[1024];
-
- /* Delete the readable event, we no longer need it now that there is
- * the PING reply to read. */
- aeDeleteFileEvent(server.el,fd,AE_READABLE);
-
- /* Read the reply with explicit timeout. */
- buf[0] = '\0';
- if (syncReadLine(fd,buf,sizeof(buf),
- server.repl_syncio_timeout*1000) == -1)
- {
- redisLog(REDIS_WARNING,
- "I/O error reading PING reply from master: %s",
- strerror(errno));
- goto error;
- }
+ if (server.repl_state == REPL_STATE_RECEIVE_PONG) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
* Note that older versions of Redis replied with "operation not
* permitted" instead of using a proper error code, so we test
* both. */
- if (buf[0] != '+' &&
- strncmp(buf,"-NOAUTH",7) != 0 &&
- strncmp(buf,"-ERR operation not permitted",28) != 0)
+ if (err[0] != '+' &&
+ strncmp(err,"-NOAUTH",7) != 0 &&
+ strncmp(err,"-ERR operation not permitted",28) != 0)
{
- redisLog(REDIS_WARNING,"Error reply to PING from master: '%s'",buf);
+ serverLog(LL_WARNING,"Error reply to PING from master: '%s'",err);
+ sdsfree(err);
goto error;
} else {
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Master replied to PING, replication can continue...");
}
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_AUTH;
}
/* AUTH with the master if required. */
- if(server.masterauth) {
- err = sendSynchronousCommand(fd,"AUTH",server.masterauth,NULL);
+ if (server.repl_state == REPL_STATE_SEND_AUTH) {
+ if (server.masterauth) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",server.masterauth,NULL);
+ if (err) goto write_error;
+ server.repl_state = REPL_STATE_RECEIVE_AUTH;
+ return;
+ } else {
+ server.repl_state = REPL_STATE_SEND_PORT;
+ }
+ }
+
+ /* Receive AUTH reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_AUTH) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
if (err[0] == '-') {
- redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",err);
+ serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
goto error;
}
sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_PORT;
}
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
- {
- sds port = sdsfromlonglong(server.port);
- err = sendSynchronousCommand(fd,"REPLCONF","listening-port",port,
- NULL);
+ if (server.repl_state == REPL_STATE_SEND_PORT) {
+ sds port = sdsfromlonglong(server.slave_announce_port ?
+ server.slave_announce_port : server.port);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "listening-port",port, NULL);
sdsfree(port);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_PORT;
+ return;
+ }
+
+ /* Receive REPLCONF listening-port reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_PORT) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF listening-port. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF listening-port: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_IP;
+ }
+
+ /* Skip REPLCONF ip-address if there is no slave-announce-ip option set. */
+ if (server.repl_state == REPL_STATE_SEND_IP &&
+ server.slave_announce_ip == NULL)
+ {
+ server.repl_state = REPL_STATE_SEND_CAPA;
+ }
+
+ /* Set the slave ip, so that Master's INFO command can list the
+     * slave IP address correctly in case of port forwarding or NAT. */
+ if (server.repl_state == REPL_STATE_SEND_IP) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "ip-address",server.slave_announce_ip, NULL);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_IP;
+ return;
+ }
+
+ /* Receive REPLCONF ip-address reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_IP) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
/* Ignore the error if any, not all the Redis versions support
         * REPLCONF ip-address. */
if (err[0] == '-') {
- redisLog(REDIS_NOTICE,"(Non critical) Master does not understand REPLCONF listening-port: %s", err);
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF ip-address: %s", err);
+ }
+ sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_CAPA;
+ }
+
+ /* Inform the master of our (slave) capabilities.
+ *
+ * EOF: supports EOF-style RDB transfer for diskless replication.
+ * PSYNC2: supports PSYNC v2, so understands +CONTINUE <new repl ID>.
+ *
+ * The master will ignore capabilities it does not understand. */
+ if (server.repl_state == REPL_STATE_SEND_CAPA) {
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ "capa","eof","capa","psync2",NULL);
+ if (err) goto write_error;
+ sdsfree(err);
+ server.repl_state = REPL_STATE_RECEIVE_CAPA;
+ return;
+ }
+
+ /* Receive CAPA reply. */
+ if (server.repl_state == REPL_STATE_RECEIVE_CAPA) {
+ err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ /* Ignore the error if any, not all the Redis versions support
+ * REPLCONF capa. */
+ if (err[0] == '-') {
+ serverLog(LL_NOTICE,"(Non critical) Master does not understand "
+ "REPLCONF capa: %s", err);
}
sdsfree(err);
+ server.repl_state = REPL_STATE_SEND_PSYNC;
}
    /* Try a partial resynchronization. If we don't have a cached master
@@ -1315,19 +1761,54 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
* to start a full resynchronization so that we get the master run id
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
- psync_result = slaveTryPartialResynchronization(fd);
+ if (server.repl_state == REPL_STATE_SEND_PSYNC) {
+ if (slaveTryPartialResynchronization(fd,0) == PSYNC_WRITE_ERROR) {
+ err = sdsnew("Write error sending the PSYNC command.");
+ goto write_error;
+ }
+ server.repl_state = REPL_STATE_RECEIVE_PSYNC;
+ return;
+ }
+
+    /* If we reached this point, we should be in REPL_STATE_RECEIVE_PSYNC. */
+ if (server.repl_state != REPL_STATE_RECEIVE_PSYNC) {
+ serverLog(LL_WARNING,"syncWithMaster(): state machine error, "
+ "state should be RECEIVE_PSYNC but is %d",
+ server.repl_state);
+ goto error;
+ }
+
+ psync_result = slaveTryPartialResynchronization(fd,1);
+ if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
+
+    /* If the master is in a transient error, we should try to PSYNC
+ * from scratch later, so go to the error path. This happens when
+ * the server is loading the dataset or is not connected with its
+ * master and so forth. */
+ if (psync_result == PSYNC_TRY_LATER) goto error;
+
+ /* Note: if PSYNC does not return WAIT_REPLY, it will take care of
+ * uninstalling the read handler from the file descriptor. */
+
if (psync_result == PSYNC_CONTINUE) {
- redisLog(REDIS_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
+ serverLog(LL_NOTICE, "MASTER <-> SLAVE sync: Master accepted a Partial Resynchronization.");
return;
}
+ /* PSYNC failed or is not supported: we want our slaves to resync with us
+ * as well, if we have any sub-slaves. The master may transfer us an
+ * entirely different data set and we have no way to incrementally feed
+ * our slaves after that. */
+ disconnectSlaves(); /* Force our slaves to resync with us as well. */
+ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
+
/* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
- * and the server.repl_master_runid and repl_master_initial_offset are
+ * and the server.master_replid and master_initial_offset are
* already populated. */
if (psync_result == PSYNC_NOT_SUPPORTED) {
- redisLog(REDIS_NOTICE,"Retrying with SYNC...");
+ serverLog(LL_NOTICE,"Retrying with SYNC...");
if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
- redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
+ serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
}
@@ -1342,7 +1823,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
sleep(1);
}
if (dfd == -1) {
- redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
+ serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
goto error;
}
@@ -1350,13 +1831,13 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
== AE_ERR)
{
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Can't create readable event for SYNC: %s (fd=%d)",
strerror(errno),fd);
goto error;
}
- server.repl_state = REDIS_REPL_TRANSFER;
+ server.repl_state = REPL_STATE_TRANSFER;
server.repl_transfer_size = -1;
server.repl_transfer_read = 0;
server.repl_transfer_last_fsync_off = 0;
@@ -1366,48 +1847,65 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
error:
+ aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
+ if (dfd != -1) close(dfd);
close(fd);
server.repl_transfer_s = -1;
- server.repl_state = REDIS_REPL_CONNECT;
+ server.repl_state = REPL_STATE_CONNECT;
return;
+
+write_error: /* Handle sendSynchronousCommand(SYNC_CMD_WRITE) errors. */
+ serverLog(LL_WARNING,"Sending command to master in replication handshake: %s", err);
+ sdsfree(err);
+ goto error;
}
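
Taken together, the rewritten syncWithMaster() is a resumable state machine: every SEND_* state writes exactly one command and returns to the event loop, and the paired RECEIVE_* state consumes the reply on the next readable event. A purely illustrative summary of the progression, assuming the state names used in this patch:

    /* Illustrative only: the handshake advances through these states,
     * one event-loop callback at a time. */
    static const char *handshake_steps[] = {
        "CONNECTING",    /* non blocking connect() fired the event    */
        "RECEIVE_PONG",  /* PING sent, waiting for +PONG              */
        "SEND_AUTH",  "RECEIVE_AUTH",  /* only if masterauth is set   */
        "SEND_PORT",  "RECEIVE_PORT",  /* REPLCONF listening-port     */
        "SEND_IP",    "RECEIVE_IP",    /* only if slave-announce-ip   */
        "SEND_CAPA",  "RECEIVE_CAPA",  /* REPLCONF capa eof psync2    */
        "SEND_PSYNC", "RECEIVE_PSYNC", /* PSYNC <replid> <offset>     */
        "TRANSFER",      /* full resync: bulk RDB payload follows     */
    };
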
int connectWithMaster(void) {
int fd;
- fd = anetTcpNonBlockBindConnect(NULL,
- server.masterhost,server.masterport,REDIS_BIND_ADDR);
+ fd = anetTcpNonBlockBestEffortBindConnect(NULL,
+ server.masterhost,server.masterport,NET_FIRST_BIND_ADDR);
if (fd == -1) {
- redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
+ serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
strerror(errno));
- return REDIS_ERR;
+ return C_ERR;
}
if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
AE_ERR)
{
close(fd);
- redisLog(REDIS_WARNING,"Can't create readable event for SYNC");
- return REDIS_ERR;
+ serverLog(LL_WARNING,"Can't create readable event for SYNC");
+ return C_ERR;
}
server.repl_transfer_lastio = server.unixtime;
server.repl_transfer_s = fd;
- server.repl_state = REDIS_REPL_CONNECTING;
- return REDIS_OK;
+ server.repl_state = REPL_STATE_CONNECTING;
+ return C_OK;
}
/* This function can be called when a non blocking connection is currently
- * in progress to undo it. */
+ * in progress to undo it.
+ * Never call this function directly, use cancelReplicationHandshake() instead.
+ */
void undoConnectWithMaster(void) {
int fd = server.repl_transfer_s;
- redisAssert(server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG);
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
close(fd);
server.repl_transfer_s = -1;
- server.repl_state = REDIS_REPL_CONNECT;
+}
+
+/* Abort the async download of the bulk dataset while SYNC-ing with master.
+ * Never call this function directly, use cancelReplicationHandshake() instead.
+ */
+void replicationAbortSyncTransfer(void) {
+ serverAssert(server.repl_state == REPL_STATE_TRANSFER);
+ undoConnectWithMaster();
+ close(server.repl_transfer_fd);
+ unlink(server.repl_transfer_tmpfile);
+ zfree(server.repl_transfer_tmpfile);
}
/* This function aborts a non blocking replication attempt if there is one
@@ -1415,16 +1913,18 @@ void undoConnectWithMaster(void) {
* the initial bulk transfer.
*
* If there was a replication handshake in progress 1 is returned and
- * the replication state (server.repl_state) set to REDIS_REPL_CONNECT.
+ * the replication state (server.repl_state) set to REPL_STATE_CONNECT.
*
 * Otherwise zero is returned and no operation is performed at all. */
int cancelReplicationHandshake(void) {
- if (server.repl_state == REDIS_REPL_TRANSFER) {
+ if (server.repl_state == REPL_STATE_TRANSFER) {
replicationAbortSyncTransfer();
- } else if (server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG)
+ server.repl_state = REPL_STATE_CONNECT;
+ } else if (server.repl_state == REPL_STATE_CONNECTING ||
+ slaveIsInHandshakeState())
{
undoConnectWithMaster();
+ server.repl_state = REPL_STATE_CONNECT;
} else {
return 0;
}
@@ -1433,17 +1933,24 @@ int cancelReplicationHandshake(void) {
/* Set replication to the specified master address and port. */
void replicationSetMaster(char *ip, int port) {
+ int was_master = server.masterhost == NULL;
+
sdsfree(server.masterhost);
server.masterhost = sdsnew(ip);
server.masterport = port;
- if (server.master) freeClient(server.master);
- disconnectSlaves(); /* Force our slaves to resync with us as well. */
- replicationDiscardCachedMaster(); /* Don't try a PSYNC. */
- freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
+ if (server.master) {
+ freeClient(server.master);
+ }
+ disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
+
+    /* Force our slaves to resync with us as well. They may hopefully be able
+     * to partially resync with us, since we can notify them of the replid change. */
+ disconnectSlaves();
cancelReplicationHandshake();
- server.repl_state = REDIS_REPL_CONNECT;
- server.master_repl_offset = 0;
- server.repl_down_since = 0;
+ /* Before destroying our master state, create a cached master using
+ * our own parameters, to later PSYNC with the new master. */
+ if (was_master) replicationCacheMasterUsingMyself();
+ server.repl_state = REPL_STATE_CONNECT;
}
/* Cancel replication, setting the instance as a master itself. */
@@ -1451,23 +1958,46 @@ void replicationUnsetMaster(void) {
if (server.masterhost == NULL) return; /* Nothing to do. */
sdsfree(server.masterhost);
server.masterhost = NULL;
- if (server.master) {
- if (listLength(server.slaves) == 0) {
- /* If this instance is turned into a master and there are no
- * slaves, it inherits the replication offset from the master.
- * Under certain conditions this makes replicas comparable by
- * replication offset to understand what is the most updated. */
- server.master_repl_offset = server.master->reploff;
- freeReplicationBacklog();
- }
- freeClient(server.master);
- }
+ /* When a slave is turned into a master, the current replication ID
+ * (that was inherited from the master at synchronization time) is
+ * used as secondary ID up to the current offset, and a new replication
+ * ID is created to continue with a new replication history. */
+ shiftReplicationId();
+ if (server.master) freeClient(server.master);
replicationDiscardCachedMaster();
cancelReplicationHandshake();
- server.repl_state = REDIS_REPL_NONE;
+ /* Disconnecting all the slaves is required: we need to inform slaves
+ * of the replication ID change (see shiftReplicationId() call). However
+ * the slaves will be able to partially resync with us, so it will be
+ * a very fast reconnection. */
+ disconnectSlaves();
+ server.repl_state = REPL_STATE_NONE;
+
+ /* We need to make sure the new master will start the replication stream
+ * with a SELECT statement. This is forced after a full resync, but
+ * with PSYNC version 2, there is no need for full resync after a
+ * master switch. */
+ server.slaveseldb = -1;
+
+ /* Once we turn from slave to master, we consider the starting time without
+ * slaves (that is used to count the replication backlog time to live) as
+ * starting from now. Otherwise the backlog will be freed after a
+ * failover if slaves do not connect immediately. */
+ server.repl_no_slaves_since = server.unixtime;
}
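
shiftReplicationId() itself is not part of this hunk; going by the comment above, a minimal self-contained sketch of the bookkeeping it has to perform could look as follows (struct layout and the random-ID callback are assumptions):

    #include <string.h>

    #define RUN_ID_SIZE 40

    struct replIds {
        char replid[RUN_ID_SIZE+1];   /* current replication history  */
        char replid2[RUN_ID_SIZE+1];  /* previous history (ID2)       */
        long long master_repl_offset;
        long long second_replid_offset;
    };

    /* Sketch under assumptions: save the current ID as the secondary
     * ID, valid up to the current offset, then start a new history. */
    void shiftReplicationIdSketch(struct replIds *r,
                                  void (*newRandomId)(char *id, size_t len)) {
        memcpy(r->replid2, r->replid, sizeof(r->replid2));
        r->second_replid_offset = r->master_repl_offset + 1;
        newRandomId(r->replid, RUN_ID_SIZE); /* fresh primary ID */
    }
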
-void slaveofCommand(redisClient *c) {
+/* This function is called when the slave loses the connection with the
+ * master in an unexpected way. */
+void replicationHandleMasterDisconnection(void) {
+ server.master = NULL;
+ server.repl_state = REPL_STATE_CONNECT;
+ server.repl_down_since = server.unixtime;
+ /* We lost connection with our master, don't disconnect slaves yet,
+ * maybe we'll be able to PSYNC with our master later. We'll disconnect
+ * the slaves only if we'll have to do a full resync with our master. */
+}
+
+void slaveofCommand(client *c) {
/* SLAVEOF is not allowed in cluster mode as replication is automatically
* configured using the current address of the master node. */
if (server.cluster_enabled) {
@@ -1481,26 +2011,31 @@ void slaveofCommand(redisClient *c) {
!strcasecmp(c->argv[2]->ptr,"one")) {
if (server.masterhost) {
replicationUnsetMaster();
- redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
+ sds client = catClientInfoString(sdsempty(),c);
+ serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')",
+ client);
+ sdsfree(client);
}
} else {
long port;
- if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &port, NULL) != C_OK))
return;
/* Check if we are already attached to the specified slave */
if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr)
&& server.masterport == port) {
- redisLog(REDIS_NOTICE,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
+ serverLog(LL_NOTICE,"SLAVE OF would result into synchronization with the master we are already connected with. No operation performed.");
addReplySds(c,sdsnew("+OK Already connected to specified master\r\n"));
return;
}
/* There was no previous master or the user specified a different one,
* we can continue. */
replicationSetMaster(c->argv[1]->ptr, port);
- redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
- server.masterhost, server.masterport);
+ sds client = catClientInfoString(sdsempty(),c);
+ serverLog(LL_NOTICE,"SLAVE OF %s:%d enabled (user request from '%s')",
+ server.masterhost, server.masterport, client);
+ sdsfree(client);
}
addReply(c,shared.ok);
}
@@ -1508,7 +2043,7 @@ void slaveofCommand(redisClient *c) {
/* ROLE command: provide information about the role of the instance
* (master or slave) and additional information related to replication
* in an easy to process format. */
-void roleCommand(redisClient *c) {
+void roleCommand(client *c) {
if (server.masterhost == NULL) {
listIter li;
listNode *ln;
@@ -1521,13 +2056,17 @@ void roleCommand(redisClient *c) {
mbcount = addDeferredMultiBulkLength(c);
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
- char ip[REDIS_IP_STR_LEN];
+ client *slave = ln->value;
+ char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
- if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1) continue;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
+ if (slaveip[0] == '\0') {
+ if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1)
+ continue;
+ slaveip = ip;
+ }
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
addReplyMultiBulkLen(c,3);
- addReplyBulkCString(c,ip);
+ addReplyBulkCString(c,slaveip);
addReplyBulkLongLong(c,slave->slave_listening_port);
addReplyBulkLongLong(c,slave->repl_ack_off);
slaves++;
@@ -1540,14 +2079,17 @@ void roleCommand(redisClient *c) {
addReplyBulkCBuffer(c,"slave",5);
addReplyBulkCString(c,server.masterhost);
addReplyLongLong(c,server.masterport);
- switch(server.repl_state) {
- case REDIS_REPL_NONE: slavestate = "none"; break;
- case REDIS_REPL_CONNECT: slavestate = "connect"; break;
- case REDIS_REPL_CONNECTING: slavestate = "connecting"; break;
- case REDIS_REPL_RECEIVE_PONG: /* see next */
- case REDIS_REPL_TRANSFER: slavestate = "sync"; break;
- case REDIS_REPL_CONNECTED: slavestate = "connected"; break;
- default: slavestate = "unknown"; break;
+ if (slaveIsInHandshakeState()) {
+ slavestate = "handshake";
+ } else {
+ switch(server.repl_state) {
+ case REPL_STATE_NONE: slavestate = "none"; break;
+ case REPL_STATE_CONNECT: slavestate = "connect"; break;
+ case REPL_STATE_CONNECTING: slavestate = "connecting"; break;
+ case REPL_STATE_TRANSFER: slavestate = "sync"; break;
+ case REPL_STATE_CONNECTED: slavestate = "connected"; break;
+ default: slavestate = "unknown"; break;
+ }
}
addReplyBulkCString(c,slavestate);
addReplyLongLong(c,server.master ? server.master->reploff : -1);
@@ -1558,15 +2100,15 @@ void roleCommand(redisClient *c) {
* processed offset. If we are not connected with a master, the command has
* no effects. */
void replicationSendAck(void) {
- redisClient *c = server.master;
+ client *c = server.master;
if (c != NULL) {
- c->flags |= REDIS_MASTER_FORCE_REPLY;
+ c->flags |= CLIENT_MASTER_FORCE_REPLY;
addReplyMultiBulkLen(c,3);
addReplyBulkCString(c,"REPLCONF");
addReplyBulkCString(c,"ACK");
addReplyBulkLongLong(c,c->reploff);
- c->flags &= ~REDIS_MASTER_FORCE_REPLY;
+ c->flags &= ~CLIENT_MASTER_FORCE_REPLY;
}
}
@@ -1578,7 +2120,7 @@ void replicationSendAck(void) {
* functions. */
/* This function is called by freeClient() in order to cache the master
- * client structure instead of destryoing it. freeClient() will return
+ * client structure instead of destroying it. freeClient() will return
* ASAP after this function returns, so every action needed to avoid problems
* with a client that is really "suspended" has to be done by this function.
*
@@ -1590,31 +2132,31 @@ void replicationSendAck(void) {
* replicationResurrectCachedMaster() that is used after a successful PSYNC
* handshake in order to reactivate the cached master.
*/
-void replicationCacheMaster(redisClient *c) {
- listNode *ln;
-
- redisAssert(server.master != NULL && server.cached_master == NULL);
- redisLog(REDIS_NOTICE,"Caching the disconnected master state.");
-
- /* Remove from the list of clients, we don't want this client to be
- * listed by CLIENT LIST or processed in any way by batch operations. */
- ln = listSearchKey(server.clients,c);
- redisAssert(ln != NULL);
- listDelNode(server.clients,ln);
+void replicationCacheMaster(client *c) {
+ serverAssert(server.master != NULL && server.cached_master == NULL);
+ serverLog(LL_NOTICE,"Caching the disconnected master state.");
+
+ /* Unlink the client from the server structures. */
+ unlinkClient(c);
+
+    /* Reset the master client so that it's ready to accept new commands:
+     * we want to discard the non processed query buffers and non processed
+ * offsets, including pending transactions, already populated arguments,
+ * pending outputs to the master. */
+ sdsclear(server.master->querybuf);
+ sdsclear(server.master->pending_querybuf);
+ server.master->read_reploff = server.master->reploff;
+ if (c->flags & CLIENT_MULTI) discardTransaction(c);
+ listEmpty(c->reply);
+ c->sentlen = 0;
+ c->reply_bytes = 0;
+ c->bufpos = 0;
+ resetClient(c);
/* Save the master. Server.master will be set to null later by
* replicationHandleMasterDisconnection(). */
server.cached_master = server.master;
- /* Remove the event handlers and close the socket. We'll later reuse
- * the socket of the new connection with the master during PSYNC. */
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- close(c->fd);
-
- /* Set fd to -1 so that we can safely call freeClient(c) later. */
- c->fd = -1;
-
/* Invalidate the Peer ID cache. */
if (c->peerid) {
sdsfree(c->peerid);
@@ -1627,13 +2169,38 @@ void replicationCacheMaster(redisClient *c) {
replicationHandleMasterDisconnection();
}
+/* This function is called when a master is turned into a slave, in order to
+ * create from scratch a cached master for the new client, that will allow
+ * us to PSYNC with the slave that was promoted as the new master after a
+ * failover.
+ *
+ * Assuming this instance was previously the master instance of the new master,
+ * the new master will accept its replication ID, and potentially also the
+ * current offset if no data was lost during the failover. So we use our
+ * current replication ID and offset in order to synthesize a cached master. */
+void replicationCacheMasterUsingMyself(void) {
+ /* The master client we create can be set to any DBID, because
+ * the new master will start its replication stream with SELECT. */
+ server.master_initial_offset = server.master_repl_offset;
+ replicationCreateMasterClient(-1,-1);
+
+ /* Use our own ID / offset. */
+ memcpy(server.master->replid, server.replid, sizeof(server.replid));
+
+ /* Set as cached master. */
+ unlinkClient(server.master);
+ server.cached_master = server.master;
+ server.master = NULL;
+ serverLog(LL_NOTICE,"Before turning into a slave, using my master parameters to synthesize a cached master: I may be able to synchronize with the new master with just a partial transfer.");
+}
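
As a worked example of the request this enables (values made up): if the demoted master had replid 9f3a... and master_repl_offset 1000, the next handshake will send the request printed below, and a promoted slave that kept that ID in its history can answer +CONTINUE instead of forcing a full transfer:

    #include <stdio.h>

    int main(void) {
        const char *replid = "9f3a...";  /* hypothetical 40-char replication ID */
        long long offset = 1000;         /* master_repl_offset at demotion time */
        /* master_initial_offset is set to offset, so the synthesized
         * cached master makes PSYNC ask for offset+1. */
        printf("PSYNC %s %lld\r\n", replid, offset + 1);
        return 0;
    }
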
+
/* Free a cached master, called when there are no longer the conditions for
* a partial resync on reconnection. */
void replicationDiscardCachedMaster(void) {
if (server.cached_master == NULL) return;
- redisLog(REDIS_NOTICE,"Discarding previously cached master state.");
- server.cached_master->flags &= ~REDIS_MASTER;
+ serverLog(LL_NOTICE,"Discarding previously cached master state.");
+ server.cached_master->flags &= ~CLIENT_MASTER;
freeClient(server.cached_master);
server.cached_master = NULL;
}
@@ -1648,25 +2215,26 @@ void replicationResurrectCachedMaster(int newfd) {
server.master = server.cached_master;
server.cached_master = NULL;
server.master->fd = newfd;
- server.master->flags &= ~(REDIS_CLOSE_AFTER_REPLY|REDIS_CLOSE_ASAP);
+ server.master->flags &= ~(CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP);
server.master->authenticated = 1;
server.master->lastinteraction = server.unixtime;
- server.repl_state = REDIS_REPL_CONNECTED;
+ server.repl_state = REPL_STATE_CONNECTED;
+ server.repl_down_since = 0;
/* Re-add to the list of clients. */
- listAddNodeTail(server.clients,server.master);
+ linkClient(server.master);
if (aeCreateFileEvent(server.el, newfd, AE_READABLE,
readQueryFromClient, server.master)) {
- redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
/* We may also need to install the write handler as well if there is
* pending data in the write buffers. */
- if (server.master->bufpos || listLength(server.master->reply)) {
+ if (clientHasPendingReplies(server.master)) {
if (aeCreateFileEvent(server.el, newfd, AE_WRITABLE,
sendReplyToClient, server.master)) {
- redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
+ serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
}
@@ -1687,10 +2255,10 @@ void refreshGoodSlavesCount(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
time_t lag = server.unixtime - slave->repl_ack_time;
- if (slave->replstate == REDIS_REPL_ONLINE &&
+ if (slave->replstate == SLAVE_STATE_ONLINE &&
lag <= server.repl_min_slaves_max_lag) good++;
}
server.repl_good_slaves_count = good;
@@ -1764,14 +2332,14 @@ void replicationScriptCacheAdd(sds sha1) {
sds oldest = listNodeValue(ln);
retval = dictDelete(server.repl_scriptcache_dict,oldest);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
listDelNode(server.repl_scriptcache_fifo,ln);
}
/* Add current. */
retval = dictAdd(server.repl_scriptcache_dict,key,NULL);
listAddNodeHead(server.repl_scriptcache_fifo,key);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
/* Returns non-zero if the specified entry exists inside the cache, that is,
@@ -1823,9 +2391,9 @@ int replicationCountAcksByOffset(long long offset) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
if (slave->repl_ack_off >= offset) count++;
}
return count;
@@ -1833,20 +2401,25 @@ int replicationCountAcksByOffset(long long offset) {
/* WAIT for N replicas to acknowledge the processing of our latest
* write command (and all the previous commands). */
-void waitCommand(redisClient *c) {
+void waitCommand(client *c) {
mstime_t timeout;
long numreplicas, ackreplicas;
long long offset = c->woff;
+ if (server.masterhost) {
+ addReplyError(c,"WAIT cannot be used with slave instances. Please also note that since Redis 4.0 if a slave is configured to be writable (which is not the default) writes to slaves are just local and are not propagated.");
+ return;
+ }
+
/* Argument parsing. */
- if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[1],&numreplicas,NULL) != C_OK)
return;
if (getTimeoutFromObjectOrReply(c,c->argv[2],&timeout,UNIT_MILLISECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
/* First try without blocking at all. */
ackreplicas = replicationCountAcksByOffset(c->woff);
- if (ackreplicas >= numreplicas || c->flags & REDIS_MULTI) {
+ if (ackreplicas >= numreplicas || c->flags & CLIENT_MULTI) {
addReplyLongLong(c,ackreplicas);
return;
}
@@ -1857,7 +2430,7 @@ void waitCommand(redisClient *c) {
c->bpop.reploffset = offset;
c->bpop.numreplicas = numreplicas;
listAddNodeTail(server.clients_waiting_acks,c);
- blockClient(c,REDIS_BLOCKED_WAIT);
+ blockClient(c,BLOCKED_WAIT);
/* Make sure that the server will send an ACK request to all the slaves
* before returning to the event loop. */
@@ -1868,9 +2441,9 @@ void waitCommand(redisClient *c) {
* specific cleanup. We just remove the client from the list of clients
* waiting for replica acks. Never call it directly, call unblockClient()
* instead. */
-void unblockClientWaitingReplicas(redisClient *c) {
+void unblockClientWaitingReplicas(client *c) {
listNode *ln = listSearchKey(server.clients_waiting_acks,c);
- redisAssert(ln != NULL);
+ serverAssert(ln != NULL);
listDelNode(server.clients_waiting_acks,ln);
}
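
The heart of WAIT is the predicate in replicationCountAcksByOffset(); the same check, reduced to a self-contained sketch over plain arrays (names hypothetical):

    /* Count replicas whose acknowledged offset covers the target one;
     * WAIT succeeds once this count reaches numreplicas. */
    static int countAcks(const long long *ack_off, int nslaves,
                         long long target) {
        int count = 0;
        for (int i = 0; i < nslaves; i++)
            if (ack_off[i] >= target) count++;
        return count;
    }
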
@@ -1885,7 +2458,7 @@ void processClientsWaitingReplicas(void) {
listRewind(server.clients_waiting_acks,&li);
while((ln = listNext(&li))) {
- redisClient *c = ln->value;
+ client *c = ln->value;
/* Every time we find a client that is satisfied for a given
* offset and number of replicas, we remember it so the next client
@@ -1933,38 +2506,40 @@ long long replicationGetSlaveOffset(void) {
/* Replication cron function, called 1 time per second. */
void replicationCron(void) {
+ static long long replication_cron_loops = 0;
+
/* Non blocking connection timeout? */
if (server.masterhost &&
- (server.repl_state == REDIS_REPL_CONNECTING ||
- server.repl_state == REDIS_REPL_RECEIVE_PONG) &&
- (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
+ (server.repl_state == REPL_STATE_CONNECTING ||
+ slaveIsInHandshakeState()) &&
+ (time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"Timeout connecting to the MASTER...");
- undoConnectWithMaster();
+ serverLog(LL_WARNING,"Timeout connecting to the MASTER...");
+ cancelReplicationHandshake();
}
/* Bulk transfer I/O timeout? */
- if (server.masterhost && server.repl_state == REDIS_REPL_TRANSFER &&
+ if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER &&
(time(NULL)-server.repl_transfer_lastio) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
- replicationAbortSyncTransfer();
+ serverLog(LL_WARNING,"Timeout receiving bulk data from MASTER... If the problem persists try to set the 'repl-timeout' parameter in redis.conf to a larger value.");
+ cancelReplicationHandshake();
}
/* Timed out master when we are an already connected slave? */
- if (server.masterhost && server.repl_state == REDIS_REPL_CONNECTED &&
+ if (server.masterhost && server.repl_state == REPL_STATE_CONNECTED &&
(time(NULL)-server.master->lastinteraction) > server.repl_timeout)
{
- redisLog(REDIS_WARNING,"MASTER timeout: no data nor PING received...");
+ serverLog(LL_WARNING,"MASTER timeout: no data nor PING received...");
freeClient(server.master);
}
/* Check if we should connect to a MASTER */
- if (server.repl_state == REDIS_REPL_CONNECT) {
- redisLog(REDIS_NOTICE,"Connecting to MASTER %s:%d",
+ if (server.repl_state == REPL_STATE_CONNECT) {
+ serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
server.masterhost, server.masterport);
- if (connectWithMaster() == REDIS_OK) {
- redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync started");
+ if (connectWithMaster() == C_OK) {
+ serverLog(LL_NOTICE,"MASTER <-> SLAVE sync started");
}
}
@@ -1972,38 +2547,53 @@ void replicationCron(void) {
* Note that we do not send periodic acks to masters that don't
* support PSYNC and replication offsets. */
if (server.masterhost && server.master &&
- !(server.master->flags & REDIS_PRE_PSYNC))
+ !(server.master->flags & CLIENT_PRE_PSYNC))
replicationSendAck();
/* If we have attached slaves, PING them from time to time.
* So slaves can implement an explicit timeout to masters, and will
* be able to detect a link disconnection even if the TCP connection
* will not actually go down. */
- if (!(server.cronloops % (server.repl_ping_slave_period * server.hz))) {
- listIter li;
- listNode *ln;
- robj *ping_argv[1];
+ listIter li;
+ listNode *ln;
+ robj *ping_argv[1];
- /* First, send PING */
+ /* First, send PING according to ping_slave_period. */
+ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 &&
+ listLength(server.slaves))
+ {
ping_argv[0] = createStringObject("PING",4);
- replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1);
+ replicationFeedSlaves(server.slaves, server.slaveseldb,
+ ping_argv, 1);
decrRefCount(ping_argv[0]);
+ }
- /* Second, send a newline to all the slaves in pre-synchronization
- * stage, that is, slaves waiting for the master to create the RDB file.
- * The newline will be ignored by the slave but will refresh the
- * last-io timer preventing a timeout. */
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ /* Second, send a newline to all the slaves in pre-synchronization
+ * stage, that is, slaves waiting for the master to create the RDB file.
+ *
+     * Also send a newline to all the chained slaves we have, if we lost
+     * connection from our master, to keep the slaves aware that their
+     * master is online. This is needed since sub-slaves only receive proxied
+     * data from top-level masters, so there is no explicit pinging in order
+     * to avoid altering the replication offsets. These special out of band
+     * pings (newlines) can be sent; they will have no effect on the offset.
+ *
+ * The newline will be ignored by the slave but will refresh the
+ * last interaction timer preventing a timeout. In this case we ignore the
+ * ping period and refresh the connection once per second since certain
+ * timeouts are set at a few seconds (example: PSYNC response). */
+ listRewind(server.slaves,&li);
+ while((ln = listNext(&li))) {
+ client *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START ||
- (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END &&
- server.rdb_child_type != REDIS_RDB_CHILD_TYPE_SOCKET))
- {
- if (write(slave->fd, "\n", 1) == -1) {
- /* Don't worry, it's just a ping. */
- }
+ int is_presync =
+ (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
+ (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+ server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));
+
+ if (is_presync) {
+ if (write(slave->fd, "\n", 1) == -1) {
+ /* Don't worry about socket errors, it's just a ping. */
}
}
}
@@ -2015,29 +2605,50 @@ void replicationCron(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
+ client *slave = ln->value;
- if (slave->replstate != REDIS_REPL_ONLINE) continue;
- if (slave->flags & REDIS_PRE_PSYNC) continue;
+ if (slave->replstate != SLAVE_STATE_ONLINE) continue;
+ if (slave->flags & CLIENT_PRE_PSYNC) continue;
if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout)
{
- redisLog(REDIS_WARNING, "Disconnecting timedout slave: %s",
+ serverLog(LL_WARNING, "Disconnecting timedout slave: %s",
replicationGetSlaveName(slave));
freeClient(slave);
}
}
}
- /* If we have no attached slaves and there is a replication backlog
- * using memory, free it after some (configured) time. */
+ /* If this is a master without attached slaves and there is a replication
+ * backlog active, in order to reclaim memory we can free it after some
+ * (configured) time. Note that this cannot be done for slaves: slaves
+ * without sub-slaves attached should still accumulate data into the
+ * backlog, in order to reply to PSYNC queries if they are turned into
+ * masters after a failover. */
if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&
- server.repl_backlog)
+ server.repl_backlog && server.masterhost == NULL)
{
time_t idle = server.unixtime - server.repl_no_slaves_since;
if (idle > server.repl_backlog_time_limit) {
+ /* When we free the backlog, we always use a new
+ * replication ID and clear the ID2. This is needed
+ * because when there is no backlog, the master_repl_offset
+ * is not updated, but we would still retain our replication
+ * ID, leading to the following problem:
+ *
+ * 1. We are a master instance.
+             * 2. Our slave is promoted to master. Its repl-id-2 will
+             *    be the same as our repl-id.
+             * 3. We, still acting as a master, receive some updates that
+             *    will not increment the master_repl_offset.
+             * 4. Later we are turned into a slave, connect to the new
+             *    master that will accept our PSYNC request via the second
+             *    replication ID, but there will be data inconsistency
+             *    because we received writes.
+ changeReplicationId();
+ clearReplicationId2();
freeReplicationBacklog();
- redisLog(REDIS_NOTICE,
+ serverLog(LL_NOTICE,
"Replication backlog freed after %d seconds "
"without connected slaves.",
(int) server.repl_backlog_time_limit);
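
In other words, freeing the backlog must also retire the current history. Neither changeReplicationId() nor clearReplicationId2() appears in this hunk, so the sketch below only illustrates the assumed bookkeeping:

    #include <string.h>

    #define RUN_ID_SIZE 40

    /* Assumed semantics: abandon the current replication history. */
    static void resetHistorySketch(char *replid, char *replid2,
                                   long long *second_replid_offset,
                                   void (*newRandomId)(char *, int)) {
        newRandomId(replid, RUN_ID_SIZE);  /* changeReplicationId()  */
        memset(replid2, '0', RUN_ID_SIZE); /* clearReplicationId2()  */
        replid2[RUN_ID_SIZE] = '\0';
        *second_replid_offset = -1;        /* ID2 no longer usable   */
    }
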
@@ -2048,54 +2659,49 @@ void replicationCron(void) {
* free our Replication Script Cache as there is no need to propagate
* EVALSHA at all. */
if (listLength(server.slaves) == 0 &&
- server.aof_state == REDIS_AOF_OFF &&
+ server.aof_state == AOF_OFF &&
listLength(server.repl_scriptcache_fifo) != 0)
{
replicationScriptCacheFlush();
}
- /* If we are using diskless replication and there are slaves waiting
- * in WAIT_BGSAVE_START state, check if enough seconds elapsed and
- * start a BGSAVE.
+ /* Start a BGSAVE good for replication if we have slaves in
+ * WAIT_BGSAVE_START state.
*
- * This code is also useful to trigger a BGSAVE if the diskless
- * replication was turned off with CONFIG SET, while there were already
- * slaves in WAIT_BGSAVE_START state. */
+ * In case of diskless replication, we make sure to wait the specified
+ * number of seconds (according to configuration) so that other slaves
+ * have the time to arrive before we start streaming. */
if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
time_t idle, max_idle = 0;
int slaves_waiting = 0;
+ int mincapa = -1;
listNode *ln;
listIter li;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
+ client *slave = ln->value;
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
idle = server.unixtime - slave->lastinteraction;
if (idle > max_idle) max_idle = idle;
slaves_waiting++;
+ mincapa = (mincapa == -1) ? slave->slave_capa :
+ (mincapa & slave->slave_capa);
}
}
- if (slaves_waiting && max_idle > server.repl_diskless_sync_delay) {
- /* Start a BGSAVE. Usually with socket target, or with disk target
- * if there was a recent socket -> disk config change. */
- if (startBgsaveForReplication() == REDIS_OK) {
- /* It started! We need to change the state of slaves
- * from WAIT_BGSAVE_START to WAIT_BGSAVE_END in case
- * the current target is disk. Otherwise it was already done
- * by rdbSaveToSlavesSockets() which is called by
- * startBgsaveForReplication(). */
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = ln->value;
- if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
- slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
- }
- }
+ if (slaves_waiting &&
+ (!server.repl_diskless_sync ||
+ max_idle > server.repl_diskless_sync_delay))
+ {
+ /* Start the BGSAVE. The called function may start a
+ * BGSAVE with socket target or disk target depending on the
+ * configuration and slaves capabilities. */
+ startBgsaveForReplication(mincapa);
}
}
/* Refresh the number of slaves with lag <= min-slaves-max-lag. */
refreshGoodSlavesCount();
+ replication_cron_loops++; /* Incremented with frequency 1 HZ. */
}
diff --git a/src/rio.c b/src/rio.c
index 738e56fd0..c9c76b8f2 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -53,7 +53,7 @@
#include "util.h"
#include "crc64.h"
#include "config.h"
-#include "redis.h"
+#include "server.h"
/* ------------------------- Buffer I/O implementation ----------------------- */
@@ -81,7 +81,7 @@ static off_t rioBufferTell(rio *r) {
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
static int rioBufferFlush(rio *r) {
- REDIS_NOTUSED(r);
+ UNUSED(r);
return 1; /* Nothing to do, our write just appends to the buffer. */
}
@@ -116,7 +116,7 @@ static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
r->io.file.buffered >= r->io.file.autosync)
{
fflush(r->io.file.fp);
- aof_fsync(fileno(r->io.file.fp));
+ redis_fsync(fileno(r->io.file.fp));
r->io.file.buffered = 0;
}
return retval;
@@ -163,7 +163,7 @@ void rioInitWithFile(rio *r, FILE *fp) {
* The function returns success as long as we are able to correctly write
* to at least one file descriptor.
*
- * When buf is NULL adn len is 0, the function performs a flush operation
+ * When buf is NULL and len is 0, the function performs a flush operation
* if there is some pending buffer, so this function is also used in order
* to implement rioFdsetFlush(). */
static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
@@ -176,8 +176,8 @@ static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
* a given size, we actually write to the sockets. */
if (len) {
r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
- len = 0; /* Prevent entering the while belove if we don't flush. */
- if (sdslen(r->io.fdset.buf) > REDIS_IOBUF_LEN) doflush = 1;
+ len = 0; /* Prevent entering the while below if we don't flush. */
+ if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1;
}
if (doflush) {
@@ -232,9 +232,9 @@ static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
/* Returns 1 or 0 for success/failure. */
static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
- REDIS_NOTUSED(r);
- REDIS_NOTUSED(buf);
- REDIS_NOTUSED(len);
+ UNUSED(r);
+ UNUSED(buf);
+ UNUSED(len);
return 0; /* Error, this target does not support reading. */
}
@@ -276,6 +276,7 @@ void rioInitWithFdset(rio *r, int *fds, int numfds) {
r->io.fdset.buf = sdsempty();
}
+/* Release the resources of the fd set target of a rio stream. */
void rioFreeFdset(rio *r) {
zfree(r->io.fdset.fds);
zfree(r->io.fdset.state);
@@ -299,7 +300,7 @@ void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
* disk I/O concentrated in very little time. When we fsync in an explicit
* way instead the I/O pressure is more distributed across time. */
void rioSetAutoSync(rio *r, off_t bytes) {
- redisAssert(r->read == rioFileIO.read);
+ serverAssert(r->read == rioFileIO.read);
r->io.file.autosync = bytes;
}
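
A short usage sketch of the autosync facility, assuming the rio API from this tree (rioWrite() is the generic write entry point declared in rio.h; the 32 MB threshold is an arbitrary example):

    #include <stdio.h>
    #include "rio.h"

    /* Write a buffer through a file-backed rio, fsync-ing every 32 MB
     * so dirty pages are flushed incrementally rather than all at once. */
    size_t writeWithAutosync(FILE *fp, const void *buf, size_t len) {
        rio r;
        rioInitWithFile(&r, fp);
        rioSetAutoSync(&r, 32 * 1024 * 1024);
        return rioWrite(&r, buf, len); /* 1 on success, 0 on error */
    }
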
@@ -309,7 +310,7 @@ void rioSetAutoSync(rio *r, off_t bytes) {
* generating the Redis protocol for the Append Only File. */
/* Write multi bulk count in the format: "*<count>\r\n". */
-size_t rioWriteBulkCount(rio *r, char prefix, int count) {
+size_t rioWriteBulkCount(rio *r, char prefix, long count) {
char cbuf[128];
int clen;
diff --git a/src/rio.h b/src/rio.h
index e5fa0cd33..c996c54f6 100644
--- a/src/rio.h
+++ b/src/rio.h
@@ -128,11 +128,16 @@ void rioInitWithFile(rio *r, FILE *fp);
void rioInitWithBuffer(rio *r, sds s);
void rioInitWithFdset(rio *r, int *fds, int numfds);
-size_t rioWriteBulkCount(rio *r, char prefix, int count);
+void rioFreeFdset(rio *r);
+
+size_t rioWriteBulkCount(rio *r, char prefix, long count);
size_t rioWriteBulkString(rio *r, const char *buf, size_t len);
size_t rioWriteBulkLongLong(rio *r, long long l);
size_t rioWriteBulkDouble(rio *r, double d);
+struct redisObject;
+int rioWriteBulkObject(rio *r, struct redisObject *obj);
+
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len);
void rioSetAutoSync(rio *r, off_t bytes);
diff --git a/src/scripting.c b/src/scripting.c
index b6a333a43..2732c87fb 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "sha1.h"
#include "rand.h"
#include "cluster.h"
@@ -45,7 +45,67 @@ char *redisProtocolToLuaType_Error(lua_State *lua, char *reply);
char *redisProtocolToLuaType_MultiBulk(lua_State *lua, char *reply);
int redis_math_random (lua_State *L);
int redis_math_randomseed (lua_State *L);
-void sha1hex(char *digest, char *script, size_t len);
+void ldbInit(void);
+void ldbDisable(client *c);
+void ldbEnable(client *c);
+void evalGenericCommandWithDebugging(client *c, int evalsha);
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar);
+void ldbLog(sds entry);
+void ldbLogRedisReply(char *reply);
+sds ldbCatStackValue(sds s, lua_State *lua, int idx);
+
+/* Debugger shared state is stored inside this global structure. */
+#define LDB_BREAKPOINTS_MAX 64 /* Max number of breakpoints. */
+#define LDB_MAX_LEN_DEFAULT 256 /* Default len limit for replies / var dumps. */
+struct ldbState {
+ int fd; /* Socket of the debugging client. */
+ int active; /* Are we debugging EVAL right now? */
+ int forked; /* Is this a fork()ed debugging session? */
+ list *logs; /* List of messages to send to the client. */
+ list *traces; /* Messages about Redis commands executed since last stop.*/
+ list *children; /* All forked debugging sessions pids. */
+ int bp[LDB_BREAKPOINTS_MAX]; /* An array of breakpoints line numbers. */
+ int bpcount; /* Number of valid entries inside bp. */
+    int step;   /* Stop at next line regardless of breakpoints. */
+ int luabp; /* Stop at next line because redis.breakpoint() was called. */
+ sds *src; /* Lua script source code split by line. */
+ int lines; /* Number of lines in 'src'. */
+ int currentline; /* Current line number. */
+ sds cbuf; /* Debugger client command buffer. */
+ size_t maxlen; /* Max var dump / reply length. */
+ int maxlen_hint_sent; /* Did we already hint about "set maxlen"? */
+} ldb;
+
+/* ---------------------------------------------------------------------------
+ * Utility functions.
+ * ------------------------------------------------------------------------- */
+
+/* Perform the SHA1 of the input string. We use this both for hashing script
+ * bodies in order to obtain the Lua function name, and in the implementation
+ * of redis.sha1().
+ *
+ * 'digest' should point to a 41-byte buffer: 40 for the SHA1 converted into a
+ * hexadecimal number, plus 1 byte for the null terminator. */
+void sha1hex(char *digest, char *script, size_t len) {
+ SHA1_CTX ctx;
+ unsigned char hash[20];
+ char *cset = "0123456789abcdef";
+ int j;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx,(unsigned char*)script,len);
+ SHA1Final(hash,&ctx);
+
+ for (j = 0; j < 20; j++) {
+ digest[j*2] = cset[((hash[j]&0xF0)>>4)];
+ digest[j*2+1] = cset[(hash[j]&0xF)];
+ }
+ digest[40] = '\0';
+}
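
A small driver showing the intended use (compile together with sha1.c and the function above; the f_<sha1> prefix is how the scripting engine names the generated Lua function):

    #include <stdio.h>
    #include <string.h>

    void sha1hex(char *digest, char *script, size_t len); /* defined above */

    int main(void) {
        char funcname[43] = "f_"; /* 2 + 40 hex digits + null terminator */
        char body[] = "return 1";
        sha1hex(funcname+2, body, strlen(body));
        printf("%s\n", funcname); /* prints "f_" followed by 40 hex digits */
        return 0;
    }
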
+
+/* ---------------------------------------------------------------------------
+ * Redis reply to Lua type conversion functions.
+ * ------------------------------------------------------------------------- */
/* Take a Redis reply in the Redis protocol format and convert it into a
* Lua type. Thanks to this function, and the introduction of not connected
@@ -54,13 +114,11 @@ void sha1hex(char *digest, char *script, size_t len);
* Basically we take the arguments, execute the Redis command in the context
* of a non connected client, then take the generated reply and convert it
* into a suitable Lua type. With this trick the scripting feature does not
- * need the introduction of a full Redis internals API. Basically the script
+ * need the introduction of a full Redis internals API. The script
* is like a normal client that bypasses all the slow I/O paths.
*
* Note: in this function we do not do any sanity check as the reply is
* generated by Redis directly. This allows us to go faster.
- * The reply string can be altered during the parsing as it is discarded
- * after the conversion is completed.
*
* Errors are returned as a table with a single 'err' field set to the
* error string.
@@ -70,21 +128,11 @@ char *redisProtocolToLuaType(lua_State *lua, char* reply) {
char *p = reply;
switch(*p) {
- case ':':
- p = redisProtocolToLuaType_Int(lua,reply);
- break;
- case '$':
- p = redisProtocolToLuaType_Bulk(lua,reply);
- break;
- case '+':
- p = redisProtocolToLuaType_Status(lua,reply);
- break;
- case '-':
- p = redisProtocolToLuaType_Error(lua,reply);
- break;
- case '*':
- p = redisProtocolToLuaType_MultiBulk(lua,reply);
- break;
+ case ':': p = redisProtocolToLuaType_Int(lua,reply); break;
+ case '$': p = redisProtocolToLuaType_Bulk(lua,reply); break;
+ case '+': p = redisProtocolToLuaType_Status(lua,reply); break;
+ case '-': p = redisProtocolToLuaType_Error(lua,reply); break;
+ case '*': p = redisProtocolToLuaType_MultiBulk(lua,reply); break;
}
return p;
}
@@ -152,9 +200,20 @@ char *redisProtocolToLuaType_MultiBulk(lua_State *lua, char *reply) {
return p;
}
+/* This function is used in order to push an error on the Lua stack in the
+ * format used by redis.pcall to return errors, which is a Lua table
+ * with a single "err" field set to the error string. Note that this
+ * table is never a valid reply from proper commands, since the returned
+ * tables are otherwise always indexed by integers, never by strings. */
void luaPushError(lua_State *lua, char *error) {
lua_Debug dbg;
+ /* If debugging is active and in step mode, log errors resulting from
+ * Redis commands. */
+ if (ldb.active && ldb.step) {
+ ldbLog(sdscatprintf(sdsempty(),"<error> %s",error));
+ }
+
lua_newtable(lua);
lua_pushstring(lua,"err");
@@ -170,6 +229,16 @@ void luaPushError(lua_State *lua, char *error) {
lua_settable(lua,-3);
}
+/* In case the error set into the Lua stack by luaPushError() was generated
+ * by the non-error-trapping version of redis.pcall(), which is redis.call(),
+ * this function will raise the Lua error so that the execution of the
+ * script will be halted. */
+int luaRaiseError(lua_State *lua) {
+ lua_pushstring(lua,"err");
+ lua_gettable(lua,-2);
+ return lua_error(lua);
+}
+
/* Sort the array currently in the stack. We do this to make the output
* of commands like KEYS or SMEMBERS something deterministic when called
 * from Lua (to play well with AOF/replication).
@@ -201,12 +270,85 @@ void luaSortArray(lua_State *lua) {
lua_pop(lua,1); /* Stack: array (sorted) */
}
+/* ---------------------------------------------------------------------------
+ * Lua reply to Redis reply conversion functions.
+ * ------------------------------------------------------------------------- */
+
+void luaReplyToRedisReply(client *c, lua_State *lua) {
+ int t = lua_type(lua,-1);
+
+ switch(t) {
+ case LUA_TSTRING:
+ addReplyBulkCBuffer(c,(char*)lua_tostring(lua,-1),lua_strlen(lua,-1));
+ break;
+ case LUA_TBOOLEAN:
+ addReply(c,lua_toboolean(lua,-1) ? shared.cone : shared.nullbulk);
+ break;
+ case LUA_TNUMBER:
+ addReplyLongLong(c,(long long)lua_tonumber(lua,-1));
+ break;
+ case LUA_TTABLE:
+ /* We need to check if it is an array, an error, or a status reply.
+         * Errors are returned as a single element table with an 'err' field.
+ * Status replies are returned as single element table with 'ok'
+ * field. */
+ lua_pushstring(lua,"err");
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds err = sdsnew(lua_tostring(lua,-1));
+ sdsmapchars(err,"\r\n"," ",2);
+ addReplySds(c,sdscatprintf(sdsempty(),"-%s\r\n",err));
+ sdsfree(err);
+ lua_pop(lua,2);
+ return;
+ }
+
+ lua_pop(lua,1);
+ lua_pushstring(lua,"ok");
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TSTRING) {
+ sds ok = sdsnew(lua_tostring(lua,-1));
+ sdsmapchars(ok,"\r\n"," ",2);
+ addReplySds(c,sdscatprintf(sdsempty(),"+%s\r\n",ok));
+ sdsfree(ok);
+ lua_pop(lua,1);
+ } else {
+ void *replylen = addDeferredMultiBulkLength(c);
+ int j = 1, mbulklen = 0;
+
+ lua_pop(lua,1); /* Discard the 'ok' field value we popped */
+ while(1) {
+ lua_pushnumber(lua,j++);
+ lua_gettable(lua,-2);
+ t = lua_type(lua,-1);
+ if (t == LUA_TNIL) {
+ lua_pop(lua,1);
+ break;
+ }
+ luaReplyToRedisReply(c, lua);
+ mbulklen++;
+ }
+ setDeferredMultiBulkLength(c,replylen,mbulklen);
+ }
+ break;
+ default:
+ addReply(c,shared.nullbulk);
+ }
+ lua_pop(lua,1);
+}
+
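-- Illustrative sketch: the table shapes handled by the converter above.
-- Arrays stop at the first nil, so {1, nil, 3} would reach the client as a
-- one-element array. Values are hypothetical.
-- return {1, 'two', {3, 'three'}}   -- nested multi bulk reply
-- return {err = 'My Error'}         -- "-My Error" protocol error
return {ok = 'FINE'}                 -- "+FINE" status reply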
+/* ---------------------------------------------------------------------------
+ * Lua redis.* functions implementations.
+ * ------------------------------------------------------------------------- */
+
#define LUA_CMD_OBJCACHE_SIZE 32
#define LUA_CMD_OBJCACHE_MAX_LEN 64
int luaRedisGenericCommand(lua_State *lua, int raise_error) {
int j, argc = lua_gettop(lua);
struct redisCommand *cmd;
- redisClient *c = server.lua_client;
+ client *c = server.lua_client;
sds reply;
/* Cached across calls. */
@@ -214,12 +356,35 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
static int argv_size = 0;
static robj *cached_objects[LUA_CMD_OBJCACHE_SIZE];
static size_t cached_objects_len[LUA_CMD_OBJCACHE_SIZE];
+ static int inuse = 0; /* Recursive calls detection. */
+
+ /* Reflect MULTI state */
+ if (server.lua_multi_emitted || (server.lua_caller->flags & CLIENT_MULTI)) {
+ c->flags |= CLIENT_MULTI;
+ } else {
+ c->flags &= ~CLIENT_MULTI;
+ }
+
+ /* By using Lua debug hooks it is possible to trigger a recursive call
+ * to luaRedisGenericCommand(), which normally should never happen.
+     * Making this function reentrant would be futile, and would make it
+     * slower, but we should at least detect such a misuse, and abort. */
+ if (inuse) {
+ char *recursion_warning =
+ "luaRedisGenericCommand() recursive call detected. "
+ "Are you doing funny stuff with Lua debug hooks?";
+ serverLog(LL_WARNING,"%s",recursion_warning);
+ luaPushError(lua,recursion_warning);
+ return 1;
+ }
+ inuse++;
/* Require at least one argument */
if (argc == 0) {
luaPushError(lua,
"Please specify at least one argument for redis.call()");
- return 1;
+ inuse--;
+ return raise_error ? luaRaiseError(lua) : 1;
}
/* Build the arguments vector */
@@ -249,14 +414,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (j < LUA_CMD_OBJCACHE_SIZE && cached_objects[j] &&
cached_objects_len[j] >= obj_len)
{
- char *s = cached_objects[j]->ptr;
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
-
+ sds s = cached_objects[j]->ptr;
argv[j] = cached_objects[j];
cached_objects[j] = NULL;
memcpy(s,obj_s,obj_len+1);
- sh->free += sh->len - obj_len;
- sh->len = obj_len;
+ sdssetlen(s, obj_len);
} else {
argv[j] = createStringObject(obj_s, obj_len);
}
@@ -273,13 +435,30 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
luaPushError(lua,
"Lua redis() command arguments must be strings or integers");
- return 1;
+ inuse--;
+ return raise_error ? luaRaiseError(lua) : 1;
}
/* Setup our fake client for command execution */
c->argv = argv;
c->argc = argc;
+ /* Log the command if debugging is active. */
+ if (ldb.active && ldb.step) {
+ sds cmdlog = sdsnew("<redis>");
+ for (j = 0; j < c->argc; j++) {
+ if (j == 10) {
+ cmdlog = sdscatprintf(cmdlog," ... (%d more)",
+ c->argc-j-1);
+ break;
+ } else {
+ cmdlog = sdscatlen(cmdlog," ",1);
+ cmdlog = sdscatsds(cmdlog,c->argv[j]->ptr);
+ }
+ }
+ ldbLog(cmdlog);
+ }
+
/* Command lookup */
cmd = lookupCommand(argv[0]->ptr);
if (!cmd || ((cmd->arity > 0 && cmd->arity != argc) ||
@@ -292,10 +471,10 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
luaPushError(lua,"Unknown Redis command called from Lua script");
goto cleanup;
}
- c->cmd = cmd;
+ c->cmd = c->lastcmd = cmd;
/* There are commands that are not allowed inside scripts. */
- if (cmd->flags & REDIS_CMD_NOSCRIPT) {
+ if (cmd->flags & CMD_NOSCRIPT) {
luaPushError(lua, "This Redis command is not allowed from scripts");
goto cleanup;
}
@@ -303,22 +482,28 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
/* Write commands are forbidden against read-only slaves, or if a
* command marked as non-deterministic was already called in the context
* of this script. */
- if (cmd->flags & REDIS_CMD_WRITE) {
- if (server.lua_random_dirty) {
+ if (cmd->flags & CMD_WRITE) {
+ int deny_write_type = writeCommandsDeniedByDiskError();
+ if (server.lua_random_dirty && !server.lua_replicate_commands) {
luaPushError(lua,
- "Write commands not allowed after non deterministic commands");
+ "Write commands not allowed after non deterministic commands. Call redis.replicate_commands() at the start of your script in order to switch to single commands replication mode.");
goto cleanup;
} else if (server.masterhost && server.repl_slave_ro &&
!server.loading &&
- !(server.lua_caller->flags & REDIS_MASTER))
+ !(server.lua_caller->flags & CLIENT_MASTER))
{
luaPushError(lua, shared.roslaveerr->ptr);
goto cleanup;
- } else if (server.stop_writes_on_bgsave_err &&
- server.saveparamslen > 0 &&
- server.lastbgsave_status == REDIS_ERR)
- {
- luaPushError(lua, shared.bgsaveerr->ptr);
+ } else if (deny_write_type != DISK_ERROR_TYPE_NONE) {
+ if (deny_write_type == DISK_ERROR_TYPE_RDB) {
+ luaPushError(lua, shared.bgsaveerr->ptr);
+ } else {
+ sds aof_write_err = sdscatfmt(sdsempty(),
+ "-MISCONF Errors writing to the AOF file: %s\r\n",
+ strerror(server.aof_last_write_errno));
+ luaPushError(lua, aof_write_err);
+ sdsfree(aof_write_err);
+ }
goto cleanup;
}
}
@@ -328,23 +513,26 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
* first write in the context of this script, otherwise we can't stop
* in the middle. */
if (server.maxmemory && server.lua_write_dirty == 0 &&
- (cmd->flags & REDIS_CMD_DENYOOM))
+ (cmd->flags & CMD_DENYOOM))
{
- if (freeMemoryIfNeeded() == REDIS_ERR) {
+ if (freeMemoryIfNeeded() == C_ERR) {
luaPushError(lua, shared.oomerr->ptr);
goto cleanup;
}
}
- if (cmd->flags & REDIS_CMD_RANDOM) server.lua_random_dirty = 1;
- if (cmd->flags & REDIS_CMD_WRITE) server.lua_write_dirty = 1;
+ if (cmd->flags & CMD_RANDOM) server.lua_random_dirty = 1;
+ if (cmd->flags & CMD_WRITE) server.lua_write_dirty = 1;
/* If this is a Redis Cluster node, we need to make sure Lua is not
- * trying to access non-local keys. */
- if (server.cluster_enabled) {
+ * trying to access non-local keys, with the exception of commands
+ * received from our master or when loading the AOF back in memory. */
+ if (server.cluster_enabled && !server.loading &&
+ !(server.lua_caller->flags & CLIENT_MASTER))
+ {
/* Duplicate relevant flags in the lua client. */
- c->flags &= ~(REDIS_READONLY|REDIS_ASKING);
- c->flags |= server.lua_caller->flags & (REDIS_READONLY|REDIS_ASKING);
+ c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
+ c->flags |= server.lua_caller->flags & (CLIENT_READONLY|CLIENT_ASKING);
if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
server.cluster->myself)
{
@@ -355,13 +543,34 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
}
+ /* If we are using single commands replication, we need to wrap what
+ * we propagate into a MULTI/EXEC block, so that it will be atomic like
+ * a Lua script in the context of AOF and slaves. */
+ if (server.lua_replicate_commands &&
+ !server.lua_multi_emitted &&
+ !(server.lua_caller->flags & CLIENT_MULTI) &&
+ server.lua_write_dirty &&
+ server.lua_repl != PROPAGATE_NONE)
+ {
+ execCommandPropagateMulti(server.lua_caller);
+ server.lua_multi_emitted = 1;
+ }
+
/* Run the command */
- call(c,REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
+ int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
+ if (server.lua_replicate_commands) {
+ /* Set flags according to redis.set_repl() settings. */
+ if (server.lua_repl & PROPAGATE_AOF)
+ call_flags |= CMD_CALL_PROPAGATE_AOF;
+ if (server.lua_repl & PROPAGATE_REPL)
+ call_flags |= CMD_CALL_PROPAGATE_REPL;
+ }
+ call(c,call_flags);
/* Convert the result of the Redis command into a suitable Lua type.
* The first thing we need is to create a single string from the client
* output buffers. */
- if (listLength(c->reply) == 0 && c->bufpos < REDIS_REPLY_CHUNK_BYTES) {
+ if (listLength(c->reply) == 0 && c->bufpos < PROTO_REPLY_CHUNK_BYTES) {
/* This is a fast path for the common case of a reply inside the
* client static buffer. Don't create an SDS string but just use
* the client buffer directly. */
@@ -372,17 +581,23 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
reply = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
- robj *o = listNodeValue(listFirst(c->reply));
+ clientReplyBlock *o = listNodeValue(listFirst(c->reply));
- reply = sdscatlen(reply,o->ptr,sdslen(o->ptr));
+ reply = sdscatlen(reply,o->buf,o->used);
listDelNode(c->reply,listFirst(c->reply));
}
}
if (raise_error && reply[0] != '-') raise_error = 0;
redisProtocolToLuaType(lua,reply);
+
+ /* If the debugger is active, log the reply from Redis. */
+ if (ldb.active && ldb.step)
+ ldbLogRedisReply(reply);
+
/* Sort the output array if needed, assuming it is a non-null multi bulk
* reply as expected. */
- if ((cmd->flags & REDIS_CMD_SORT_FOR_SCRIPT) &&
+ if ((cmd->flags & CMD_SORT_FOR_SCRIPT) &&
+ (server.lua_replicate_commands == 0) &&
(reply[0] == '*' && reply[1] != '-')) {
luaSortArray(lua);
}
@@ -400,15 +615,14 @@ cleanup:
* (we must be the only owner) for us to cache it. */
if (j < LUA_CMD_OBJCACHE_SIZE &&
o->refcount == 1 &&
- (o->encoding == REDIS_ENCODING_RAW ||
- o->encoding == REDIS_ENCODING_EMBSTR) &&
+ (o->encoding == OBJ_ENCODING_RAW ||
+ o->encoding == OBJ_ENCODING_EMBSTR) &&
sdslen(o->ptr) <= LUA_CMD_OBJCACHE_MAX_LEN)
{
- struct sdshdr *sh = (void*)(((char*)(o->ptr))-(sizeof(struct sdshdr)));
-
+ sds s = o->ptr;
if (cached_objects[j]) decrRefCount(cached_objects[j]);
cached_objects[j] = o;
- cached_objects_len[j] = sh->free + sh->len;
+ cached_objects_len[j] = sdsalloc(s);
} else {
decrRefCount(o);
}
@@ -424,17 +638,19 @@ cleanup:
/* If we are here we should have an error in the stack, in the
* form of a table with an "err" field. Extract the string to
* return the plain error. */
- lua_pushstring(lua,"err");
- lua_gettable(lua,-2);
- return lua_error(lua);
+ inuse--;
+ return luaRaiseError(lua);
}
+ inuse--;
return 1;
}
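-- Illustrative sketch: the write-after-random check above, under the
-- default whole-script replication. TIME is flagged as random, so the
-- commented SET would be refused; redis.replicate_commands() lifts the
-- restriction. The key name is hypothetical.
local t = redis.call('TIME')
-- redis.call('SET', 'now', t[1])  -- error: write after non-deterministic
return t[1] .. '.' .. t[2]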
+/* redis.call() */
int luaRedisCallCommand(lua_State *lua) {
return luaRedisGenericCommand(lua,1);
}
+/* redis.pcall() */
int luaRedisPCallCommand(lua_State *lua) {
return luaRedisGenericCommand(lua,0);
}
@@ -448,8 +664,8 @@ int luaRedisSha1hexCommand(lua_State *lua) {
char *s;
if (argc != 1) {
- luaPushError(lua, "wrong number of arguments");
- return 1;
+ lua_pushstring(lua, "wrong number of arguments");
+ return lua_error(lua);
}
s = (char*)lua_tolstring(lua,1,&len);
@@ -478,30 +694,110 @@ int luaRedisReturnSingleFieldTable(lua_State *lua, char *field) {
return 1;
}
+/* redis.error_reply() */
int luaRedisErrorReplyCommand(lua_State *lua) {
return luaRedisReturnSingleFieldTable(lua,"err");
}
+/* redis.status_reply() */
int luaRedisStatusReplyCommand(lua_State *lua) {
return luaRedisReturnSingleFieldTable(lua,"ok");
}
+/* redis.replicate_commands()
+ *
+ * Turn on single commands replication if the script has not called any
+ * write command so far, and return true. Otherwise, if the script already
+ * started to write, return false and stick to whole-script replication,
+ * which is the default. */
+int luaRedisReplicateCommandsCommand(lua_State *lua) {
+ if (server.lua_write_dirty) {
+ lua_pushboolean(lua,0);
+ } else {
+ server.lua_replicate_commands = 1;
+ /* When we switch to single commands replication, we can provide
+ * different math.random() sequences at every call, which is what
+ * the user normally expects. */
+ redisSrand48(rand());
+ lua_pushboolean(lua,1);
+ }
+ return 1;
+}
+
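-- Illustrative sketch: replicate_commands() must precede any write; after
-- the first write it returns false and whole-script replication stays on.
-- The key name is hypothetical.
if not redis.replicate_commands() then
    return redis.error_reply('effects replication not available')
end
redis.call('SET', 'stamp', tostring(math.random()))
return 1  -- only the resulting SET is propagated, not the whole script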
+/* redis.breakpoint()
+ *
+ * Allows stopping execution during a debugging session from within the
+ * Lua code, as if a breakpoint had been set in the code immediately
+ * after this call. */
+int luaRedisBreakpointCommand(lua_State *lua) {
+ if (ldb.active) {
+ ldb.luabp = 1;
+ lua_pushboolean(lua,1);
+ } else {
+ lua_pushboolean(lua,0);
+ }
+ return 1;
+}
+
+/* redis.debug()
+ *
+ * Log a string message into the output console.
+ * Can take multiple arguments that will be separated by commas.
+ * Nothing is returned to the caller. */
+int luaRedisDebugCommand(lua_State *lua) {
+ if (!ldb.active) return 0;
+ int argc = lua_gettop(lua);
+ sds log = sdscatprintf(sdsempty(),"<debug> line %d: ", ldb.currentline);
+ while(argc--) {
+ log = ldbCatStackValue(log,lua,-1 - argc);
+ if (argc != 0) log = sdscatlen(log,", ",2);
+ }
+ ldbLog(log);
+ return 0;
+}
+
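-- Illustrative sketch: both helpers are no-ops unless the script was
-- started under SCRIPT DEBUG. Assumes the caller passed one key name.
local v = redis.call('GET', KEYS[1])
if v == false then
    redis.breakpoint()        -- enter the debugger only if the key is gone
end
redis.debug('got value', v)   -- printed in the debugger console
return v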
+/* redis.set_repl()
+ *
+ * Set the propagation of write commands executed in the context of the
+ * script to on/off for AOF and slaves. */
+int luaRedisSetReplCommand(lua_State *lua) {
+ int argc = lua_gettop(lua);
+ int flags;
+
+ if (server.lua_replicate_commands == 0) {
+ lua_pushstring(lua, "You can set the replication behavior only after turning on single commands replication with redis.replicate_commands().");
+ return lua_error(lua);
+ } else if (argc != 1) {
+        lua_pushstring(lua, "redis.set_repl() requires one argument.");
+ return lua_error(lua);
+ }
+
+ flags = lua_tonumber(lua,-1);
+ if ((flags & ~(PROPAGATE_AOF|PROPAGATE_REPL)) != 0) {
+ lua_pushstring(lua, "Invalid replication flags. Use REPL_AOF, REPL_SLAVE, REPL_ALL or REPL_NONE.");
+ return lua_error(lua);
+ }
+ server.lua_repl = flags;
+ return 0;
+}
+
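-- Illustrative sketch: selective propagation with set_repl(), using the
-- REPL_* constants registered in scriptingInit() further below. Requires
-- effects replication; key names are hypothetical.
redis.replicate_commands()
redis.set_repl(redis.REPL_AOF)       -- skip replicas for scratch state
redis.call('SET', 'scratch', 'tmp')
redis.set_repl(redis.REPL_ALL)       -- restore the default
redis.call('INCR', 'real-counter')
return 1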
+/* redis.log() */
int luaLogCommand(lua_State *lua) {
int j, argc = lua_gettop(lua);
int level;
sds log;
if (argc < 2) {
- luaPushError(lua, "redis.log() requires two arguments or more.");
- return 1;
+ lua_pushstring(lua, "redis.log() requires two arguments or more.");
+ return lua_error(lua);
} else if (!lua_isnumber(lua,-argc)) {
- luaPushError(lua, "First argument must be a number (log level).");
- return 1;
+ lua_pushstring(lua, "First argument must be a number (log level).");
+ return lua_error(lua);
}
level = lua_tonumber(lua,-argc);
- if (level < REDIS_DEBUG || level > REDIS_WARNING) {
- luaPushError(lua, "Invalid debug level.");
- return 1;
+ if (level < LL_DEBUG || level > LL_WARNING) {
+ lua_pushstring(lua, "Invalid debug level.");
+ return lua_error(lua);
}
/* Glue together all the arguments */
@@ -516,34 +812,14 @@ int luaLogCommand(lua_State *lua) {
log = sdscatlen(log,s,len);
}
}
- redisLogRaw(level,log);
+ serverLogRaw(level,log);
sdsfree(log);
return 0;
}
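-- Illustrative sketch: arguments after the level are glued into a single
-- server log line; nothing is returned to the caller. The LOG_* constants
-- are registered in scriptingInit() further below.
redis.log(redis.LOG_WARNING, 'script reached the slow path')
return 0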
-void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
- long long elapsed;
- REDIS_NOTUSED(ar);
- REDIS_NOTUSED(lua);
-
- elapsed = mstime() - server.lua_time_start;
- if (elapsed >= server.lua_time_limit && server.lua_timedout == 0) {
- redisLog(REDIS_WARNING,"Lua slow script detected: still in execution after %lld milliseconds. You can try killing the script using the SCRIPT KILL command.",elapsed);
- server.lua_timedout = 1;
- /* Once the script timeouts we reenter the event loop to permit others
- * to call SCRIPT KILL or SHUTDOWN NOSAVE if needed. For this reason
- * we need to mask the client executing the script from the event loop.
- * If we don't do that the client may disconnect and could no longer be
- * here when the EVAL command will return. */
- aeDeleteFileEvent(server.el, server.lua_caller->fd, AE_READABLE);
- }
- if (server.lua_timedout) processEventsWhileBlocked();
- if (server.lua_kill) {
- redisLog(REDIS_WARNING,"Lua script killed by user with SCRIPT KILL.");
- lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");
- lua_error(lua);
- }
-}
+/* ---------------------------------------------------------------------------
+ * Lua engine initialization and reset.
+ * ------------------------------------------------------------------------- */
void luaLoadLib(lua_State *lua, const char *libname, lua_CFunction luafunc) {
lua_pushcfunction(lua, luafunc);
@@ -578,6 +854,8 @@ void luaLoadLibraries(lua_State *lua) {
void luaRemoveUnsupportedFunctions(lua_State *lua) {
lua_pushnil(lua);
lua_setglobal(lua,"loadfile");
+ lua_pushnil(lua);
+ lua_setglobal(lua,"dofile");
}
/* This function installs metamethods in the global table _G that prevent
@@ -592,11 +870,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
/* strict.lua from: http://metalua.luaforge.net/src/lib/strict.lua.html.
* Modified to be adapted to Redis. */
+ s[j++]="local dbg=debug\n";
s[j++]="local mt = {}\n";
s[j++]="setmetatable(_G, mt)\n";
s[j++]="mt.__newindex = function (t, n, v)\n";
- s[j++]=" if debug.getinfo(2) then\n";
- s[j++]=" local w = debug.getinfo(2, \"S\").what\n";
+ s[j++]=" if dbg.getinfo(2) then\n";
+ s[j++]=" local w = dbg.getinfo(2, \"S\").what\n";
s[j++]=" if w ~= \"main\" and w ~= \"C\" then\n";
s[j++]=" error(\"Script attempted to create global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
@@ -604,11 +883,12 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
s[j++]=" rawset(t, n, v)\n";
s[j++]="end\n";
s[j++]="mt.__index = function (t, n)\n";
- s[j++]=" if debug.getinfo(2) and debug.getinfo(2, \"S\").what ~= \"C\" then\n";
- s[j++]=" error(\"Script attempted to access unexisting global variable '\"..tostring(n)..\"'\", 2)\n";
+ s[j++]=" if dbg.getinfo(2) and dbg.getinfo(2, \"S\").what ~= \"C\" then\n";
+ s[j++]=" error(\"Script attempted to access nonexistent global variable '\"..tostring(n)..\"'\", 2)\n";
s[j++]=" end\n";
s[j++]=" return rawget(t, n)\n";
s[j++]="end\n";
+ s[j++]="debug = nil\n";
s[j++]=NULL;
for (j = 0; s[j] != NULL; j++) code = sdscatlen(code,s[j],strlen(s[j]));
@@ -618,12 +898,26 @@ void scriptingEnableGlobalsProtection(lua_State *lua) {
}
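-- Illustrative sketch: the effect of the protection installed above.
-- counter = 1  -- error: "Script attempted to create global variable 'counter'"
local counter = 1  -- locals are fine
return counter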
/* Initialize the scripting environment.
- * It is possible to call this function to reset the scripting environment
- * assuming that we call scriptingRelease() before.
- * See scriptingReset() for more information. */
-void scriptingInit(void) {
+ *
+ * This function is called the first time at server startup with
+ * the 'setup' argument set to 1.
+ *
+ * It can be called again multiple times during the lifetime of the Redis
+ * process, with 'setup' set to 0, and following a scriptingRelease() call,
+ * in order to reset the Lua scripting environment.
+ *
+ * However it is simpler to just call scriptingReset() that does just that. */
+void scriptingInit(int setup) {
lua_State *lua = lua_open();
+ if (setup) {
+ server.lua_client = NULL;
+ server.lua_caller = NULL;
+ server.lua_timedout = 0;
+ server.lua_always_replicate_commands = 0; /* Only DEBUG can change it.*/
+ ldbInit();
+ }
+
luaLoadLibraries(lua);
luaRemoveUnsupportedFunctions(lua);
@@ -631,6 +925,7 @@ void scriptingInit(void) {
* This is useful for replication, as we need to replicate EVALSHA
* as EVAL, so we need to remember the associated script. */
server.lua_scripts = dictCreate(&shaScriptObjectDictType,NULL);
+ server.lua_scripts_mem = 0;
/* Register the redis commands table and fields */
lua_newtable(lua);
@@ -651,19 +946,19 @@ void scriptingInit(void) {
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_DEBUG");
- lua_pushnumber(lua,REDIS_DEBUG);
+ lua_pushnumber(lua,LL_DEBUG);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_VERBOSE");
- lua_pushnumber(lua,REDIS_VERBOSE);
+ lua_pushnumber(lua,LL_VERBOSE);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_NOTICE");
- lua_pushnumber(lua,REDIS_NOTICE);
+ lua_pushnumber(lua,LL_NOTICE);
lua_settable(lua,-3);
lua_pushstring(lua,"LOG_WARNING");
- lua_pushnumber(lua,REDIS_WARNING);
+ lua_pushnumber(lua,LL_WARNING);
lua_settable(lua,-3);
/* redis.sha1hex */
@@ -679,6 +974,42 @@ void scriptingInit(void) {
lua_pushcfunction(lua, luaRedisStatusReplyCommand);
lua_settable(lua, -3);
+ /* redis.replicate_commands */
+ lua_pushstring(lua, "replicate_commands");
+ lua_pushcfunction(lua, luaRedisReplicateCommandsCommand);
+ lua_settable(lua, -3);
+
+ /* redis.set_repl and associated flags. */
+ lua_pushstring(lua,"set_repl");
+ lua_pushcfunction(lua,luaRedisSetReplCommand);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_NONE");
+ lua_pushnumber(lua,PROPAGATE_NONE);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_AOF");
+ lua_pushnumber(lua,PROPAGATE_AOF);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_SLAVE");
+ lua_pushnumber(lua,PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ lua_pushstring(lua,"REPL_ALL");
+ lua_pushnumber(lua,PROPAGATE_AOF|PROPAGATE_REPL);
+ lua_settable(lua,-3);
+
+ /* redis.breakpoint */
+ lua_pushstring(lua,"breakpoint");
+ lua_pushcfunction(lua,luaRedisBreakpointCommand);
+ lua_settable(lua,-3);
+
+ /* redis.debug */
+ lua_pushstring(lua,"debug");
+ lua_pushcfunction(lua,luaRedisDebugCommand);
+ lua_settable(lua,-3);
+
/* Finally set the table as 'redis' global var. */
lua_setglobal(lua,"redis");
@@ -712,10 +1043,11 @@ void scriptingInit(void) {
* information about the caller, that's what makes sense from the point
* of view of the user debugging a script. */
{
- char *errh_func = "function __redis__err__handler(err)\n"
- " local i = debug.getinfo(2,'nSl')\n"
+ char *errh_func = "local dbg = debug\n"
+ "function __redis__err__handler(err)\n"
+ " local i = dbg.getinfo(2,'nSl')\n"
" if i and i.what == 'C' then\n"
- " i = debug.getinfo(3,'nSl')\n"
+ " i = dbg.getinfo(3,'nSl')\n"
" end\n"
" if i then\n"
" return i.source .. ':' .. i.currentline .. ': ' .. err\n"
@@ -733,7 +1065,7 @@ void scriptingInit(void) {
* by scriptingReset(). */
if (server.lua_client == NULL) {
server.lua_client = createClient(-1);
- server.lua_client->flags |= REDIS_LUA_CLIENT;
+ server.lua_client->flags |= CLIENT_LUA;
}
/* Lua beginners often don't use "local", this is likely to introduce
@@ -748,99 +1080,13 @@ void scriptingInit(void) {
* This function is used in order to reset the scripting environment. */
void scriptingRelease(void) {
dictRelease(server.lua_scripts);
+ server.lua_scripts_mem = 0;
lua_close(server.lua);
}
void scriptingReset(void) {
scriptingRelease();
- scriptingInit();
-}
-
-/* Perform the SHA1 of the input string. We use this both for hashing script
- * bodies in order to obtain the Lua function name, and in the implementation
- * of redis.sha1().
- *
- * 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an
- * hexadecimal number, plus 1 byte for null term. */
-void sha1hex(char *digest, char *script, size_t len) {
- SHA1_CTX ctx;
- unsigned char hash[20];
- char *cset = "0123456789abcdef";
- int j;
-
- SHA1Init(&ctx);
- SHA1Update(&ctx,(unsigned char*)script,len);
- SHA1Final(hash,&ctx);
-
- for (j = 0; j < 20; j++) {
- digest[j*2] = cset[((hash[j]&0xF0)>>4)];
- digest[j*2+1] = cset[(hash[j]&0xF)];
- }
- digest[40] = '\0';
-}
-
-void luaReplyToRedisReply(redisClient *c, lua_State *lua) {
- int t = lua_type(lua,-1);
-
- switch(t) {
- case LUA_TSTRING:
- addReplyBulkCBuffer(c,(char*)lua_tostring(lua,-1),lua_strlen(lua,-1));
- break;
- case LUA_TBOOLEAN:
- addReply(c,lua_toboolean(lua,-1) ? shared.cone : shared.nullbulk);
- break;
- case LUA_TNUMBER:
- addReplyLongLong(c,(long long)lua_tonumber(lua,-1));
- break;
- case LUA_TTABLE:
- /* We need to check if it is an array, an error, or a status reply.
- * Error are returned as a single element table with 'err' field.
- * Status replies are returned as single element table with 'ok' field */
- lua_pushstring(lua,"err");
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TSTRING) {
- sds err = sdsnew(lua_tostring(lua,-1));
- sdsmapchars(err,"\r\n"," ",2);
- addReplySds(c,sdscatprintf(sdsempty(),"-%s\r\n",err));
- sdsfree(err);
- lua_pop(lua,2);
- return;
- }
-
- lua_pop(lua,1);
- lua_pushstring(lua,"ok");
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TSTRING) {
- sds ok = sdsnew(lua_tostring(lua,-1));
- sdsmapchars(ok,"\r\n"," ",2);
- addReplySds(c,sdscatprintf(sdsempty(),"+%s\r\n",ok));
- sdsfree(ok);
- lua_pop(lua,1);
- } else {
- void *replylen = addDeferredMultiBulkLength(c);
- int j = 1, mbulklen = 0;
-
- lua_pop(lua,1); /* Discard the 'ok' field value we popped */
- while(1) {
- lua_pushnumber(lua,j++);
- lua_gettable(lua,-2);
- t = lua_type(lua,-1);
- if (t == LUA_TNIL) {
- lua_pop(lua,1);
- break;
- }
- luaReplyToRedisReply(c, lua);
- mbulklen++;
- }
- setDeferredMultiBulkLength(c,replylen,mbulklen);
- }
- break;
- default:
- addReply(c,shared.nullbulk);
- }
- lua_pop(lua,1);
+ scriptingInit(0);
}
/* Set an array of Redis String Objects as a Lua array (table) stored into a
@@ -856,59 +1102,157 @@ void luaSetGlobalArray(lua_State *lua, char *var, robj **elev, int elec) {
lua_setglobal(lua,var);
}
-/* Define a lua function with the specified function name and body.
- * The function name musts be a 2 characters long string, since all the
- * functions we defined in the Lua context are in the form:
+/* ---------------------------------------------------------------------------
+ * Redis provided math.random
+ * ------------------------------------------------------------------------- */
+
+/* We replace math.random() with our implementation that is not affected
+ * by specific libc random() implementations and will output the same sequence
+ * (for the same seed) in every arch. */
+
+/* The following implementation is the one shipped with Lua itself but with
+ * rand() replaced by redisLrand48(). */
+int redis_math_random (lua_State *L) {
+ /* the `%' avoids the (rare) case of r==1, and is needed also because on
+ some systems (SunOS!) `rand()' may return a value larger than RAND_MAX */
+ lua_Number r = (lua_Number)(redisLrand48()%REDIS_LRAND48_MAX) /
+ (lua_Number)REDIS_LRAND48_MAX;
+ switch (lua_gettop(L)) { /* check number of arguments */
+ case 0: { /* no arguments */
+ lua_pushnumber(L, r); /* Number between 0 and 1 */
+ break;
+ }
+ case 1: { /* only upper limit */
+ int u = luaL_checkint(L, 1);
+ luaL_argcheck(L, 1<=u, 1, "interval is empty");
+ lua_pushnumber(L, floor(r*u)+1); /* int between 1 and `u' */
+ break;
+ }
+ case 2: { /* lower and upper limits */
+ int l = luaL_checkint(L, 1);
+ int u = luaL_checkint(L, 2);
+ luaL_argcheck(L, l<=u, 2, "interval is empty");
+ lua_pushnumber(L, floor(r*(u-l+1))+l); /* int between `l' and `u' */
+ break;
+ }
+ default: return luaL_error(L, "wrong number of arguments");
+ }
+ return 1;
+}
+
+int redis_math_randomseed (lua_State *L) {
+ redisSrand48(luaL_checkint(L, 1));
+ return 0;
+}
+
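-- Illustrative sketch: under whole-script replication the PRNG is reseeded
-- before every script, so the sequence below is identical on master,
-- replicas, and during AOF reload; an explicit seed makes that visible.
math.randomseed(1234)
return tostring(math.random())  -- same string everywhere, for a fixed seed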
+/* ---------------------------------------------------------------------------
+ * EVAL and SCRIPT commands implementation
+ * ------------------------------------------------------------------------- */
+
+/* Define a Lua function with the specified body.
+ * The function name will be generated in the following form:
*
* f_<hex sha1 sum>
*
- * On success REDIS_OK is returned, and nothing is left on the Lua stack.
- * On error REDIS_ERR is returned and an appropriate error is set in the
- * client context. */
-int luaCreateFunction(redisClient *c, lua_State *lua, char *funcname, robj *body) {
- sds funcdef = sdsempty();
+ * The function increments the reference count of the 'body' object as a
+ * side effect of a successful call.
+ *
+ * On success a pointer to an SDS string representing the function SHA1 of the
+ * just added function is returned (and will be valid until the next call
+ * to scriptingReset() function), otherwise NULL is returned.
+ *
+ * If the function is called with a script that already exists, it behaves
+ * as in the success case.
+ *
+ * If 'c' is not NULL, on error the client is informed with an appropriate
+ * error describing the nature of the problem and the Lua interpreter error. */
+sds luaCreateFunction(client *c, lua_State *lua, robj *body) {
+ char funcname[43];
+ dictEntry *de;
+
+ funcname[0] = 'f';
+ funcname[1] = '_';
+ sha1hex(funcname+2,body->ptr,sdslen(body->ptr));
+
+ sds sha = sdsnewlen(funcname+2,40);
+ if ((de = dictFind(server.lua_scripts,sha)) != NULL) {
+ sdsfree(sha);
+ return dictGetKey(de);
+ }
+ sds funcdef = sdsempty();
funcdef = sdscat(funcdef,"function ");
funcdef = sdscatlen(funcdef,funcname,42);
funcdef = sdscatlen(funcdef,"() ",3);
funcdef = sdscatlen(funcdef,body->ptr,sdslen(body->ptr));
- funcdef = sdscatlen(funcdef," end",4);
+ funcdef = sdscatlen(funcdef,"\nend",4);
if (luaL_loadbuffer(lua,funcdef,sdslen(funcdef),"@user_script")) {
- addReplyErrorFormat(c,"Error compiling script (new function): %s\n",
- lua_tostring(lua,-1));
+ if (c != NULL) {
+ addReplyErrorFormat(c,
+ "Error compiling script (new function): %s\n",
+ lua_tostring(lua,-1));
+ }
lua_pop(lua,1);
+ sdsfree(sha);
sdsfree(funcdef);
- return REDIS_ERR;
+ return NULL;
}
sdsfree(funcdef);
+
if (lua_pcall(lua,0,0,0)) {
- addReplyErrorFormat(c,"Error running script (new function): %s\n",
- lua_tostring(lua,-1));
+ if (c != NULL) {
+ addReplyErrorFormat(c,"Error running script (new function): %s\n",
+ lua_tostring(lua,-1));
+ }
lua_pop(lua,1);
- return REDIS_ERR;
+ sdsfree(sha);
+ return NULL;
}
/* We also save a SHA1 -> Original script map in a dictionary
* so that we can replicate / write in the AOF all the
* EVALSHA commands as EVAL using the original script. */
- {
- int retval = dictAdd(server.lua_scripts,
- sdsnewlen(funcname+2,40),body);
- redisAssertWithInfo(c,NULL,retval == DICT_OK);
- incrRefCount(body);
+ int retval = dictAdd(server.lua_scripts,sha,body);
+ serverAssertWithInfo(c ? c : server.lua_client,NULL,retval == DICT_OK);
+ server.lua_scripts_mem += sdsZmallocSize(sha) + sdsZmallocSize(body->ptr);
+ incrRefCount(body);
+ return sha;
+}
+
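-- Illustrative sketch: scripts are cached under the SHA1 of their body;
-- redis.sha1hex() computes the same digest that SCRIPT LOAD would return
-- for the body, which internally names the Lua function f_<digest>.
return redis.sha1hex('return 1')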
+/* This is the Lua script "count" hook that we use to detect script timeouts. */
+void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
+ long long elapsed;
+ UNUSED(ar);
+ UNUSED(lua);
+
+ elapsed = mstime() - server.lua_time_start;
+ if (elapsed >= server.lua_time_limit && server.lua_timedout == 0) {
+ serverLog(LL_WARNING,"Lua slow script detected: still in execution after %lld milliseconds. You can try killing the script using the SCRIPT KILL command.",elapsed);
+ server.lua_timedout = 1;
+        /* Once the script times out we re-enter the event loop to permit
+         * others to call SCRIPT KILL or SHUTDOWN NOSAVE if needed. For this
+         * reason we need to mask the client executing the script from the
+         * event loop. If we don't do that the client may disconnect and
+         * may no longer be here when EVAL returns. */
+ aeDeleteFileEvent(server.el, server.lua_caller->fd, AE_READABLE);
+ }
+ if (server.lua_timedout) processEventsWhileBlocked();
+ if (server.lua_kill) {
+ serverLog(LL_WARNING,"Lua script killed by user with SCRIPT KILL.");
+ lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");
+ lua_error(lua);
}
- return REDIS_OK;
}
-void evalGenericCommand(redisClient *c, int evalsha) {
+void evalGenericCommand(client *c, int evalsha) {
lua_State *lua = server.lua;
char funcname[43];
long long numkeys;
int delhook = 0, err;
- /* We want the same PRNG sequence at every call so that our PRNG is
- * not affected by external state. */
+ /* When we replicate whole scripts, we want the same PRNG sequence at
+ * every call so that our PRNG is not affected by external state. */
redisSrand48(0);
/* We set this flag to zero to remember that so far no random command
@@ -921,9 +1265,12 @@ void evalGenericCommand(redisClient *c, int evalsha) {
* is called after a random command was used. */
server.lua_random_dirty = 0;
server.lua_write_dirty = 0;
+ server.lua_replicate_commands = server.lua_always_replicate_commands;
+ server.lua_multi_emitted = 0;
+ server.lua_repl = PROPAGATE_AOF|PROPAGATE_REPL;
/* Get the number of arguments that are keys */
- if (getLongLongFromObjectOrReply(c,c->argv[2],&numkeys,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&numkeys,NULL) != C_OK)
return;
if (numkeys > (c->argc - 3)) {
addReplyError(c,"Number of keys can't be greater than number of args");
@@ -969,15 +1316,15 @@ void evalGenericCommand(redisClient *c, int evalsha) {
addReply(c, shared.noscripterr);
return;
}
- if (luaCreateFunction(c,lua,funcname,c->argv[1]) == REDIS_ERR) {
+ if (luaCreateFunction(c,lua,c->argv[1]) == NULL) {
lua_pop(lua,1); /* remove the error handler from the stack. */
/* The error is sent to the client by luaCreateFunction()
- * itself when it returns REDIS_ERR. */
+ * itself when it returns NULL. */
return;
}
/* Now the following is guaranteed to return non-nil */
lua_getglobal(lua, funcname);
- redisAssert(!lua_isnil(lua,-1));
+ serverAssert(!lua_isnil(lua,-1));
}
/* Populate the argv and keys table accordingly to the arguments that
@@ -991,13 +1338,21 @@ void evalGenericCommand(redisClient *c, int evalsha) {
/* Set a hook in order to be able to stop the script execution if it
* is running for too much time.
* We set the hook only if the time limit is enabled as the hook will
- * make the Lua script execution slower. */
+ * make the Lua script execution slower.
+ *
+     * If we are debugging, we set instead a "line" hook so that the
+     * debugger is called back at every line executed by the script. */
server.lua_caller = c;
server.lua_time_start = mstime();
server.lua_kill = 0;
- if (server.lua_time_limit > 0 && server.masterhost == NULL) {
+ if (server.lua_time_limit > 0 && server.masterhost == NULL &&
+ ldb.active == 0)
+ {
lua_sethook(lua,luaMaskCountHook,LUA_MASKCOUNT,100000);
delhook = 1;
+ } else if (ldb.active) {
+ lua_sethook(server.lua,luaLdbLineHook,LUA_MASKLINE|LUA_MASKCOUNT,100000);
+ delhook = 1;
}
/* At this point whether this script was never seen before or if it was
@@ -1006,7 +1361,7 @@ void evalGenericCommand(redisClient *c, int evalsha) {
err = lua_pcall(lua,0,1,-2);
/* Perform some cleanup that we need to do both on error and success. */
- if (delhook) lua_sethook(lua,luaMaskCountHook,0,0); /* Disable hook */
+ if (delhook) lua_sethook(lua,NULL,0,0); /* Disable hook */
if (server.lua_timedout) {
server.lua_timedout = 0;
/* Restore the readable handler that was unregistered when the
@@ -1044,6 +1399,19 @@ void evalGenericCommand(redisClient *c, int evalsha) {
lua_pop(lua,1); /* Remove the error handler. */
}
+    /* If we are using single commands replication, emit EXEC if there
+     * was at least one write. */
+ if (server.lua_replicate_commands) {
+ preventCommandPropagation(c);
+ if (server.lua_multi_emitted) {
+ robj *propargv[1];
+ propargv[0] = createStringObject("EXEC",4);
+ alsoPropagate(server.execCommand,c->db->id,propargv,1,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(propargv[0]);
+ }
+ }
+
/* EVALSHA should be propagated to Slave and AOF file as full EVAL, unless
* we are sure that the script was already in the context of all the
* attached slaves *and* the current AOF file if enabled.
@@ -1054,7 +1422,7 @@ void evalGenericCommand(redisClient *c, int evalsha) {
* For replication, every time a new slave attaches to the master, we need to
* flush our cache of scripts that can be replicated as EVALSHA, while
* for AOF we need to do so every time we rewrite the AOF file. */
- if (evalsha) {
+ if (evalsha && !server.lua_replicate_commands) {
if (!replicationScriptCacheExists(c->argv[1]->ptr)) {
/* This script is not in our script cache, replicate it as
* EVAL, then add it into the script cache, as from now on
@@ -1062,20 +1430,23 @@ void evalGenericCommand(redisClient *c, int evalsha) {
robj *script = dictFetchValue(server.lua_scripts,c->argv[1]->ptr);
replicationScriptCacheAdd(c->argv[1]->ptr);
- redisAssertWithInfo(c,NULL,script != NULL);
+ serverAssertWithInfo(c,NULL,script != NULL);
rewriteClientCommandArgument(c,0,
resetRefCount(createStringObject("EVAL",4)));
rewriteClientCommandArgument(c,1,script);
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL|REDIS_PROPAGATE_AOF);
+ forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
}
}
}
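-- Illustrative sketch: the KEYS/ARGV split validated above. numkeys must
-- cover exactly the key names, so that cluster nodes can verify slot
-- ownership, e.g.:
--   EVAL "return redis.call('SET', KEYS[1], ARGV[1])" 1 mykey myval
return redis.call('SET', KEYS[1], ARGV[1])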
-void evalCommand(redisClient *c) {
- evalGenericCommand(c,0);
+void evalCommand(client *c) {
+ if (!(c->flags & CLIENT_LUA_DEBUG))
+ evalGenericCommand(c,0);
+ else
+ evalGenericCommandWithDebugging(c,0);
}
-void evalShaCommand(redisClient *c) {
+void evalShaCommand(client *c) {
if (sdslen(c->argv[1]->ptr) != 40) {
/* We know that a match is not possible if the provided SHA is
* not the right length. So we return an error ASAP, this way
@@ -1084,54 +1455,26 @@ void evalShaCommand(redisClient *c) {
addReply(c, shared.noscripterr);
return;
}
- evalGenericCommand(c,1);
-}
-
-/* We replace math.random() with our implementation that is not affected
- * by specific libc random() implementations and will output the same sequence
- * (for the same seed) in every arch. */
-
-/* The following implementation is the one shipped with Lua itself but with
- * rand() replaced by redisLrand48(). */
-int redis_math_random (lua_State *L) {
- /* the `%' avoids the (rare) case of r==1, and is needed also because on
- some systems (SunOS!) `rand()' may return a value larger than RAND_MAX */
- lua_Number r = (lua_Number)(redisLrand48()%REDIS_LRAND48_MAX) /
- (lua_Number)REDIS_LRAND48_MAX;
- switch (lua_gettop(L)) { /* check number of arguments */
- case 0: { /* no arguments */
- lua_pushnumber(L, r); /* Number between 0 and 1 */
- break;
- }
- case 1: { /* only upper limit */
- int u = luaL_checkint(L, 1);
- luaL_argcheck(L, 1<=u, 1, "interval is empty");
- lua_pushnumber(L, floor(r*u)+1); /* int between 1 and `u' */
- break;
- }
- case 2: { /* lower and upper limits */
- int l = luaL_checkint(L, 1);
- int u = luaL_checkint(L, 2);
- luaL_argcheck(L, l<=u, 2, "interval is empty");
- lua_pushnumber(L, floor(r*(u-l+1))+l); /* int between `l' and `u' */
- break;
+ if (!(c->flags & CLIENT_LUA_DEBUG))
+ evalGenericCommand(c,1);
+ else {
+ addReplyError(c,"Please use EVAL instead of EVALSHA for debugging");
+ return;
}
- default: return luaL_error(L, "wrong number of arguments");
- }
- return 1;
}
-int redis_math_randomseed (lua_State *L) {
- redisSrand48(luaL_checkint(L, 1));
- return 0;
-}
-
-/* ---------------------------------------------------------------------------
- * SCRIPT command for script environment introspection and control
- * ------------------------------------------------------------------------- */
-
-void scriptCommand(redisClient *c) {
- if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
+void scriptCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"DEBUG (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
+"EXISTS <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
+"FLUSH -- Flush the Lua scripts cache. Very dangerous on slaves.",
+"KILL -- Kill the currently executing Lua script.",
+"LOAD <script> -- Load a script into the scripts cache, without executing it.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
scriptingReset();
addReply(c,shared.ok);
replicationScriptCacheFlush();
@@ -1147,23 +1490,10 @@ void scriptCommand(redisClient *c) {
addReply(c,shared.czero);
}
} else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"load")) {
- char funcname[43];
- sds sha;
-
- funcname[0] = 'f';
- funcname[1] = '_';
- sha1hex(funcname+2,c->argv[2]->ptr,sdslen(c->argv[2]->ptr));
- sha = sdsnewlen(funcname+2,40);
- if (dictFind(server.lua_scripts,sha) == NULL) {
- if (luaCreateFunction(c,server.lua,funcname,c->argv[2])
- == REDIS_ERR) {
- sdsfree(sha);
- return;
- }
- }
- addReplyBulkCBuffer(c,funcname+2,40);
- sdsfree(sha);
- forceCommandPropagation(c,REDIS_PROPAGATE_REPL|REDIS_PROPAGATE_AOF);
+ sds sha = luaCreateFunction(c,server.lua,c->argv[2]);
+ if (sha == NULL) return; /* The error was sent by luaCreateFunction(). */
+ addReplyBulkCBuffer(c,sha,40);
+ forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"kill")) {
if (server.lua_caller == NULL) {
addReplySds(c,sdsnew("-NOTBUSY No scripts in execution right now.\r\n"));
@@ -1173,7 +1503,927 @@ void scriptCommand(redisClient *c) {
server.lua_kill = 1;
addReply(c,shared.ok);
}
+ } else if (c->argc == 3 && !strcasecmp(c->argv[1]->ptr,"debug")) {
+ if (clientHasPendingReplies(c)) {
+ addReplyError(c,"SCRIPT DEBUG must be called outside a pipeline");
+ return;
+ }
+ if (!strcasecmp(c->argv[2]->ptr,"no")) {
+ ldbDisable(c);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"yes")) {
+ ldbEnable(c);
+ addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[2]->ptr,"sync")) {
+ ldbEnable(c);
+ addReply(c,shared.ok);
+ c->flags |= CLIENT_LUA_DEBUG_SYNC;
+ } else {
+ addReplyError(c,"Use SCRIPT DEBUG yes/sync/no");
+ return;
+ }
} else {
- addReplyError(c, "Unknown SCRIPT subcommand or wrong # of args.");
+ addReplySubcommandSyntaxError(c);
}
}
+
+/* ---------------------------------------------------------------------------
+ * LDB: Redis Lua debugging facilities
+ * ------------------------------------------------------------------------- */
+
+/* Initialize Lua debugger data structures. */
+void ldbInit(void) {
+ ldb.fd = -1;
+ ldb.active = 0;
+ ldb.logs = listCreate();
+ listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree);
+ ldb.children = listCreate();
+ ldb.src = NULL;
+ ldb.lines = 0;
+ ldb.cbuf = sdsempty();
+}
+
+/* Remove all the pending messages in the specified list. */
+void ldbFlushLog(list *log) {
+ listNode *ln;
+
+ while((ln = listFirst(log)) != NULL)
+ listDelNode(log,ln);
+}
+
+/* Enable debug mode of Lua scripts for this client. */
+void ldbEnable(client *c) {
+ c->flags |= CLIENT_LUA_DEBUG;
+ ldbFlushLog(ldb.logs);
+ ldb.fd = c->fd;
+ ldb.step = 1;
+ ldb.bpcount = 0;
+ ldb.luabp = 0;
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+ ldb.maxlen = LDB_MAX_LEN_DEFAULT;
+ ldb.maxlen_hint_sent = 0;
+}
+
+/* Exit debugging mode from the POV of the client. This function is not
+ * enough to properly shut down a client debugging session; see
+ * ldbEndSession() for more information. */
+void ldbDisable(client *c) {
+ c->flags &= ~(CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC);
+}
+
+/* Append a log entry to the specified LDB log. */
+void ldbLog(sds entry) {
+ listAddNodeTail(ldb.logs,entry);
+}
+
+/* A version of ldbLog() which prevents producing logs greater than
+ * ldb.maxlen. The first time the limit is reached, a hint is generated
+ * to inform the user that reply trimming can be disabled using the
+ * debugger "maxlen" command. */
+void ldbLogWithMaxLen(sds entry) {
+ int trimmed = 0;
+ if (ldb.maxlen && sdslen(entry) > ldb.maxlen) {
+ sdsrange(entry,0,ldb.maxlen-1);
+ entry = sdscatlen(entry," ...",4);
+ trimmed = 1;
+ }
+ ldbLog(entry);
+ if (trimmed && ldb.maxlen_hint_sent == 0) {
+ ldb.maxlen_hint_sent = 1;
+ ldbLog(sdsnew(
+ "<hint> The above reply was trimmed. Use 'maxlen 0' to disable trimming."));
+ }
+}
+
+/* Send ldb.logs to the debugging client as a multi-bulk reply
+ * consisting of simple strings. Log entries which include newlines have them
+ * replaced with spaces. The entries sent are also consumed. */
+void ldbSendLogs(void) {
+ sds proto = sdsempty();
+ proto = sdscatfmt(proto,"*%i\r\n", (int)listLength(ldb.logs));
+ while(listLength(ldb.logs)) {
+ listNode *ln = listFirst(ldb.logs);
+ proto = sdscatlen(proto,"+",1);
+ sdsmapchars(ln->value,"\r\n"," ",2);
+ proto = sdscatsds(proto,ln->value);
+ proto = sdscatlen(proto,"\r\n",2);
+ listDelNode(ldb.logs,ln);
+ }
+ if (write(ldb.fd,proto,sdslen(proto)) == -1) {
+ /* Avoid warning. We don't check the return value of write()
+ * since the next read() will catch the I/O error and will
+ * close the debugging session. */
+ }
+ sdsfree(proto);
+}
+
+/* Start a debugging session before calling EVAL implementation.
+ * The technique we use is to capture the client socket file descriptor,
+ * in order to perform direct I/O with it from within Lua hooks. This
+ * way we don't have to re-enter Redis in order to handle I/O.
+ *
+ * The function returns 1 if the caller should proceed to call EVAL,
+ * and 0 if instead the caller should abort the operation (this happens
+ * for the parent in a forked session, since it's up to the children
+ * to continue, or when fork returned an error).
+ *
+ * The caller should call ldbEndSession() only if ldbStartSession()
+ * returned 1. */
+int ldbStartSession(client *c) {
+ ldb.forked = (c->flags & CLIENT_LUA_DEBUG_SYNC) == 0;
+ if (ldb.forked) {
+ pid_t cp = fork();
+ if (cp == -1) {
+ addReplyError(c,"Fork() failed: can't run EVAL in debugging mode.");
+ return 0;
+ } else if (cp == 0) {
+ /* Child. Let's ignore important signals handled by the parent. */
+ struct sigaction act;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
+
+ /* Log the creation of the child and close the listening
+ * socket to make sure if the parent crashes a reset is sent
+ * to the clients. */
+ serverLog(LL_WARNING,"Redis forked for debugging eval");
+ closeListeningSockets(0);
+ } else {
+ /* Parent */
+ listAddNodeTail(ldb.children,(void*)(unsigned long)cp);
+ freeClientAsync(c); /* Close the client in the parent side. */
+ return 0;
+ }
+ } else {
+ serverLog(LL_WARNING,
+ "Redis synchronous debugging eval session started");
+ }
+
+ /* Setup our debugging session. */
+ anetBlock(NULL,ldb.fd);
+ anetSendTimeout(NULL,ldb.fd,5000);
+ ldb.active = 1;
+
+ /* First argument of EVAL is the script itself. We split it into different
+ * lines since this is the way the debugger accesses the source code. */
+ sds srcstring = sdsdup(c->argv[1]->ptr);
+ size_t srclen = sdslen(srcstring);
+ while(srclen && (srcstring[srclen-1] == '\n' ||
+ srcstring[srclen-1] == '\r'))
+ {
+ srcstring[--srclen] = '\0';
+ }
+ sdssetlen(srcstring,srclen);
+ ldb.src = sdssplitlen(srcstring,sdslen(srcstring),"\n",1,&ldb.lines);
+ sdsfree(srcstring);
+ return 1;
+}
+
+/* End a debugging session after the EVAL call with debugging enabled
+ * returned. */
+void ldbEndSession(client *c) {
+ /* Emit the remaining logs and an <endsession> mark. */
+ ldbLog(sdsnew("<endsession>"));
+ ldbSendLogs();
+
+ /* If it's a fork()ed session, we just exit. */
+ if (ldb.forked) {
+ writeToClient(c->fd, c, 0);
+ serverLog(LL_WARNING,"Lua debugging session child exiting");
+ exitFromChild(0);
+ } else {
+ serverLog(LL_WARNING,
+ "Redis synchronous debugging eval session ended");
+ }
+
+    /* Otherwise let's restore the client's state. */
+ anetNonBlock(NULL,ldb.fd);
+ anetSendTimeout(NULL,ldb.fd,0);
+
+    /* Close the client connection after sending the final EVAL reply
+ * in order to signal the end of the debugging session. */
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+
+ /* Cleanup. */
+ sdsfreesplitres(ldb.src,ldb.lines);
+ ldb.lines = 0;
+ ldb.active = 0;
+}
+
+/* If the specified pid is among the list of children spawned for
+ * forked debugging sessions, it is removed from the children list.
+ * If the pid was found, non-zero is returned. */
+int ldbRemoveChild(pid_t pid) {
+ listNode *ln = listSearchKey(ldb.children,(void*)(unsigned long)pid);
+ if (ln) {
+ listDelNode(ldb.children,ln);
+ return 1;
+ }
+ return 0;
+}
+
+/* Return the number of children whose termination we have not yet
+ * acknowledged via wait() in the parent process. */
+int ldbPendingChildren(void) {
+ return listLength(ldb.children);
+}
+
+/* Kill all the forked sessions. */
+void ldbKillForkedSessions(void) {
+ listIter li;
+ listNode *ln;
+
+ listRewind(ldb.children,&li);
+ while((ln = listNext(&li))) {
+ pid_t pid = (unsigned long) ln->value;
+ serverLog(LL_WARNING,"Killing debugging session %ld",(long)pid);
+ kill(pid,SIGKILL);
+ }
+ listRelease(ldb.children);
+ ldb.children = listCreate();
+}
+
+/* Wrapper for EVAL / EVALSHA that enables debugging, and makes sure
+ * that when EVAL returns, whatever happened, the session is ended. */
+void evalGenericCommandWithDebugging(client *c, int evalsha) {
+ if (ldbStartSession(c)) {
+ evalGenericCommand(c,evalsha);
+ ldbEndSession(c);
+ } else {
+ ldbDisable(c);
+ }
+}
+
+/* Return a pointer to ldb.src source code line, considering line to be
+ * one-based, and returning a special string for out of range lines. */
+char *ldbGetSourceLine(int line) {
+ int idx = line-1;
+ if (idx < 0 || idx >= ldb.lines) return "<out of range source code line>";
+ return ldb.src[idx];
+}
+
+/* Return true if there is a breakpoint in the specified line. */
+int ldbIsBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++)
+ if (ldb.bp[j] == line) return 1;
+ return 0;
+}
+
+/* Add the specified breakpoint. Ignore it if we already reached the max.
+ * Returns 1 if the breakpoint was added (or was already set). 0 if there is
+ * no space for the breakpoint or if the line is invalid. */
+int ldbAddBreakpoint(int line) {
+ if (line <= 0 || line > ldb.lines) return 0;
+ if (!ldbIsBreakpoint(line) && ldb.bpcount != LDB_BREAKPOINTS_MAX) {
+ ldb.bp[ldb.bpcount++] = line;
+ return 1;
+ }
+ return 0;
+}
+
+/* Remove the specified breakpoint, returning 1 if the operation was
+ * performed or 0 if there was no such breakpoint. */
+int ldbDelBreakpoint(int line) {
+ int j;
+
+ for (j = 0; j < ldb.bpcount; j++) {
+ if (ldb.bp[j] == line) {
+ ldb.bpcount--;
+            memmove(ldb.bp+j,ldb.bp+j+1,sizeof(int)*(ldb.bpcount-j));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Expect a valid multi-bulk command in the debugging client query buffer.
+ * On success the command is parsed and returned as an array of SDS strings,
+ * otherwise NULL is returned, meaning more data must be read first. */
+sds *ldbReplParseCommand(int *argcp) {
+ sds *argv = NULL;
+ int argc = 0;
+ if (sdslen(ldb.cbuf) == 0) return NULL;
+
+ /* Working on a copy is simpler in this case. We can modify it freely
+ * for the sake of simpler parsing. */
+ sds copy = sdsdup(ldb.cbuf);
+ char *p = copy;
+
+ /* This Redis protocol parser is a joke... just the simplest thing that
+ * works in this context. It is also very forgiving regarding broken
+ * protocol. */
+
+ /* Seek and parse *<count>\r\n. */
+ p = strchr(p,'*'); if (!p) goto protoerr;
+ char *plen = p+1; /* Multi bulk len pointer. */
+ p = strstr(p,"\r\n"); if (!p) goto protoerr;
+ *p = '\0'; p += 2;
+ *argcp = atoi(plen);
+ if (*argcp <= 0 || *argcp > 1024) goto protoerr;
+
+ /* Parse each argument. */
+ argv = zmalloc(sizeof(sds)*(*argcp));
+ argc = 0;
+ while(argc < *argcp) {
+ if (*p != '$') goto protoerr;
+ plen = p+1; /* Bulk string len pointer. */
+ p = strstr(p,"\r\n"); if (!p) goto protoerr;
+ *p = '\0'; p += 2;
+ int slen = atoi(plen); /* Length of this arg. */
+ if (slen <= 0 || slen > 1024) goto protoerr;
+ argv[argc++] = sdsnewlen(p,slen);
+ p += slen; /* Skip the already parsed argument. */
+ if (p[0] != '\r' || p[1] != '\n') goto protoerr;
+ p += 2; /* Skip \r\n. */
+ }
+ sdsfree(copy);
+ return argv;
+
+protoerr:
+ sdsfreesplitres(argv,argc);
+ sdsfree(copy);
+ return NULL;
+}
+
+/* Log the specified line in the Lua debugger output. */
+void ldbLogSourceLine(int lnum) {
+ char *line = ldbGetSourceLine(lnum);
+ char *prefix;
+ int bp = ldbIsBreakpoint(lnum);
+ int current = ldb.currentline == lnum;
+
+ if (current && bp)
+ prefix = "->#";
+ else if (current)
+ prefix = "-> ";
+ else if (bp)
+ prefix = " #";
+ else
+ prefix = " ";
+ sds thisline = sdscatprintf(sdsempty(),"%s%-3d %s", prefix, lnum, line);
+ ldbLog(thisline);
+}
+
+/* Implement the "list" command of the Lua debugger. If around is 0
+ * the whole file is listed, otherwise only a small portion of the file
+ * around the specified line is shown. When a line number is specified
+ * the amount of context (lines before/after) is specified via the
+ * 'context' argument. */
+void ldbList(int around, int context) {
+ int j;
+
+ for (j = 1; j <= ldb.lines; j++) {
+ if (around != 0 && abs(around-j) > context) continue;
+ ldbLogSourceLine(j);
+ }
+}
+
+/* Append a human-readable representation of the Lua value at position 'idx'
+ * on the stack of the 'lua' state, to the SDS string passed as argument.
+ * The new SDS string with the represented value attached is returned.
+ * Used in order to implement ldbLogStackValue().
+ *
+ * The element is not automatically removed from the stack, nor is it
+ * converted to a different type. */
+#define LDB_MAX_VALUES_DEPTH (LUA_MINSTACK/2)
+sds ldbCatStackValueRec(sds s, lua_State *lua, int idx, int level) {
+ int t = lua_type(lua,idx);
+
+ if (level++ == LDB_MAX_VALUES_DEPTH)
+ return sdscat(s,"<max recursion level reached! Nested table?>");
+
+ switch(t) {
+ case LUA_TSTRING:
+ {
+ size_t strl;
+ char *strp = (char*)lua_tolstring(lua,idx,&strl);
+ s = sdscatrepr(s,strp,strl);
+ }
+ break;
+ case LUA_TBOOLEAN:
+ s = sdscat(s,lua_toboolean(lua,idx) ? "true" : "false");
+ break;
+ case LUA_TNUMBER:
+ s = sdscatprintf(s,"%g",(double)lua_tonumber(lua,idx));
+ break;
+ case LUA_TNIL:
+ s = sdscatlen(s,"nil",3);
+ break;
+ case LUA_TTABLE:
+ {
+ int expected_index = 1; /* First index we expect in an array. */
+        int is_array = 1; /* Will be set to 0 if the check fails. */
+        /* Note: we create two representations at the same time, one
+         * assuming the table is an array, one assuming it is not. At the
+         * end we know which is true and select the right one. */
+ sds repr1 = sdsempty();
+ sds repr2 = sdsempty();
+ lua_pushnil(lua); /* The first key to start the iteration is nil. */
+ while (lua_next(lua,idx-1)) {
+ /* Test if so far the table looks like an array. */
+ if (is_array &&
+ (lua_type(lua,-2) != LUA_TNUMBER ||
+ lua_tonumber(lua,-2) != expected_index)) is_array = 0;
+ /* Stack now: table, key, value */
+ /* Array repr. */
+ repr1 = ldbCatStackValueRec(repr1,lua,-1,level);
+ repr1 = sdscatlen(repr1,"; ",2);
+ /* Full repr. */
+ repr2 = sdscatlen(repr2,"[",1);
+ repr2 = ldbCatStackValueRec(repr2,lua,-2,level);
+ repr2 = sdscatlen(repr2,"]=",2);
+ repr2 = ldbCatStackValueRec(repr2,lua,-1,level);
+ repr2 = sdscatlen(repr2,"; ",2);
+ lua_pop(lua,1); /* Stack: table, key. Ready for next iteration. */
+ expected_index++;
+ }
+        /* Strip the trailing "; " from both representations. */
+ if (sdslen(repr1)) sdsrange(repr1,0,-3);
+ if (sdslen(repr2)) sdsrange(repr2,0,-3);
+ /* Select the right one and discard the other. */
+ s = sdscatlen(s,"{",1);
+ s = sdscatsds(s,is_array ? repr1 : repr2);
+ s = sdscatlen(s,"}",1);
+ sdsfree(repr1);
+ sdsfree(repr2);
+ }
+ break;
+ case LUA_TFUNCTION:
+ case LUA_TUSERDATA:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ {
+ const void *p = lua_topointer(lua,idx);
+ char *typename = "unknown";
+ if (t == LUA_TFUNCTION) typename = "function";
+ else if (t == LUA_TUSERDATA) typename = "userdata";
+ else if (t == LUA_TTHREAD) typename = "thread";
+ else if (t == LUA_TLIGHTUSERDATA) typename = "light-userdata";
+ s = sdscatprintf(s,"\"%s@%p\"",typename,p);
+ }
+ break;
+ default:
+ s = sdscat(s,"\"<unknown-lua-type>\"");
+ break;
+ }
+ return s;
+}
+
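-- Illustrative sketch: the two representations built above, as the
-- debugger's "print" command would render them (map iteration order may
-- vary).
local arr = {10, 20, 30}        -- rendered as {10; 20; 30}
local map = {a = 1, [5] = 'x'}  -- rendered as {["a"]=1; [5]="x"}
return 0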
+/* Higher level wrapper for ldbCatStackValueRec() that just uses an initial
+ * recursion level of '0'. */
+sds ldbCatStackValue(sds s, lua_State *lua, int idx) {
+ return ldbCatStackValueRec(s,lua,idx,0);
+}
+
+/* Produce a debugger log entry representing the value of the Lua object
+ * currently on the top of the stack. The element is neither popped nor modified.
+ * Check ldbCatStackValue() for the actual implementation. */
+void ldbLogStackValue(lua_State *lua, char *prefix) {
+ sds s = sdsnew(prefix);
+ s = ldbCatStackValue(s,lua,-1);
+ ldbLogWithMaxLen(s);
+}
+
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply);
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply);
+
+/* Get the Redis protocol from 'reply' and append it in human readable form to
+ * the passed SDS string 'o'.
+ *
+ * Note that the SDS string is passed by reference (pointer to a pointer)
+ * so that we can return a modified pointer, as per SDS semantics. */
+char *ldbRedisProtocolToHuman(sds *o, char *reply) {
+ char *p = reply;
+ switch(*p) {
+ case ':': p = ldbRedisProtocolToHuman_Int(o,reply); break;
+ case '$': p = ldbRedisProtocolToHuman_Bulk(o,reply); break;
+ case '+': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '-': p = ldbRedisProtocolToHuman_Status(o,reply); break;
+ case '*': p = ldbRedisProtocolToHuman_MultiBulk(o,reply); break;
+ }
+ return p;
+}
+
+/* The following functions are helpers for ldbRedisProtocolToHuman(), each
+ * take care of a given Redis return type. */
+
+char *ldbRedisProtocolToHuman_Int(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ *o = sdscatlen(*o,reply+1,p-reply-1);
+ return p+2;
+}
+
+char *ldbRedisProtocolToHuman_Bulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long bulklen;
+
+ string2ll(reply+1,p-reply-1,&bulklen);
+ if (bulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p+2;
+ } else {
+ *o = sdscatrepr(*o,p+2,bulklen);
+ return p+2+bulklen+2;
+ }
+}
+
+char *ldbRedisProtocolToHuman_Status(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+
+ *o = sdscatrepr(*o,reply,p-reply);
+ return p+2;
+}
+
+char *ldbRedisProtocolToHuman_MultiBulk(sds *o, char *reply) {
+ char *p = strchr(reply+1,'\r');
+ long long mbulklen;
+ int j = 0;
+
+ string2ll(reply+1,p-reply-1,&mbulklen);
+ p += 2;
+ if (mbulklen == -1) {
+ *o = sdscatlen(*o,"NULL",4);
+ return p;
+ }
+ *o = sdscatlen(*o,"[",1);
+ for (j = 0; j < mbulklen; j++) {
+ p = ldbRedisProtocolToHuman(o,p);
+ if (j != mbulklen-1) *o = sdscatlen(*o,",",1);
+ }
+ *o = sdscatlen(*o,"]",1);
+ return p;
+}
+
+/* Log a Redis reply as debugger output, in a human readable format.
+ * The resulting string gets truncated by ldbLogWithMaxLen() according to
+ * the current 'maxlen' setting. */
+void ldbLogRedisReply(char *reply) {
+ sds log = sdsnew("<reply> ");
+ ldbRedisProtocolToHuman(&log,reply);
+ ldbLogWithMaxLen(log);
+}
+
+/* Implements the "print <var>" command of the Lua debugger. It scans for Lua
+ * var "varname" starting from the current stack frame up to the top stack
+ * frame. The first matching variable is printed. */
+void ldbPrint(lua_State *lua, char *varname) {
+ lua_Debug ar;
+
+ int l = 0; /* Stack level. */
+ while (lua_getstack(lua,l,&ar) != 0) {
+ l++;
+ const char *name;
+ int i = 1; /* Variable index. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ if (strcmp(varname,name) == 0) {
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ return;
+ } else {
+ lua_pop(lua,1); /* Discard the var name on the stack. */
+ }
+ }
+ }
+
+ /* Let's try with global vars in two selected cases */
+ if (!strcmp(varname,"ARGV") || !strcmp(varname,"KEYS")) {
+ lua_getglobal(lua, varname);
+ ldbLogStackValue(lua,"<value> ");
+ lua_pop(lua,1);
+ } else {
+ ldbLog(sdsnew("No such variable."));
+ }
+}
+
+/* Implements the "print" command (without arguments) of the Lua debugger.
+ * Prints all the variables in the current stack frame. */
+void ldbPrintAll(lua_State *lua) {
+ lua_Debug ar;
+ int vars = 0;
+
+ if (lua_getstack(lua,0,&ar) != 0) {
+ const char *name;
+ int i = 1; /* Variable index. */
+ while((name = lua_getlocal(lua,&ar,i)) != NULL) {
+ i++;
+ if (!strstr(name,"(*temporary)")) {
+ sds prefix = sdscatprintf(sdsempty(),"<value> %s = ",name);
+ ldbLogStackValue(lua,prefix);
+ sdsfree(prefix);
+ vars++;
+ }
+ lua_pop(lua,1);
+ }
+ }
+
+ if (vars == 0) {
+ ldbLog(sdsnew("No local variables in the current context."));
+ }
+}
+
+/* Implements the break command to list, add and remove breakpoints. */
+void ldbBreak(sds *argv, int argc) {
+ if (argc == 1) {
+ if (ldb.bpcount == 0) {
+ ldbLog(sdsnew("No breakpoints set. Use 'b <line>' to add one."));
+ return;
+ } else {
+ ldbLog(sdscatfmt(sdsempty(),"%i breakpoints set:",ldb.bpcount));
+ int j;
+ for (j = 0; j < ldb.bpcount; j++)
+ ldbLogSourceLine(ldb.bp[j]);
+ }
+ } else {
+ int j;
+ for (j = 1; j < argc; j++) {
+ char *arg = argv[j];
+ long line;
+ if (!string2l(arg,sdslen(arg),&line)) {
+ ldbLog(sdscatfmt(sdsempty(),"Invalid argument:'%s'",arg));
+ } else {
+ if (line == 0) {
+ ldb.bpcount = 0;
+ ldbLog(sdsnew("All breakpoints removed."));
+ } else if (line > 0) {
+ if (ldb.bpcount == LDB_BREAKPOINTS_MAX) {
+ ldbLog(sdsnew("Too many breakpoints set."));
+ } else if (ldbAddBreakpoint(line)) {
+ ldbList(line,1);
+ } else {
+ ldbLog(sdsnew("Wrong line number."));
+ }
+ } else if (line < 0) {
+ if (ldbDelBreakpoint(-line))
+ ldbLog(sdsnew("Breakpoint removed."));
+ else
+ ldbLog(sdsnew("No breakpoint in the specified line."));
+ }
+ }
+ }
+ }
+}
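/* Editorial sketch of the resulting command grammar, assuming a live
 * debugger session:
 *
 *   b          -> list all breakpoints
 *   b 10 25    -> add breakpoints at lines 10 and 25
 *   b -10      -> remove the breakpoint at line 10
 *   b 0        -> remove all breakpoints
 */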
+
+/* Implements the Lua debugger "eval" command. It just compiles the
+ * user-passed fragment of code and executes it, showing the result left on
+ * the stack. */
+void ldbEval(lua_State *lua, sds *argv, int argc) {
+ /* Glue the script together if it is composed of multiple arguments. */
+ sds code = sdsjoinsds(argv+1,argc-1," ",1);
+ sds expr = sdscatsds(sdsnew("return "),code);
+
+ /* Try to compile it as an expression, prepending "return ". */
+ if (luaL_loadbuffer(lua,expr,sdslen(expr),"@ldb_eval")) {
+ lua_pop(lua,1);
+ /* Failed? Try as a statement. */
+ if (luaL_loadbuffer(lua,code,sdslen(code),"@ldb_eval")) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1);
+ sdsfree(code);
+ sdsfree(expr); /* Also free 'expr' to avoid leaking it on this path. */
+ return;
+ }
+ }
+
+ /* Call it. */
+ sdsfree(code);
+ sdsfree(expr);
+ if (lua_pcall(lua,0,1,0)) {
+ ldbLog(sdscatfmt(sdsempty(),"<error> %s",lua_tostring(lua,-1)));
+ lua_pop(lua,1);
+ return;
+ }
+ ldbLogStackValue(lua,"<retval> ");
+ lua_pop(lua,1);
+}
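/* Editorial sketch: the double compilation above means both expressions and
 * statements work from the same prompt. For instance:
 *
 *   eval 1+math.random()      -- compiled as "return 1+math.random()"
 *   eval local t = {} t[1]=5  -- fails as an expression, runs as a statement
 *
 * In the second case the user code leaves nothing on the stack, so
 * lua_pcall() with nresults=1 adjusts by pushing nil and "<retval> nil"
 * is logged.
 */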
+
+/* Implements the debugger "redis" command. We use a trick in order to make
+ * the implementation very simple: we just call the Lua redis.call() command
+ * implementation, with ldb.step enabled, so as a side effect the Redis command
+ * and its reply are logged. */
+void ldbRedis(lua_State *lua, sds *argv, int argc) {
+ int j, saved_rc = server.lua_replicate_commands;
+
+ lua_getglobal(lua,"redis");
+ lua_pushstring(lua,"call");
+ lua_gettable(lua,-2); /* Stack: redis, redis.call */
+ for (j = 1; j < argc; j++)
+ lua_pushlstring(lua,argv[j],sdslen(argv[j]));
+ ldb.step = 1; /* Force redis.call() to log. */
+ server.lua_replicate_commands = 1;
+ lua_pcall(lua,argc-1,1,0); /* Stack: redis, result */
+ ldb.step = 0; /* Disable logging. */
+ server.lua_replicate_commands = saved_rc;
+ lua_pop(lua,2); /* Discard the result and clean the stack. */
+}
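/* Editorial sketch of a session, assuming "mykey" exists and that
 * redis.call() logs commands with a "<redis>" prefix when ldb.step is set:
 *
 *   lua debugger> redis get mykey
 *   <redis> get mykey
 *   <reply> "somevalue"
 *
 * The two log lines are the side effect of forcing ldb.step to 1 while
 * redis.call() runs. */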
+
+/* Implements "trace" command of the Lua debugger. It just prints a backtrace
+ * querying Lua starting from the current callframe back to the outer one. */
+void ldbTrace(lua_State *lua) {
+ lua_Debug ar;
+ int level = 0;
+
+ while(lua_getstack(lua,level,&ar)) {
+ lua_getinfo(lua,"Snl",&ar);
+ if(strstr(ar.short_src,"user_script") != NULL) {
+ ldbLog(sdscatprintf(sdsempty(),"%s %s:",
+ (level == 0) ? "In" : "From",
+ ar.name ? ar.name : "top level"));
+ ldbLogSourceLine(ar.currentline);
+ }
+ level++;
+ }
+ if (level == 0) {
+ ldbLog(sdsnew("<error> Can't retrieve Lua stack."));
+ }
+}
+
+/* Implements the debugger "maxlen" command. It just queries or sets the
+ * ldb.maxlen variable. */
+void ldbMaxlen(sds *argv, int argc) {
+ if (argc == 2) {
+ int newval = atoi(argv[1]);
+ ldb.maxlen_hint_sent = 1; /* User knows about this command. */
+ if (newval != 0 && newval <= 60) newval = 60;
+ ldb.maxlen = newval;
+ }
+ if (ldb.maxlen) {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are truncated at %d bytes.",(int)ldb.maxlen));
+ } else {
+ ldbLog(sdscatprintf(sdsempty(),"<value> replies are unlimited."));
+ }
+}
+
+/* Read debugging commands from the client.
+ * Return C_OK if the debugging session is continuing, otherwise
+ * C_ERR if the client closed the connection or is timing out. */
+int ldbRepl(lua_State *lua) {
+ sds *argv;
+ int argc;
+
+ /* We continue processing commands until a command that should return
+ * to the Lua interpreter is found. */
+ while(1) {
+ while((argv = ldbReplParseCommand(&argc)) == NULL) {
+ char buf[1024];
+ int nread = read(ldb.fd,buf,sizeof(buf));
+ if (nread <= 0) {
+ /* Make sure the script runs without user input since the
+ * client is no longer connected. */
+ ldb.step = 0;
+ ldb.bpcount = 0;
+ return C_ERR;
+ }
+ ldb.cbuf = sdscatlen(ldb.cbuf,buf,nread);
+ }
+
+ /* Flush the old buffer. */
+ sdsfree(ldb.cbuf);
+ ldb.cbuf = sdsempty();
+
+ /* Execute the command. */
+ if (!strcasecmp(argv[0],"h") || !strcasecmp(argv[0],"help")) {
+ldbLog(sdsnew("Redis Lua debugger help:"));
+ldbLog(sdsnew("[h]elp Show this help."));
+ldbLog(sdsnew("[s]tep Run current line and stop again."));
+ldbLog(sdsnew("[n]ext Alias for step."));
+ldbLog(sdsnew("[c]continue Run till next breakpoint."));
+ldbLog(sdsnew("[l]list List source code around current line."));
+ldbLog(sdsnew("[l]list [line] List source code around [line]."));
+ldbLog(sdsnew(" line = 0 means: current position."));
+ldbLog(sdsnew("[l]list [line] [ctx] In this form [ctx] specifies how many lines"));
+ldbLog(sdsnew(" to show before/after [line]."));
+ldbLog(sdsnew("[w]hole List all source code. Alias for 'list 1 1000000'."));
+ldbLog(sdsnew("[p]rint Show all the local variables."));
+ldbLog(sdsnew("[p]rint <var> Show the value of the specified variable."));
+ldbLog(sdsnew(" Can also show global vars KEYS and ARGV."));
+ldbLog(sdsnew("[b]reak Show all breakpoints."));
+ldbLog(sdsnew("[b]reak <line> Add a breakpoint to the specified line."));
+ldbLog(sdsnew("[b]reak -<line> Remove breakpoint from the specified line."));
+ldbLog(sdsnew("[b]reak 0 Remove all breakpoints."));
+ldbLog(sdsnew("[t]race Show a backtrace."));
+ldbLog(sdsnew("[e]eval <code> Execute some Lua code (in a different callframe)."));
+ldbLog(sdsnew("[r]edis <cmd> Execute a Redis command."));
+ldbLog(sdsnew("[m]axlen [len] Trim logged Redis replies and Lua var dumps to len."));
+ldbLog(sdsnew(" Specifying zero as <len> means unlimited."));
+ldbLog(sdsnew("[a]bort Stop the execution of the script. In sync"));
+ldbLog(sdsnew(" mode dataset changes will be retained."));
+ldbLog(sdsnew(""));
+ldbLog(sdsnew("Debugger functions you can call from Lua scripts:"));
+ldbLog(sdsnew("redis.debug() Produce logs in the debugger console."));
+ldbLog(sdsnew("redis.breakpoint() Stop execution like if there was a breakpoing."));
+ldbLog(sdsnew(" in the next line of code."));
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"s") || !strcasecmp(argv[0],"step") ||
+ !strcasecmp(argv[0],"n") || !strcasecmp(argv[0],"next")) {
+ ldb.step = 1;
+ break;
+ } else if (!strcasecmp(argv[0],"c") || !strcasecmp(argv[0],"continue")){
+ break;
+ } else if (!strcasecmp(argv[0],"t") || !strcasecmp(argv[0],"trace")) {
+ ldbTrace(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"m") || !strcasecmp(argv[0],"maxlen")) {
+ ldbMaxlen(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"b") || !strcasecmp(argv[0],"break")) {
+ ldbBreak(argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"e") || !strcasecmp(argv[0],"eval")) {
+ ldbEval(lua,argv,argc);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"a") || !strcasecmp(argv[0],"abort")) {
+ lua_pushstring(lua, "script aborted for user request");
+ lua_error(lua);
+ } else if (argc > 1 &&
+ (!strcasecmp(argv[0],"r") || !strcasecmp(argv[0],"redis"))) {
+ ldbRedis(lua,argv,argc);
+ ldbSendLogs();
+ } else if ((!strcasecmp(argv[0],"p") || !strcasecmp(argv[0],"print"))) {
+ if (argc == 2)
+ ldbPrint(lua,argv[1]);
+ else
+ ldbPrintAll(lua);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"l") || !strcasecmp(argv[0],"list")){
+ int around = ldb.currentline, ctx = 5;
+ if (argc > 1) {
+ int num = atoi(argv[1]);
+ if (num > 0) around = num;
+ }
+ if (argc > 2) ctx = atoi(argv[2]);
+ ldbList(around,ctx);
+ ldbSendLogs();
+ } else if (!strcasecmp(argv[0],"w") || !strcasecmp(argv[0],"whole")){
+ ldbList(1,1000000);
+ ldbSendLogs();
+ } else {
+ ldbLog(sdsnew("<error> Unknown Redis Lua debugger command or "
+ "wrong number of arguments."));
+ ldbSendLogs();
+ }
+
+ /* Free the command vector. */
+ sdsfreesplitres(argv,argc);
+ }
+
+ /* Free the current command argv if we break inside the while loop. */
+ sdsfreesplitres(argv,argc);
+ return C_OK;
+}
+
+/* This is the core of our Lua debugger, called each time Lua is about
+ * to start executing a new line. */
+void luaLdbLineHook(lua_State *lua, lua_Debug *ar) {
+ lua_getstack(lua,0,ar);
+ lua_getinfo(lua,"Sl",ar);
+ ldb.currentline = ar->currentline;
+
+ int bp = ldbIsBreakpoint(ldb.currentline) || ldb.luabp;
+ int timeout = 0;
+
+ /* Events outside our script are not interesting. */
+ if(strstr(ar->short_src,"user_script") == NULL) return;
+
+ /* Check if a timeout occurred. */
+ if (ar->event == LUA_HOOKCOUNT && ldb.step == 0 && bp == 0) {
+ mstime_t elapsed = mstime() - server.lua_time_start;
+ mstime_t timelimit = server.lua_time_limit ?
+ server.lua_time_limit : 5000;
+ if (elapsed >= timelimit) {
+ timeout = 1;
+ ldb.step = 1;
+ } else {
+ return; /* No timeout, ignore the COUNT event. */
+ }
+ }
+
+ if (ldb.step || bp) {
+ char *reason = "step over";
+ if (bp) reason = ldb.luabp ? "redis.breakpoint() called" :
+ "break point";
+ else if (timeout) reason = "timeout reached, infinite loop?";
+ ldb.step = 0;
+ ldb.luabp = 0;
+ ldbLog(sdscatprintf(sdsempty(),
+ "* Stopped at %d, stop reason = %s",
+ ldb.currentline, reason));
+ ldbLogSourceLine(ldb.currentline);
+ ldbSendLogs();
+ if (ldbRepl(lua) == C_ERR && timeout) {
+ /* If the client closed the connection and a timeout occurred, let's
+ * kill the script: otherwise the process would remain blocked
+ * indefinitely. */
+ lua_pushstring(lua, "timeout during Lua debugging with client closing connection");
+ lua_error(lua);
+ }
+ server.lua_time_start = mstime();
+ }
+}
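/* Editorial note: this hook only fires if it was registered before the
 * script started. A minimal sketch using the standard Lua C API:
 *
 *   lua_sethook(lua, luaLdbLineHook, LUA_MASKLINE|LUA_MASKCOUNT, 100000);
 *
 * LUA_MASKLINE drives the per-line stops, while LUA_MASKCOUNT delivers the
 * periodic LUA_HOOKCOUNT events used by the timeout check above. */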
+
diff --git a/src/sds.c b/src/sds.c
index 05ee0ad56..39ad595ed 100644
--- a/src/sds.c
+++ b/src/sds.c
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,12 +35,48 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#include <limits.h>
#include "sds.h"
-#include "zmalloc.h"
+#include "sdsalloc.h"
+
+const char *SDS_NOINIT = "SDS_NOINIT";
+
+static inline int sdsHdrSize(char type) {
+ switch(type&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return sizeof(struct sdshdr5);
+ case SDS_TYPE_8:
+ return sizeof(struct sdshdr8);
+ case SDS_TYPE_16:
+ return sizeof(struct sdshdr16);
+ case SDS_TYPE_32:
+ return sizeof(struct sdshdr32);
+ case SDS_TYPE_64:
+ return sizeof(struct sdshdr64);
+ }
+ return 0;
+}
+
+static inline char sdsReqType(size_t string_size) {
+ if (string_size < 1<<5)
+ return SDS_TYPE_5;
+ if (string_size < 1<<8)
+ return SDS_TYPE_8;
+ if (string_size < 1<<16)
+ return SDS_TYPE_16;
+#if (LONG_MAX == LLONG_MAX)
+ if (string_size < 1ll<<32)
+ return SDS_TYPE_32;
+ return SDS_TYPE_64;
+#else
+ return SDS_TYPE_32;
+#endif
+}
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
* If NULL is used for 'init' the string is initialized with zero bytes.
+ * If SDS_NOINIT is used, the buffer is left uninitialized.
*
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
@@ -49,20 +87,61 @@
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds sdsnewlen(const void *init, size_t initlen) {
- struct sdshdr *sh;
-
- if (init) {
- sh = zmalloc(sizeof(struct sdshdr)+initlen+1);
- } else {
- sh = zcalloc(sizeof(struct sdshdr)+initlen+1);
- }
+ void *sh;
+ sds s;
+ char type = sdsReqType(initlen);
+ /* Empty strings are usually created in order to append. Use type 8
+ * since type 5 is not good at this. */
+ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
+ int hdrlen = sdsHdrSize(type);
+ unsigned char *fp; /* flags pointer. */
+
+ sh = s_malloc(hdrlen+initlen+1);
 if (sh == NULL) return NULL;
+ /* Check for allocation failure before touching the buffer. */
+ if (init==SDS_NOINIT)
+ init = NULL;
+ else if (!init)
+ memset(sh, 0, hdrlen+initlen+1);
- sh->len = initlen;
- sh->free = 0;
+ s = (char*)sh+hdrlen;
+ fp = ((unsigned char*)s)-1;
+ switch(type) {
+ case SDS_TYPE_5: {
+ *fp = type | (initlen << SDS_TYPE_BITS);
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ sh->len = initlen;
+ sh->alloc = initlen;
+ *fp = type;
+ break;
+ }
+ }
if (initlen && init)
- memcpy(sh->buf, init, initlen);
- sh->buf[initlen] = '\0';
- return (char*)sh->buf;
+ memcpy(s, init, initlen);
+ s[initlen] = '\0';
+ return s;
}
/* Create an empty (zero length) sds string. Even in this case the string
@@ -71,7 +150,7 @@ sds sdsempty(void) {
return sdsnewlen("",0);
}
-/* Create a new sds string starting from a null termined C string. */
+/* Create a new sds string starting from a null terminated C string. */
sds sdsnew(const char *init) {
size_t initlen = (init == NULL) ? 0 : strlen(init);
return sdsnewlen(init, initlen);
@@ -85,7 +164,7 @@ sds sdsdup(const sds s) {
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
- zfree(s-sizeof(struct sdshdr));
+ s_free((char*)s-sdsHdrSize(s[-1]));
}
/* Set the sds string length to the length as obtained with strlen(), so
@@ -103,10 +182,8 @@ void sdsfree(sds s) {
* the output will be "6" as the string was modified but the logical length
* remains 6 bytes. */
void sdsupdatelen(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- int reallen = strlen(s);
- sh->free += (sh->len-reallen);
- sh->len = reallen;
+ size_t reallen = strlen(s);
+ sdssetlen(s, reallen);
}
/* Modify an sds string in-place to make it empty (zero length).
@@ -114,10 +191,8 @@ void sdsupdatelen(sds s) {
* so that next append operations will not require allocations up to the
* number of bytes previously available. */
void sdsclear(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- sh->free += sh->len;
- sh->len = 0;
- sh->buf[0] = '\0';
+ sdssetlen(s, 0);
+ s[0] = '\0';
}
/* Enlarge the free space at the end of the sds string so that the caller
@@ -127,23 +202,48 @@ void sdsclear(sds s) {
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
- struct sdshdr *sh, *newsh;
- size_t free = sdsavail(s);
+ void *sh, *newsh;
+ size_t avail = sdsavail(s);
size_t len, newlen;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen;
+
+ /* Return ASAP if there is enough space left. */
+ if (avail >= addlen) return s;
- if (free >= addlen) return s;
len = sdslen(s);
- sh = (void*) (s-(sizeof(struct sdshdr)));
+ sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
- newsh = zrealloc(sh, sizeof(struct sdshdr)+newlen+1);
- if (newsh == NULL) return NULL;
- newsh->free = newlen - len;
- return newsh->buf;
+ type = sdsReqType(newlen);
+
+ /* Don't use type 5: the user is appending to the string and type 5 is
+ * not able to remember empty space, so sdsMakeRoomFor() must be called
+ * at every appending operation. */
+ if (type == SDS_TYPE_5) type = SDS_TYPE_8;
+
+ hdrlen = sdsHdrSize(type);
+ if (oldtype==type) {
+ newsh = s_realloc(sh, hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+hdrlen;
+ } else {
+ /* Since the header size changes, we need to move the string forward,
+ * and can't use realloc(). */
+ newsh = s_malloc(hdrlen+newlen+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, newlen);
+ return s;
}
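/* Editorial sketch of the preallocation policy above: below
 * SDS_MAX_PREALLOC (1MB) the needed length is doubled, above it a flat 1MB
 * is added. So growing a 10 byte string by 5 reserves room for 30 bytes
 * ((10+5)*2), while growing a 2MB string by 1 byte reserves 2MB+1+1MB. */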
/* Reallocate the sds string so that it has no free space at the end. The
@@ -153,15 +253,39 @@ sds sdsMakeRoomFor(sds s, size_t addlen) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
- struct sdshdr *sh;
-
- sh = (void*) (s-(sizeof(struct sdshdr)));
- sh = zrealloc(sh, sizeof(struct sdshdr)+sh->len+1);
- sh->free = 0;
- return sh->buf;
+ void *sh, *newsh;
+ char type, oldtype = s[-1] & SDS_TYPE_MASK;
+ int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
+ size_t len = sdslen(s);
+ sh = (char*)s-oldhdrlen;
+
+ /* Check what would be the minimum SDS header that is just good enough to
+ * fit this string. */
+ type = sdsReqType(len);
+ hdrlen = sdsHdrSize(type);
+
+ /* If the type is the same, or at least a large enough type is still
+ * required, we just realloc(), letting the allocator do the copy
+ * only if really needed. Otherwise if the change is huge, we manually
+ * reallocate the string to use the different header type. */
+ if (oldtype==type || type > SDS_TYPE_8) {
+ newsh = s_realloc(sh, oldhdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ s = (char*)newsh+oldhdrlen;
+ } else {
+ newsh = s_malloc(hdrlen+len+1);
+ if (newsh == NULL) return NULL;
+ memcpy((char*)newsh+hdrlen, s, len+1);
+ s_free(sh);
+ s = (char*)newsh+hdrlen;
+ s[-1] = type;
+ sdssetlen(s, len);
+ }
+ sdssetalloc(s, len);
+ return s;
}
-/* Return the total size of the allocation of the specifed sds string,
+/* Return the total size of the allocation of the specified sds string,
* including:
* 1) The sds header before the pointer.
* 2) The string.
@@ -169,9 +293,14 @@ sds sdsRemoveFreeSpace(sds s) {
* 4) The implicit null term.
*/
size_t sdsAllocSize(sds s) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+ size_t alloc = sdsalloc(s);
+ return sdsHdrSize(s[-1])+alloc+1;
+}
- return sizeof(*sh)+sh->len+sh->free+1;
+/* Return the pointer of the actual SDS allocation (normally SDS strings
+ * are referenced by the start of the string buffer). */
+void *sdsAllocPtr(sds s) {
+ return (void*) (s-sdsHdrSize(s[-1]));
}
/* Increments the sds length and decrements the left free space at the
@@ -197,16 +326,45 @@ size_t sdsAllocSize(sds s) {
* ... check for nread <= 0 and handle it ...
* sdsIncrLen(s, nread);
*/
-void sdsIncrLen(sds s, int incr) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
-
- if (incr >= 0)
- assert(sh->free >= (unsigned int)incr);
- else
- assert(sh->len >= (unsigned int)(-incr));
- sh->len += incr;
- sh->free -= incr;
- s[sh->len] = '\0';
+void sdsIncrLen(sds s, ssize_t incr) {
+ unsigned char flags = s[-1];
+ size_t len;
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char oldlen = SDS_TYPE_5_LEN(flags);
+ assert((incr > 0 && oldlen+incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr)));
+ *fp = SDS_TYPE_5 | ((oldlen+incr) << SDS_TYPE_BITS);
+ len = oldlen+incr;
+ break;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (unsigned int)incr) || (incr < 0 && sh->len >= (unsigned int)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ assert((incr >= 0 && sh->alloc-sh->len >= (uint64_t)incr) || (incr < 0 && sh->len >= (uint64_t)(-incr)));
+ len = (sh->len += incr);
+ break;
+ }
+ default: len = 0; /* Just to avoid compilation warnings. */
+ }
+ s[len] = '\0';
}
/* Grow the sds to have the specified length. Bytes that were not part of
@@ -215,19 +373,15 @@ void sdsIncrLen(sds s, int incr) {
* if the specified length is smaller than the current length, no operation
* is performed. */
sds sdsgrowzero(sds s, size_t len) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- size_t totlen, curlen = sh->len;
+ size_t curlen = sdslen(s);
if (len <= curlen) return s;
s = sdsMakeRoomFor(s,len-curlen);
if (s == NULL) return NULL;
/* Make sure added region doesn't contain garbage */
- sh = (void*)(s-(sizeof(struct sdshdr)));
memset(s+curlen,0,(len-curlen+1)); /* also set trailing \0 byte */
- totlen = sh->len+sh->free;
- sh->len = len;
- sh->free = totlen-sh->len;
+ sdssetlen(s, len);
return s;
}
@@ -237,15 +391,12 @@ sds sdsgrowzero(sds s, size_t len) {
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
- struct sdshdr *sh;
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
memcpy(s+curlen, t, len);
- sh->len = curlen+len;
- sh->free = sh->free-len;
+ sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
@@ -269,19 +420,13 @@ sds sdscatsds(sds s, const sds t) {
/* Destructively modify the sds string 's' to hold the specified binary
* safe string pointed by 't' of length 'len' bytes. */
sds sdscpylen(sds s, const char *t, size_t len) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
- size_t totlen = sh->free+sh->len;
-
- if (totlen < len) {
- s = sdsMakeRoomFor(s,len-sh->len);
+ if (sdsalloc(s) < len) {
+ s = sdsMakeRoomFor(s,len-sdslen(s));
if (s == NULL) return NULL;
- sh = (void*) (s-(sizeof(struct sdshdr)));
- totlen = sh->free+sh->len;
}
memcpy(s, t, len);
s[len] = '\0';
- sh->len = len;
- sh->free = totlen-len;
+ sdssetlen(s, len);
return s;
}
@@ -378,7 +523,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* We try to start using a static buffer for speed.
* If not possible we revert to heap allocation. */
if (buflen > sizeof(staticbuf)) {
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
} else {
buflen = sizeof(staticbuf);
@@ -392,9 +537,9 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
vsnprintf(buf, buflen, fmt, cpy);
va_end(cpy);
if (buf[buflen-2] != '\0') {
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
buflen *= 2;
- buf = zmalloc(buflen);
+ buf = s_malloc(buflen);
if (buf == NULL) return NULL;
continue;
}
@@ -403,7 +548,7 @@ sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
/* Finally concat the obtained string to the SDS string and return it. */
t = sdscat(s, buf);
- if (buf != staticbuf) zfree(buf);
+ if (buf != staticbuf) s_free(buf);
return t;
}
@@ -449,10 +594,9 @@ sds sdscatprintf(sds s, const char *fmt, ...) {
* %% - Verbatim "%" character.
*/
sds sdscatfmt(sds s, char const *fmt, ...) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
size_t initlen = sdslen(s);
const char *f = fmt;
- int i;
+ long i;
va_list ap;
va_start(ap,fmt);
@@ -460,14 +604,13 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
i = initlen; /* Position of the next byte to write to dest str. */
while(*f) {
char next, *str;
- unsigned int l;
+ size_t l;
long long num;
unsigned long long unum;
/* Make sure there is always space for at least 1 char. */
- if (sh->free == 0) {
+ if (sdsavail(s)==0) {
s = sdsMakeRoomFor(s,1);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
switch(*f) {
@@ -479,13 +622,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
case 'S':
str = va_arg(ap,char*);
l = (next == 's') ? strlen(str) : sdslen(str);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,str,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
break;
case 'i':
@@ -497,13 +638,11 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsll2str(buf,num);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
@@ -516,27 +655,23 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
{
char buf[SDS_LLSTR_SIZE];
l = sdsull2str(buf,unum);
- if (sh->free < l) {
+ if (sdsavail(s) < l) {
s = sdsMakeRoomFor(s,l);
- sh = (void*) (s-(sizeof(struct sdshdr)));
}
memcpy(s+i,buf,l);
- sh->len += l;
- sh->free -= l;
+ sdsinclen(s,l);
i += l;
}
break;
default: /* Handle %% and generally %<unknown>. */
s[i++] = next;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
break;
default:
s[i++] = *f;
- sh->len += 1;
- sh->free -= 1;
+ sdsinclen(s,1);
break;
}
f++;
@@ -557,13 +692,12 @@ sds sdscatfmt(sds s, char const *fmt, ...) {
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
- * s = sdstrim(s,"A. :");
+ * s = sdstrim(s,"Aa. :");
* printf("%s\n", s);
*
 * Output will be just "HelloWorld".
*/
sds sdstrim(sds s, const char *cset) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
char *start, *end, *sp, *ep;
size_t len;
@@ -572,10 +706,9 @@ sds sdstrim(sds s, const char *cset) {
while(sp <= end && strchr(cset, *sp)) sp++;
while(ep > sp && strchr(cset, *ep)) ep--;
len = (sp > ep) ? 0 : ((ep-sp)+1);
- if (sh->buf != sp) memmove(sh->buf, sp, len);
- sh->buf[len] = '\0';
- sh->free = sh->free+(sh->len-len);
- sh->len = len;
+ if (s != sp) memmove(s, sp, len);
+ s[len] = '\0';
+ sdssetlen(s,len);
return s;
}
@@ -595,8 +728,7 @@ sds sdstrim(sds s, const char *cset) {
* s = sdsnew("Hello World");
* sdsrange(s,1,-1); => "ello World"
*/
-void sdsrange(sds s, int start, int end) {
- struct sdshdr *sh = (void*) (s-(sizeof(struct sdshdr)));
+void sdsrange(sds s, ssize_t start, ssize_t end) {
size_t newlen, len = sdslen(s);
if (len == 0) return;
@@ -610,31 +742,30 @@ void sdsrange(sds s, int start, int end) {
}
newlen = (start > end) ? 0 : (end-start)+1;
if (newlen != 0) {
- if (start >= (signed)len) {
+ if (start >= (ssize_t)len) {
newlen = 0;
- } else if (end >= (signed)len) {
+ } else if (end >= (ssize_t)len) {
end = len-1;
newlen = (start > end) ? 0 : (end-start)+1;
}
} else {
start = 0;
}
- if (start && newlen) memmove(sh->buf, sh->buf+start, newlen);
- sh->buf[newlen] = 0;
- sh->free = sh->free+(sh->len-newlen);
- sh->len = newlen;
+ if (start && newlen) memmove(s, s+start, newlen);
+ s[newlen] = 0;
+ sdssetlen(s,newlen);
}
/* Apply tolower() to every character of the sds string 's'. */
void sdstolower(sds s) {
- int len = sdslen(s), j;
+ size_t len = sdslen(s), j;
for (j = 0; j < len; j++) s[j] = tolower(s[j]);
}
/* Apply toupper() to every character of the sds string 's'. */
void sdstoupper(sds s) {
- int len = sdslen(s), j;
+ size_t len = sdslen(s), j;
for (j = 0; j < len; j++) s[j] = toupper(s[j]);
}
@@ -658,7 +789,7 @@ int sdscmp(const sds s1, const sds s2) {
l2 = sdslen(s2);
minlen = (l1 < l2) ? l1 : l2;
cmp = memcmp(s1,s2,minlen);
- if (cmp == 0) return l1-l2;
+ if (cmp == 0) return l1>l2? 1: (l1<l2? -1: 0);
return cmp;
}
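/* Editorial note on the hunk above: l1 and l2 are size_t, so the old
 * "return l1-l2" computed an unsigned difference that wraps around when
 * l1 < l2; the result is now normalized to -1/0/1, memcmp() style. */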
@@ -678,13 +809,14 @@ int sdscmp(const sds s1, const sds s2) {
* requires length arguments. sdssplit() is just the
* same function but for zero-terminated strings.
*/
-sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count) {
- int elements = 0, slots = 5, start = 0, j;
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {
+ int elements = 0, slots = 5;
+ long start = 0, j;
sds *tokens;
if (seplen < 1 || len < 0) return NULL;
- tokens = zmalloc(sizeof(sds)*slots);
+ tokens = s_malloc(sizeof(sds)*slots);
if (tokens == NULL) return NULL;
if (len == 0) {
@@ -697,7 +829,7 @@ sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count
sds *newtokens;
slots *= 2;
- newtokens = zrealloc(tokens,sizeof(sds)*slots);
+ newtokens = s_realloc(tokens,sizeof(sds)*slots);
if (newtokens == NULL) goto cleanup;
tokens = newtokens;
}
@@ -721,7 +853,7 @@ cleanup:
{
int i;
for (i = 0; i < elements; i++) sdsfree(tokens[i]);
- zfree(tokens);
+ s_free(tokens);
*count = 0;
return NULL;
}
@@ -732,7 +864,7 @@ void sdsfreesplitres(sds *tokens, int count) {
if (!tokens) return;
while(count--)
sdsfree(tokens[count]);
- zfree(tokens);
+ s_free(tokens);
}
/* Append to the sds string "s" an escaped string representation where
@@ -906,13 +1038,13 @@ sds *sdssplitargs(const char *line, int *argc) {
if (*p) p++;
}
/* add the token to the vector */
- vector = zrealloc(vector,((*argc)+1)*sizeof(char*));
+ vector = s_realloc(vector,((*argc)+1)*sizeof(char*));
vector[*argc] = current;
(*argc)++;
current = NULL;
} else {
/* Even on empty input string return something not NULL. */
- if (vector == NULL) vector = zmalloc(sizeof(void*));
+ if (vector == NULL) vector = s_malloc(sizeof(void*));
return vector;
}
}
@@ -920,7 +1052,7 @@ sds *sdssplitargs(const char *line, int *argc) {
err:
while((*argc)--)
sdsfree(vector[*argc]);
- zfree(vector);
+ s_free(vector);
if (current) sdsfree(current);
*argc = 0;
return NULL;
@@ -962,17 +1094,35 @@ sds sdsjoin(char **argv, int argc, char *sep) {
return join;
}
-#if defined(REDIS_TEST) || defined(SDS_TEST_MAIN)
+/* Like sdsjoin, but joins an array of SDS strings. */
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen) {
+ sds join = sdsempty();
+ int j;
+
+ for (j = 0; j < argc; j++) {
+ join = sdscatsds(join, argv[j]);
+ if (j != argc-1) join = sdscatlen(join,sep,seplen);
+ }
+ return join;
+}
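/* Editorial usage sketch (this is what ldbEval() in the scripting patch
 * above relies on to glue arguments back together):
 *
 *   sds parts[3] = { sdsnew("return"), sdsnew("1"), sdsnew("+ 1") };
 *   sds code = sdsjoinsds(parts, 3, " ", 1);   // "return 1 + 1"
 *   sdsfree(code);
 */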
+
+/* Wrappers to the allocators used by SDS. Note that SDS will actually
+ * just use the macros defined in sdsalloc.h in order to avoid paying
+ * the overhead of function calls. Here we define these wrappers only for
+ * the programs SDS is linked to, in case they want to touch the SDS
+ * internals even if they use a different allocator. */
+void *sds_malloc(size_t size) { return s_malloc(size); }
+void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
+void sds_free(void *ptr) { s_free(ptr); }
+
+#if defined(SDS_TEST_MAIN)
#include <stdio.h>
#include "testhelp.h"
#include "limits.h"
#define UNUSED(x) (void)(x)
-int sdsTest(int argc, char *argv[]) {
- UNUSED(argc);
- UNUSED(argv);
+int sdsTest(void) {
{
- struct sdshdr *sh;
sds x = sdsnew("foo"), y;
test_cond("Create a string and obtain the length",
@@ -1008,6 +1158,7 @@ int sdsTest(int argc, char *argv[]) {
sdslen(x) == 60 &&
memcmp(x,"--Hello Hi! World -9223372036854775808,"
"9223372036854775807--",60) == 0)
+ printf("[%s]\n",x);
sdsfree(x);
x = sdsnew("--");
@@ -1096,20 +1247,37 @@ int sdsTest(int argc, char *argv[]) {
{
unsigned int oldfree;
+ char *p;
+ int step = 10, j, i;
sdsfree(x);
+ sdsfree(y);
x = sdsnew("0");
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsnew() free/len buffers", sh->len == 1 && sh->free == 0);
- x = sdsMakeRoomFor(x,1);
- sh = (void*) (x-(sizeof(struct sdshdr)));
- test_cond("sdsMakeRoomFor()", sh->len == 1 && sh->free > 0);
- oldfree = sh->free;
- x[1] = '1';
- sdsIncrLen(x,1);
- test_cond("sdsIncrLen() -- content", x[0] == '0' && x[1] == '1');
- test_cond("sdsIncrLen() -- len", sh->len == 2);
- test_cond("sdsIncrLen() -- free", sh->free == oldfree-1);
+ test_cond("sdsnew() free/len buffers", sdslen(x) == 1 && sdsavail(x) == 0);
+
+ /* Run the test a few times in order to hit the first two
+ * SDS header types. */
+ for (i = 0; i < 10; i++) {
+ int oldlen = sdslen(x);
+ x = sdsMakeRoomFor(x,step);
+ int type = x[-1]&SDS_TYPE_MASK;
+
+ test_cond("sdsMakeRoomFor() len", sdslen(x) == oldlen);
+ if (type != SDS_TYPE_5) {
+ test_cond("sdsMakeRoomFor() free", sdsavail(x) >= step);
+ oldfree = sdsavail(x);
+ }
+ p = x+oldlen;
+ for (j = 0; j < step; j++) {
+ p[j] = 'A'+j;
+ }
+ sdsIncrLen(x,step);
+ }
+ test_cond("sdsMakeRoomFor() content",
+ memcmp("0ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ",x,101) == 0);
+ test_cond("sdsMakeRoomFor() final length",sdslen(x)==101);
+
+ sdsfree(x);
}
}
test_report()
diff --git a/src/sds.h b/src/sds.h
index 93dd4f28e..1bdb60dec 100644
--- a/src/sds.h
+++ b/src/sds.h
@@ -1,6 +1,8 @@
-/* SDSLib, A C dynamic strings library
+/* SDSLib 2.0 -- A C dynamic strings library
*
- * Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Oran Agra
+ * Copyright (c) 2015, Redis Labs, Inc
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,35 +34,192 @@
#define __SDS_H
#define SDS_MAX_PREALLOC (1024*1024)
+const char *SDS_NOINIT;
#include <sys/types.h>
#include <stdarg.h>
+#include <stdint.h>
typedef char *sds;
-struct sdshdr {
- unsigned int len;
- unsigned int free;
+/* Note: sdshdr5 is never used, we just access the flags byte directly.
+ * However it is here to document the layout of type 5 SDS strings. */
+struct __attribute__ ((__packed__)) sdshdr5 {
+ unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
+struct __attribute__ ((__packed__)) sdshdr8 {
+ uint8_t len; /* used */
+ uint8_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr16 {
+ uint16_t len; /* used */
+ uint16_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr32 {
+ uint32_t len; /* used */
+ uint32_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+struct __attribute__ ((__packed__)) sdshdr64 {
+ uint64_t len; /* used */
+ uint64_t alloc; /* excluding the header and null terminator */
+ unsigned char flags; /* 3 lsb of type, 5 unused bits */
+ char buf[];
+};
+
+#define SDS_TYPE_5 0
+#define SDS_TYPE_8 1
+#define SDS_TYPE_16 2
+#define SDS_TYPE_32 3
+#define SDS_TYPE_64 4
+#define SDS_TYPE_MASK 7
+#define SDS_TYPE_BITS 3
+#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
+#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
+#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
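/* Editorial sketch: the headers are __packed__, so SDS_HDR(8,s) lands on
 * the 3-byte sdshdr8 that immediately precedes the buffer:
 *
 *   struct sdshdr8 *h = SDS_HDR(8,s);  // valid only if type is SDS_TYPE_8
 *   // h->len == sdslen(s), h->alloc - h->len == sdsavail(s)
 */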
static inline size_t sdslen(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->len;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->len;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->len;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->len;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->len;
+ }
+ return 0;
}
static inline size_t sdsavail(const sds s) {
- struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
- return sh->free;
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5: {
+ return 0;
+ }
+ case SDS_TYPE_8: {
+ SDS_HDR_VAR(8,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_16: {
+ SDS_HDR_VAR(16,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_32: {
+ SDS_HDR_VAR(32,s);
+ return sh->alloc - sh->len;
+ }
+ case SDS_TYPE_64: {
+ SDS_HDR_VAR(64,s);
+ return sh->alloc - sh->len;
+ }
+ }
+ return 0;
+}
+
+static inline void sdssetlen(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len = newlen;
+ break;
+ }
+}
+
+static inline void sdsinclen(sds s, size_t inc) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ {
+ unsigned char *fp = ((unsigned char*)s)-1;
+ unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
+ *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
+ }
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->len += inc;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->len += inc;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->len += inc;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->len += inc;
+ break;
+ }
+}
+
+/* sdsalloc() = sdsavail() + sdslen() */
+static inline size_t sdsalloc(const sds s) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ return SDS_TYPE_5_LEN(flags);
+ case SDS_TYPE_8:
+ return SDS_HDR(8,s)->alloc;
+ case SDS_TYPE_16:
+ return SDS_HDR(16,s)->alloc;
+ case SDS_TYPE_32:
+ return SDS_HDR(32,s)->alloc;
+ case SDS_TYPE_64:
+ return SDS_HDR(64,s)->alloc;
+ }
+ return 0;
+}
+
+static inline void sdssetalloc(sds s, size_t newlen) {
+ unsigned char flags = s[-1];
+ switch(flags&SDS_TYPE_MASK) {
+ case SDS_TYPE_5:
+ /* Nothing to do, this type has no total allocation info. */
+ break;
+ case SDS_TYPE_8:
+ SDS_HDR(8,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_16:
+ SDS_HDR(16,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_32:
+ SDS_HDR(32,s)->alloc = newlen;
+ break;
+ case SDS_TYPE_64:
+ SDS_HDR(64,s)->alloc = newlen;
+ break;
+ }
}
sds sdsnewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
-size_t sdslen(const sds s);
sds sdsdup(const sds s);
void sdsfree(sds s);
-size_t sdsavail(const sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);
@@ -78,11 +237,11 @@ sds sdscatprintf(sds s, const char *fmt, ...);
sds sdscatfmt(sds s, char const *fmt, ...);
sds sdstrim(sds s, const char *cset);
-void sdsrange(sds s, int start, int end);
+void sdsrange(sds s, ssize_t start, ssize_t end);
void sdsupdatelen(sds s);
void sdsclear(sds s);
int sdscmp(const sds s1, const sds s2);
-sds *sdssplitlen(const char *s, int len, const char *sep, int seplen, int *count);
+sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
void sdsfreesplitres(sds *tokens, int count);
void sdstolower(sds s);
void sdstoupper(sds s);
@@ -91,12 +250,22 @@ sds sdscatrepr(sds s, const char *p, size_t len);
sds *sdssplitargs(const char *line, int *argc);
sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
+sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
-void sdsIncrLen(sds s, int incr);
+void sdsIncrLen(sds s, ssize_t incr);
sds sdsRemoveFreeSpace(sds s);
size_t sdsAllocSize(sds s);
+void *sdsAllocPtr(sds s);
+
+/* Export the allocator used by SDS to the program using SDS.
+ * Sometimes the program SDS is linked to may use a different set of
+ * allocators, but may want to allocate or free things that SDS will
+ * respectively free or allocate. */
+void *sds_malloc(size_t size);
+void *sds_realloc(void *ptr, size_t size);
+void sds_free(void *ptr);
#ifdef REDIS_TEST
int sdsTest(int argc, char *argv[]);
diff --git a/src/sdsalloc.h b/src/sdsalloc.h
new file mode 100644
index 000000000..531d41929
--- /dev/null
+++ b/src/sdsalloc.h
@@ -0,0 +1,42 @@
+/* SDSLib 2.0 -- A C dynamic strings library
+ *
+ * Copyright (c) 2006-2015, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2015, Redis Labs, Inc
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* SDS allocator selection.
+ *
+ * This file is used in order to change the SDS allocator at compile time.
+ * Just point the following defines at the allocator you want to use. Also
+ * add the include of your alternate allocator if needed (not needed in
+ * order to use the default libc allocator). */
+
+#include "zmalloc.h"
+#define s_malloc zmalloc
+#define s_realloc zrealloc
+#define s_free zfree
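/* Editorial sketch: to build SDS against plain libc instead, the three
 * macros above would simply be remapped, e.g.:
 *
 *   #include <stdlib.h>
 *   #define s_malloc malloc
 *   #define s_realloc realloc
 *   #define s_free free
 */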
diff --git a/src/sentinel.c b/src/sentinel.c
index c693a5862..76e0eb750 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "hiredis.h"
#include "async.h"
@@ -54,19 +54,18 @@ typedef struct sentinelAddr {
#define SRI_MASTER (1<<0)
#define SRI_SLAVE (1<<1)
#define SRI_SENTINEL (1<<2)
-#define SRI_DISCONNECTED (1<<3)
-#define SRI_S_DOWN (1<<4) /* Subjectively down (no quorum). */
-#define SRI_O_DOWN (1<<5) /* Objectively down (confirmed by others). */
-#define SRI_MASTER_DOWN (1<<6) /* A Sentinel with this flag set thinks that
+#define SRI_S_DOWN (1<<3) /* Subjectively down (no quorum). */
+#define SRI_O_DOWN (1<<4) /* Objectively down (confirmed by others). */
+#define SRI_MASTER_DOWN (1<<5) /* A Sentinel with this flag set thinks that
its master is down. */
-#define SRI_FAILOVER_IN_PROGRESS (1<<7) /* Failover is in progress for
+#define SRI_FAILOVER_IN_PROGRESS (1<<6) /* Failover is in progress for
this master. */
-#define SRI_PROMOTED (1<<8) /* Slave selected for promotion. */
-#define SRI_RECONF_SENT (1<<9) /* SLAVEOF <newmaster> sent. */
-#define SRI_RECONF_INPROG (1<<10) /* Slave synchronization in progress. */
-#define SRI_RECONF_DONE (1<<11) /* Slave synchronized with new master. */
-#define SRI_FORCE_FAILOVER (1<<12) /* Force failover with master up. */
-#define SRI_SCRIPT_KILL_SENT (1<<13) /* SCRIPT KILL already sent on -BUSY */
+#define SRI_PROMOTED (1<<7) /* Slave selected for promotion. */
+#define SRI_RECONF_SENT (1<<8) /* SLAVEOF <newmaster> sent. */
+#define SRI_RECONF_INPROG (1<<9) /* Slave synchronization in progress. */
+#define SRI_RECONF_DONE (1<<10) /* Slave synchronized with new master. */
+#define SRI_FORCE_FAILOVER (1<<11) /* Force failover with master up. */
+#define SRI_SCRIPT_KILL_SENT (1<<12) /* SCRIPT KILL already sent on -BUSY */
/* Note: times are in milliseconds. */
#define SENTINEL_INFO_PERIOD 10000
@@ -85,6 +84,7 @@ typedef struct sentinelAddr {
#define SENTINEL_MAX_PENDING_COMMANDS 100
#define SENTINEL_ELECTION_TIMEOUT 10000
#define SENTINEL_MAX_DESYNC 1000
+#define SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG 1
/* Failover machine different states. */
#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
@@ -115,27 +115,59 @@ typedef struct sentinelAddr {
#define SENTINEL_SCRIPT_MAX_RETRY 10
#define SENTINEL_SCRIPT_RETRY_DELAY 30000 /* 30 seconds between retries. */
-typedef struct sentinelRedisInstance {
- int flags; /* See SRI_... defines */
- char *name; /* Master name from the point of view of this sentinel. */
- char *runid; /* run ID of this instance. */
- uint64_t config_epoch; /* Configuration epoch. */
- sentinelAddr *addr; /* Master host. */
+/* SENTINEL SIMULATE-FAILURE command flags. */
+#define SENTINEL_SIMFAILURE_NONE 0
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION (1<<0)
+#define SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION (1<<1)
+
+/* The link to a sentinelRedisInstance. When we have the same set of Sentinels
+ * monitoring many masters, we have different instances representing the
+ * same Sentinels, one per master, and we need to share the hiredis connections
+ * among them. Otherwise if 5 Sentinels are monitoring 100 masters we create
+ * 500 outgoing connections instead of 5.
+ *
+ * So this structure represents a reference counted link in terms of the two
+ * hiredis connections for commands and Pub/Sub, and the fields needed for
+ * failure detection, since the ping/pong times are now local to the link: if
+ * the link is available, the instance is available. This way we don't just
+ * have 5 connections instead of 500, we also send 5 pings instead of 500.
+ *
+ * Links are shared only for Sentinels: master and slave instances have
+ * a link with refcount = 1, always. */
+typedef struct instanceLink {
+ int refcount; /* Number of sentinelRedisInstance owners. */
+ int disconnected; /* Non-zero if we need to reconnect cc or pc. */
+ int pending_commands; /* Number of commands sent waiting for a reply. */
redisAsyncContext *cc; /* Hiredis context for commands. */
redisAsyncContext *pc; /* Hiredis context for Pub / Sub. */
- int pending_commands; /* Number of commands sent waiting for a reply. */
mstime_t cc_conn_time; /* cc connection time. */
mstime_t pc_conn_time; /* pc connection time. */
mstime_t pc_last_activity; /* Last time we received any message. */
mstime_t last_avail_time; /* Last time the instance replied to ping with
a reply we consider valid. */
- mstime_t last_ping_time; /* Last time a pending ping was sent in the
- context of the current command connection
- with the instance. 0 if still not sent or
- if pong already received. */
+ mstime_t act_ping_time; /* Time at which the last pending ping (no pong
+ received after it) was sent. This field is
+ set to 0 when a pong is received, and set again
+ to the current time if the value is 0 and a new
+ ping is sent. */
+ mstime_t last_ping_time; /* Time at which we sent the last ping. This is
+ only used to avoid sending too many pings
+ during failure. Idle time is computed using
+ the act_ping_time field. */
mstime_t last_pong_time; /* Last time the instance replied to ping,
whatever the reply was. That's used to check
if the link is idle and must be reconnected. */
+ mstime_t last_reconn_time; /* Last reconnection attempt performed when
+ the link was down. */
+} instanceLink;
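/* Editorial arithmetic for the comment above: 5 Sentinels monitoring 100
 * masters means 5*100 = 500 sentinelRedisInstance peer objects, but only 5
 * instanceLink structures, each with refcount == 100: 5 pairs of hiredis
 * connections and 5 ping cycles instead of 500. */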
+
+typedef struct sentinelRedisInstance {
+ int flags; /* See SRI_... defines */
+ char *name; /* Master name from the point of view of this sentinel. */
+ char *runid; /* Run ID of this instance, or unique ID if is a Sentinel.*/
+ uint64_t config_epoch; /* Configuration epoch. */
+ sentinelAddr *addr; /* Master host. */
+ instanceLink *link; /* Link to the instance, may be shared for Sentinels. */
mstime_t last_pub_time; /* Last time we sent hello via Pub/Sub. */
mstime_t last_hello_time; /* Only used if SRI_SENTINEL is set. Last time
we received a hello from this Sentinel
@@ -146,6 +178,10 @@ typedef struct sentinelRedisInstance {
mstime_t o_down_since_time; /* Objectively down since time. */
mstime_t down_after_period; /* Consider it down after that period. */
mstime_t info_refresh; /* Time at which we received INFO output from it. */
+ dict *renamed_commands; /* Commands renamed in this instance:
+ Sentinel will use the alternative commands
+ mapped on this table to send things like
+ SLAVEOF, CONFIG, INFO, ... */
/* Role and the first time we observed it.
* This is useful in order to delay replacing what the instance reports
@@ -195,19 +231,23 @@ typedef struct sentinelRedisInstance {
/* Main state. */
struct sentinelState {
- uint64_t current_epoch; /* Current epoch. */
+ char myid[CONFIG_RUN_ID_SIZE+1]; /* This sentinel ID. */
+ uint64_t current_epoch; /* Current epoch. */
dict *masters; /* Dictionary of master sentinelRedisInstances.
Key is the instance name, value is the
sentinelRedisInstance structure pointer. */
int tilt; /* Are we in TILT mode? */
int running_scripts; /* Number of scripts in execution right now. */
- mstime_t tilt_start_time; /* When TITL started. */
- mstime_t previous_time; /* Last time we ran the time handler. */
- list *scripts_queue; /* Queue of user scripts to execute. */
- char *announce_ip; /* IP addr that is gossiped to other sentinels if
- not NULL. */
- int announce_port; /* Port that is gossiped to other sentinels if
- non zero. */
+ mstime_t tilt_start_time; /* When TILT started. */
+ mstime_t previous_time; /* Last time we ran the time handler. */
+ list *scripts_queue; /* Queue of user scripts to execute. */
+ char *announce_ip; /* IP addr that is gossiped to other sentinels if
+ not NULL. */
+ int announce_port; /* Port that is gossiped to other sentinels if
+ non zero. */
+ unsigned long simfailure_flags; /* Failure simulation flags. */
+ int deny_scripts_reconfig; /* Allow SENTINEL SET ... to change script
+ paths at runtime? */
} sentinel;
/* A script execution job. */
@@ -298,7 +338,7 @@ static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
/* Nothing should be attached when something is already attached */
if (ac->ev.data != NULL)
- return REDIS_ERR;
+ return C_ERR;
/* Create container for context and r/w events */
e = (redisAeEvents*)zmalloc(sizeof(*e));
@@ -315,7 +355,7 @@ static int redisAeAttach(aeEventLoop *loop, redisAsyncContext *ac) {
ac->ev.cleanup = redisAeCleanup;
ac->ev.data = e;
- return REDIS_OK;
+ return C_OK;
}
/* ============================= Prototypes ================================= */
@@ -327,8 +367,7 @@ sentinelRedisInstance *sentinelGetMasterByName(char *name);
char *sentinelGetSubjectiveLeader(sentinelRedisInstance *master);
char *sentinelGetObjectiveLeader(sentinelRedisInstance *master);
int yesnotoi(char *s);
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c);
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c);
+void instanceLinkConnectionError(const redisAsyncContext *c);
const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri);
void sentinelAbortFailover(sentinelRedisInstance *ri);
void sentinelEvent(int level, char *type, sentinelRedisInstance *ri, const char *fmt, ...);
@@ -342,15 +381,19 @@ void sentinelFlushConfig(void);
void sentinelGenerateInitialMonitorEvents(void);
int sentinelSendPing(sentinelRedisInstance *ri);
int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master);
+sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, char *ip, int port, char *runid);
+void sentinelSimFailureCrash(void);
/* ========================= Dictionary types =============================== */
-unsigned int dictSdsHash(const void *key);
+uint64_t dictSdsHash(const void *key);
+uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
+int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void releaseSentinelRedisInstance(sentinelRedisInstance *ri);
void dictInstancesValDestructor (void *privdata, void *obj) {
- REDIS_NOTUSED(privdata);
+ UNUSED(privdata);
releaseSentinelRedisInstance(obj);
}
@@ -380,13 +423,23 @@ dictType leaderVotesDictType = {
NULL /* val destructor */
};
+/* Instance renamed commands table. */
+dictType renamedCommandsDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictSdsDestructor /* val destructor */
+};
+
/* =========================== Initialization =============================== */
-void sentinelCommand(redisClient *c);
-void sentinelInfoCommand(redisClient *c);
-void sentinelSetCommand(redisClient *c);
-void sentinelPublishCommand(redisClient *c);
-void sentinelRoleCommand(redisClient *c);
+void sentinelCommand(client *c);
+void sentinelInfoCommand(client *c);
+void sentinelSetCommand(client *c);
+void sentinelPublishCommand(client *c);
+void sentinelRoleCommand(client *c);
struct redisCommand sentinelcmds[] = {
{"ping",pingCommand,1,"",0,NULL,0,0,0,0,0},
@@ -398,6 +451,7 @@ struct redisCommand sentinelcmds[] = {
{"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0},
{"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0},
{"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}
};
@@ -419,7 +473,7 @@ void initSentinel(void) {
struct redisCommand *cmd = sentinelcmds+j;
retval = dictAdd(server.commands, sdsnew(cmd->name), cmd);
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
/* Initialize various data structures. */
@@ -432,24 +486,42 @@ void initSentinel(void) {
sentinel.scripts_queue = listCreate();
sentinel.announce_ip = NULL;
sentinel.announce_port = 0;
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ sentinel.deny_scripts_reconfig = SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG;
+ memset(sentinel.myid,0,sizeof(sentinel.myid));
}
/* This function gets called when the server is in Sentinel mode, started,
* loaded the configuration, and is ready for normal operations. */
void sentinelIsRunning(void) {
- redisLog(REDIS_WARNING,"Sentinel runid is %s", server.runid);
+ int j;
if (server.configfile == NULL) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel started without a config file. Exiting...");
exit(1);
} else if (access(server.configfile,W_OK) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel config file %s is not writable: %s. Exiting...",
server.configfile,strerror(errno));
exit(1);
}
+ /* If this Sentinel has no ID set in the configuration file yet, we
+ * pick a random one and persist it in the config file on disk. From
+ * now on this will be the ID of this Sentinel across restarts. */
+ for (j = 0; j < CONFIG_RUN_ID_SIZE; j++)
+ if (sentinel.myid[j] != 0) break;
+
+ if (j == CONFIG_RUN_ID_SIZE) {
+ /* Pick ID and persist the config. */
+ getRandomHexChars(sentinel.myid,CONFIG_RUN_ID_SIZE);
+ sentinelFlushConfig();
+ }
+
+ /* Log its ID to make debugging of issues simpler. */
+ serverLog(LL_WARNING,"Sentinel ID is %s", sentinel.myid);
+
/* We want to generate a +monitor event for every configured master
* at startup. */
sentinelGenerateInitialMonitorEvents();
@@ -463,10 +535,10 @@ void sentinelIsRunning(void) {
* EINVAL: Invalid port number.
*/
sentinelAddr *createSentinelAddr(char *hostname, int port) {
- char ip[REDIS_IP_STR_LEN];
+ char ip[NET_IP_STR_LEN];
sentinelAddr *sa;
- if (port <= 0 || port > 65535) {
+ if (port < 0 || port > 65535) {
errno = EINVAL;
return NULL;
}
@@ -505,7 +577,7 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
/* Send an event to log, pub/sub, user notification script.
*
- * 'level' is the log level for logging. Only REDIS_WARNING events will trigger
+ * 'level' is the log level for logging. Only LL_WARNING events will trigger
* the execution of the user notification script.
*
* 'type' is the message type, also used as a pub/sub channel name.
@@ -530,7 +602,7 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
const char *fmt, ...) {
va_list ap;
- char msg[REDIS_MAX_LOGMSG_LEN];
+ char msg[LOG_MAX_LEN];
robj *channel, *payload;
/* Handle %@ */
@@ -562,10 +634,10 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
/* Log the message if the log level allows it to be logged. */
if (level >= server.verbosity)
- redisLog(level,"%s %s",type,msg);
+ serverLog(level,"%s %s",type,msg);
/* Publish the message via Pub/Sub if it's not a debugging one. */
- if (level != REDIS_DEBUG) {
+ if (level != LL_DEBUG) {
channel = createStringObject(type,strlen(type));
payload = createStringObject(msg,strlen(msg));
pubsubPublishMessage(channel,payload);
@@ -574,7 +646,7 @@ void sentinelEvent(int level, char *type, sentinelRedisInstance *ri,
}
/* Call the notification script if applicable. */
- if (level == REDIS_WARNING && ri != NULL) {
+ if (level == LL_WARNING && ri != NULL) {
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ?
ri : ri->master;
if (master && master->notification_script) {
@@ -595,7 +667,7 @@ void sentinelGenerateInitialMonitorEvents(void) {
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
- sentinelEvent(REDIS_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
}
dictReleaseIterator(di);
}
@@ -653,7 +725,7 @@ void sentinelScheduleScriptExecution(char *path, ...) {
sentinelReleaseScriptJob(sj);
break;
}
- redisAssert(listLength(sentinel.scripts_queue) <=
+ serverAssert(listLength(sentinel.scripts_queue) <=
SENTINEL_SCRIPT_MAX_QUEUE);
}
}
@@ -705,7 +777,7 @@ void sentinelRunPendingScripts(void) {
/* Parent (fork error).
* We report fork errors as signal 99, in order to unify the
* reporting with other kind of errors. */
- sentinelEvent(REDIS_WARNING,"-script-error",NULL,
+ sentinelEvent(LL_WARNING,"-script-error",NULL,
"%s %d %d", sj->argv[0], 99, 0);
sj->flags &= ~SENTINEL_SCRIPT_RUNNING;
sj->pid = 0;
@@ -717,7 +789,7 @@ void sentinelRunPendingScripts(void) {
} else {
sentinel.running_scripts++;
sj->pid = pid;
- sentinelEvent(REDIS_DEBUG,"+script-child",NULL,"%ld",(long)pid);
+ sentinelEvent(LL_DEBUG,"+script-child",NULL,"%ld",(long)pid);
}
}
}
@@ -751,12 +823,12 @@ void sentinelCollectTerminatedScripts(void) {
sentinelScriptJob *sj;
if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
- sentinelEvent(REDIS_DEBUG,"-script-child",NULL,"%ld %d %d",
+ sentinelEvent(LL_DEBUG,"-script-child",NULL,"%ld %d %d",
(long)pid, exitcode, bysignal);
ln = sentinelGetScriptListNodeByPid(pid);
if (ln == NULL) {
- redisLog(REDIS_WARNING,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
+ serverLog(LL_WARNING,"wait3() returned a pid (%ld) we can't find in our scripts execution queue!", (long)pid);
continue;
}
sj = ln->value;
@@ -775,7 +847,7 @@ void sentinelCollectTerminatedScripts(void) {
/* Otherwise let's remove the script, but log the event if the
* execution did not terminate in the best of the ways. */
if (bysignal || exitcode != 0) {
- sentinelEvent(REDIS_WARNING,"-script-error",NULL,
+ sentinelEvent(LL_WARNING,"-script-error",NULL,
"%s %d %d", sj->argv[0], bysignal, exitcode);
}
listDelNode(sentinel.scripts_queue,ln);
@@ -799,7 +871,7 @@ void sentinelKillTimedoutScripts(void) {
if (sj->flags & SENTINEL_SCRIPT_RUNNING &&
(now - sj->start_time) > SENTINEL_SCRIPT_MAX_RUNTIME)
{
- sentinelEvent(REDIS_WARNING,"-script-timeout",NULL,"%s %ld",
+ sentinelEvent(LL_WARNING,"-script-timeout",NULL,"%s %ld",
sj->argv[0], (long)sj->pid);
kill(sj->pid,SIGKILL);
}
@@ -807,7 +879,7 @@ void sentinelKillTimedoutScripts(void) {
}
/* Implements SENTINEL PENDING-SCRIPTS command. */
-void sentinelPendingScriptsCommand(redisClient *c) {
+void sentinelPendingScriptsCommand(client *c) {
listNode *ln;
listIter li;
@@ -871,6 +943,201 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
state, from->ip, fromport, to->ip, toport, NULL);
}
+/* =============================== instanceLink ============================= */
+
+/* Create a not yet connected link object. */
+instanceLink *createInstanceLink(void) {
+ instanceLink *link = zmalloc(sizeof(*link));
+
+ link->refcount = 1;
+ link->disconnected = 1;
+ link->pending_commands = 0;
+ link->cc = NULL;
+ link->pc = NULL;
+ link->cc_conn_time = 0;
+ link->pc_conn_time = 0;
+ link->last_reconn_time = 0;
+ link->pc_last_activity = 0;
+ /* We set the act_ping_time to "now" even if we don't yet have a
+ * connection with the node, nor have we sent a ping.
+ * This is useful to detect a timeout in case we are never able to
+ * connect with the node at all. */
+ link->act_ping_time = mstime();
+ link->last_ping_time = 0;
+ link->last_avail_time = mstime();
+ link->last_pong_time = mstime();
+ return link;
+}
+
+/* Disconnect a hiredis connection in the context of an instance link. */
+void instanceLinkCloseConnection(instanceLink *link, redisAsyncContext *c) {
+ if (c == NULL) return;
+
+ if (link->cc == c) {
+ link->cc = NULL;
+ link->pending_commands = 0;
+ }
+ if (link->pc == c) link->pc = NULL;
+ c->data = NULL;
+ link->disconnected = 1;
+ redisAsyncFree(c);
+}
+
+/* Decrement the refcount of a link object; if it drops to zero, actually
+ * free it and return NULL. Otherwise do nothing and return the pointer
+ * to the object.
+ *
+ * If we are not going to free the link and ri is not NULL, we rebind all the
+ * pending requests in link->cc (hiredis connection for commands) to a
+ * callback that will just ignore them. This is useful to avoid processing
+ * replies for an instance that no longer exists. */
+instanceLink *releaseInstanceLink(instanceLink *link, sentinelRedisInstance *ri)
+{
+ serverAssert(link->refcount > 0);
+ link->refcount--;
+ if (link->refcount != 0) {
+ if (ri && ri->link->cc) {
+ /* This instance may have pending callbacks in the hiredis async
+ * context, having as 'privdata' the instance that we are going to
+ * free. Let's rewrite the callback list, directly exploiting
+ * hiredis internal data structures, in order to bind them with
+ * a callback that will simply ignore the reply. */
+ redisCallback *cb;
+ redisCallbackList *callbacks = &link->cc->replies;
+
+ cb = callbacks->head;
+ while(cb) {
+ if (cb->privdata == ri) {
+ cb->fn = sentinelDiscardReplyCallback;
+ cb->privdata = NULL; /* Not strictly needed. */
+ }
+ cb = cb->next;
+ }
+ }
+ return link; /* Other active users. */
+ }
+
+ instanceLinkCloseConnection(link,link->cc);
+ instanceLinkCloseConnection(link,link->pc);
+ zfree(link);
+ return NULL;
+}
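
The two call patterns above, as used later in this patch: callers that merely
drop a reference (e.g. when switching to a shared link) pass NULL as the
second argument, while callers destroying an instance pass the instance
itself so its pending replies get discarded:

    releaseInstanceLink(ri->link,NULL); /* Just drop one reference. */
    releaseInstanceLink(ri->link,ri);   /* Instance goes away: also rebind
                                           its pending reply callbacks. */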
+
+/* This function will attempt to share the instance link we already have
+ * for the same Sentinel in the context of a different master, with the
+ * instance we are passing as argument.
+ *
+ * This way multiple Sentinel objects that refer all to the same physical
+ * Sentinel instance but in the context of different masters will use
+ * a single connection, will send a single PING per second for failure
+ * detection and so forth.
+ *
+ * Return C_OK if a matching Sentinel was found in the context of a
+ * different master and sharing was performed. Otherwise C_ERR
+ * is returned. */
+int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
+ serverAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+
+ if (ri->runid == NULL) return C_ERR; /* No way to identify it. */
+ if (ri->link->refcount > 1) return C_ERR; /* Already shared. */
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ /* We want to share with the same physical Sentinel referenced
+ * in other masters, so skip our master. */
+ if (master == ri->master) continue;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ if (match == NULL) continue; /* No match. */
+ if (match == ri) continue; /* Should never happen but... safer. */
+
+ /* We identified a matching Sentinel, great! Let's free our link
+ * and use the one of the matching Sentinel. */
+ releaseInstanceLink(ri->link,NULL);
+ ri->link = match->link;
+ match->link->refcount++;
+ dictReleaseIterator(di); /* Don't leak the iterator on this early return. */
+ return C_OK;
+ }
+ dictReleaseIterator(di);
+ return C_ERR;
+}
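
To make the sharing concrete, a small sketch (names are hypothetical): once
sentinelTryConnectionSharing() succeeds, the two per-master instance objects
describing the same physical Sentinel point at one refcounted link, so a
single command connection and a single PING per second serve both:

    masterA->sentinels[<runid>]->link --+
                                        +--> one instanceLink, refcount == 2
    masterB->sentinels[<runid>]->link --+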
+
+/* When we detect that a Sentinel switched address (it reports a different
+ * IP/port pair in Hello messages), update all the matching Sentinels in the
+ * context of the other masters as well, and disconnect the links, so that
+ * everybody will be updated.
+ *
+ * Return the number of updated Sentinel addresses. */
+int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) {
+ serverAssert(ri->flags & SRI_SENTINEL);
+ dictIterator *di;
+ dictEntry *de;
+ int reconfigured = 0;
+
+ di = dictGetIterator(sentinel.masters);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *master = dictGetVal(de), *match;
+ match = getSentinelRedisInstanceByAddrAndRunID(master->sentinels,
+ NULL,0,ri->runid);
+ /* If there is no match, this master does not know about this
+ * Sentinel, try with the next one. */
+ if (match == NULL) continue;
+
+ /* Disconnect the old links if connected. */
+ if (match->link->cc != NULL)
+ instanceLinkCloseConnection(match->link,match->link->cc);
+ if (match->link->pc != NULL)
+ instanceLinkCloseConnection(match->link,match->link->pc);
+
+ if (match == ri) continue; /* Address already updated for it. */
+
+ /* Update the address of the matching Sentinel by copying the address
+ * of the Sentinel object that received the address update. */
+ releaseSentinelAddr(match->addr);
+ match->addr = dupSentinelAddr(ri->addr);
+ reconfigured++;
+ }
+ dictReleaseIterator(di);
+ if (reconfigured)
+ sentinelEvent(LL_NOTICE,"+sentinel-address-update", ri,
+ "%@ %d additional matching instances", reconfigured);
+ return reconfigured;
+}
+
+/* This function is called when a hiredis connection reports an error.
+ * We set it to NULL and mark the link as disconnected so that it will be
+ * reconnected again.
+ *
+ * Note: we don't free the hiredis context as hiredis will do it for us
+ * for async connections. */
+void instanceLinkConnectionError(const redisAsyncContext *c) {
+ instanceLink *link = c->data;
+ int pubsub;
+
+ if (!link) return;
+
+ pubsub = (link->pc == c);
+ if (pubsub)
+ link->pc = NULL;
+ else
+ link->cc = NULL;
+ link->disconnected = 1;
+}
+
+/* Hiredis connection established / disconnected callbacks. We need them
+ * just to cleanup our link state. */
+void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
+ if (status != C_OK) instanceLinkConnectionError(c);
+}
+
+void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
+ UNUSED(status);
+ instanceLinkConnectionError(c);
+}
+
/* ========================== sentinelRedisInstance ========================= */
/* Create a redis instance; the following fields must be populated by the
@@ -892,22 +1159,24 @@ void sentinelCallClientReconfScript(sentinelRedisInstance *master, int role, cha
* createSentinelAddr() function.
*
* The function may also fail and return NULL with errno set to EBUSY if
- * a master or slave with the same name already exists. */
+ * a master with the same name, a slave with the same address, or a sentinel
+ * with the same ID already exists. */
+
sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *hostname, int port, int quorum, sentinelRedisInstance *master) {
sentinelRedisInstance *ri;
sentinelAddr *addr;
dict *table = NULL;
- char slavename[REDIS_PEER_ID_LEN], *sdsname;
+ char slavename[NET_PEER_ID_LEN], *sdsname;
- redisAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
- redisAssert((flags & SRI_MASTER) || master != NULL);
+ serverAssert(flags & (SRI_MASTER|SRI_SLAVE|SRI_SENTINEL));
+ serverAssert((flags & SRI_MASTER) || master != NULL);
/* Check address validity. */
addr = createSentinelAddr(hostname,port);
if (addr == NULL) return NULL;
- /* For slaves and sentinel we use ip:port as name. */
- if (flags & (SRI_SLAVE|SRI_SENTINEL)) {
+ /* For slaves use ip:port as name. */
+ if (flags & SRI_SLAVE) {
anetFormatAddr(slavename, sizeof(slavename), hostname, port);
name = slavename;
}
@@ -921,6 +1190,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
else if (flags & SRI_SENTINEL) table = master->sentinels;
sdsname = sdsnew(name);
if (dictFind(table,sdsname)) {
+ releaseSentinelAddr(addr);
sdsfree(sdsname);
errno = EBUSY;
return NULL;
@@ -930,24 +1200,12 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri = zmalloc(sizeof(*ri));
/* Note that all the instances are started in the disconnected state;
* the event loop will take care of connecting them. */
- ri->flags = flags | SRI_DISCONNECTED;
+ ri->flags = flags;
ri->name = sdsname;
ri->runid = NULL;
ri->config_epoch = 0;
ri->addr = addr;
- ri->cc = NULL;
- ri->pc = NULL;
- ri->pending_commands = 0;
- ri->cc_conn_time = 0;
- ri->pc_conn_time = 0;
- ri->pc_last_activity = 0;
- /* We set the last_ping_time to "now" even if we actually don't have yet
- * a connection with the node, nor we sent a ping.
- * This is useful to detect a timeout in case we'll not be able to connect
- * with the node at all. */
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link = createInstanceLink();
ri->last_pub_time = mstime();
ri->last_hello_time = mstime();
ri->last_master_down_reply_time = mstime();
@@ -969,6 +1227,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->master = master;
ri->slaves = dictCreate(&instancesDictType,NULL);
ri->info_refresh = 0;
+ ri->renamed_commands = dictCreate(&renamedCommandsDictType,NULL);
/* Failover state. */
ri->leader = NULL;
@@ -1003,9 +1262,8 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
dictRelease(ri->sentinels);
dictRelease(ri->slaves);
- /* Release hiredis connections. */
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
+ /* Disconnect the instance. */
+ releaseInstanceLink(ri->link,ri);
/* Free other resources. */
sdsfree(ri->name);
@@ -1017,6 +1275,7 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
sdsfree(ri->auth_pass);
sdsfree(ri->info);
releaseSentinelAddr(ri->addr);
+ dictRelease(ri->renamed_commands);
/* Clear state into the master if needed. */
if ((ri->flags & SRI_SLAVE) && (ri->flags & SRI_PROMOTED) && ri->master)
@@ -1031,9 +1290,9 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
{
sds key;
sentinelRedisInstance *slave;
- char buf[REDIS_PEER_ID_LEN];
+ char buf[NET_PEER_ID_LEN];
- redisAssert(ri->flags & SRI_MASTER);
+ serverAssert(ri->flags & SRI_MASTER);
anetFormatAddr(buf,sizeof(buf),ip,port);
key = sdsnew(buf);
slave = dictFetchValue(ri->slaves,key);
@@ -1049,35 +1308,29 @@ const char *sentinelRedisInstanceTypeStr(sentinelRedisInstance *ri) {
else return "unknown";
}
-/* This function removes all the instances found in the dictionary of
- * sentinels in the specified 'master', having either:
- *
- * 1) The same ip/port as specified.
- * 2) The same runid.
+/* This function removes the Sentinel with the specified ID from the
+ * specified master.
*
- * "1" and "2" don't need to verify at the same time, just one is enough.
- * If "runid" is NULL it is not checked.
- * Similarly if "ip" is NULL it is not checked.
+ * If "runid" is NULL the function returns ASAP.
*
- * This function is useful because every time we add a new Sentinel into
- * a master's Sentinels dictionary, we want to be very sure about not
- * having duplicated instances for any reason. This is important because
- * other sentinels are needed to reach ODOWN quorum, and later to get
- * voted for a given configuration epoch in order to perform the failover.
+ * This function is useful because, when a Sentinel switches address, we
+ * want to remove our old entry and add a new one for the same ID but with
+ * the new address.
*
- * The function returns the number of Sentinels removed. */
-int removeMatchingSentinelsFromMaster(sentinelRedisInstance *master, char *ip, int port, char *runid) {
+ * The function returns 1 if the matching Sentinel was removed, otherwise
+ * 0 if there was no Sentinel with this ID. */
+int removeMatchingSentinelFromMaster(sentinelRedisInstance *master, char *runid) {
dictIterator *di;
dictEntry *de;
int removed = 0;
+ if (runid == NULL) return 0;
+
di = dictGetSafeIterator(master->sentinels);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
- if ((ri->runid && runid && strcmp(ri->runid,runid) == 0) ||
- (ip && strcmp(ri->addr->ip,ip) == 0 && port == ri->addr->port))
- {
+ if (ri->runid && strcmp(ri->runid,runid) == 0) {
dictDelete(master->sentinels,ri->name);
removed++;
}
@@ -1097,7 +1350,7 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
dictEntry *de;
sentinelRedisInstance *instance = NULL;
- redisAssert(ip || runid); /* User must pass at least one search param. */
+ serverAssert(ip || runid); /* User must pass at least one search param. */
di = dictGetIterator(instances);
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *ri = dictGetVal(de);
@@ -1156,42 +1409,45 @@ void sentinelDelFlagsToDictOfRedisInstances(dict *instances, int flags) {
* 1) Remove all slaves.
* 2) Remove all sentinels.
* 3) Remove most of the flags resulting from runtime operations.
- * 4) Reset timers to their default value.
+ * 4) Reset timers to their default value. For example after a reset it will
+ * be possible to fail over the same master again ASAP, without waiting
+ * for the failover timeout delay.
* 5) In the process of doing this undo the failover if in progress.
* 6) Disconnect the connections with the master (will reconnect automatically).
*/
#define SENTINEL_RESET_NO_SENTINELS (1<<0)
void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
- redisAssert(ri->flags & SRI_MASTER);
+ serverAssert(ri->flags & SRI_MASTER);
dictRelease(ri->slaves);
ri->slaves = dictCreate(&instancesDictType,NULL);
if (!(flags & SENTINEL_RESET_NO_SENTINELS)) {
dictRelease(ri->sentinels);
ri->sentinels = dictCreate(&instancesDictType,NULL);
}
- if (ri->cc) sentinelKillLink(ri,ri->cc);
- if (ri->pc) sentinelKillLink(ri,ri->pc);
- ri->flags &= SRI_MASTER|SRI_DISCONNECTED;
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
+ ri->flags &= SRI_MASTER;
if (ri->leader) {
sdsfree(ri->leader);
ri->leader = NULL;
}
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
ri->failover_state_change_time = 0;
- ri->failover_start_time = 0;
+ ri->failover_start_time = 0; /* We can failover again ASAP. */
ri->promoted_slave = NULL;
sdsfree(ri->runid);
sdsfree(ri->slave_master_host);
ri->runid = NULL;
ri->slave_master_host = NULL;
- ri->last_ping_time = mstime();
- ri->last_avail_time = mstime();
- ri->last_pong_time = mstime();
+ ri->link->act_ping_time = mstime();
+ ri->link->last_ping_time = 0;
+ ri->link->last_avail_time = mstime();
+ ri->link->last_pong_time = mstime();
ri->role_reported_time = mstime();
ri->role_reported = SRI_MASTER;
if (flags & SENTINEL_GENERATE_EVENT)
- sentinelEvent(REDIS_WARNING,"+reset-master",ri,"%@");
+ sentinelEvent(LL_WARNING,"+reset-master",ri,"%@");
}
/* Call sentinelResetMaster() on every master with a name matching the specified
@@ -1221,8 +1477,8 @@ int sentinelResetMastersByPattern(char *pattern, int flags) {
*
* This is used to handle the +switch-master event.
*
- * The function returns REDIS_ERR if the address can't be resolved for some
- * reason. Otherwise REDIS_OK is returned. */
+ * The function returns C_ERR if the address can't be resolved for some
+ * reason. Otherwise C_OK is returned. */
int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip, int port) {
sentinelAddr *oldaddr, *newaddr;
sentinelAddr **slaves = NULL;
@@ -1231,7 +1487,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
dictEntry *de;
newaddr = createSentinelAddr(ip,port);
- if (newaddr == NULL) return REDIS_ERR;
+ if (newaddr == NULL) return C_ERR;
/* Make a list of slaves to add back after the reset.
* Don't include the one having the address we are switching to. */
@@ -1269,10 +1525,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
slave = createSentinelRedisInstance(NULL,SRI_SLAVE,slaves[j]->ip,
slaves[j]->port, master->quorum, master);
releaseSentinelAddr(slaves[j]);
- if (slave) {
- sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
- sentinelFlushConfig();
- }
+ if (slave) sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
}
zfree(slaves);
@@ -1280,7 +1533,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *ip,
* gets the master->addr->ip and master->addr->port as arguments. */
releaseSentinelAddr(oldaddr);
sentinelFlushConfig();
- return REDIS_OK;
+ return C_OK;
}
/* Return non-zero if there was no SDOWN or ODOWN error associated to this
@@ -1330,6 +1583,28 @@ void sentinelPropagateDownAfterPeriod(sentinelRedisInstance *master) {
}
}
+char *sentinelGetInstanceTypeString(sentinelRedisInstance *ri) {
+ if (ri->flags & SRI_MASTER) return "master";
+ else if (ri->flags & SRI_SLAVE) return "slave";
+ else if (ri->flags & SRI_SENTINEL) return "sentinel";
+ else return "unknown";
+}
+
+/* This function is used in order to send commands to Redis instances: the
+ * commands we send from Sentinel may be renamed; a common case is a master
+ * with the CONFIG and SLAVEOF commands renamed for security concerns. In
+ * that case we check the ri->renamed_commands table (or, if the instance is
+ * a slave, the table of its master), and map the command that we should
+ * send to its renamed form. However, if the command was not renamed,
+ * we just return "command" itself. */
+char *sentinelInstanceMapCommand(sentinelRedisInstance *ri, char *command) {
+ sds sc = sdsnew(command);
+ if (ri->master) ri = ri->master;
+ char *retval = dictFetchValue(ri->renamed_commands, sc);
+ sdsfree(sc);
+ return retval ? retval : command;
+}
+
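
As a quick illustration of the mapping (the rename below is hypothetical),
a configuration line such as

    sentinel rename-command mymaster CONFIG guesswho

makes sentinelInstanceMapCommand(ri,"CONFIG") return "guesswho" for that
master and for its slaves (slaves are resolved through ri->master), while a
command that was never renamed, like "PING", falls through and is returned
unchanged.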
/* ============================ Config handling ============================= */
char *sentinelHandleConfiguration(char **argv, int argc) {
sentinelRedisInstance *ri;
@@ -1393,6 +1668,10 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
unsigned long long current_epoch = strtoull(argv[1],NULL,10);
if (current_epoch > sentinel.current_epoch)
sentinel.current_epoch = current_epoch;
+ } else if (!strcasecmp(argv[0],"myid") && argc == 2) {
+ if (strlen(argv[1]) != CONFIG_RUN_ID_SIZE)
+ return "Malformed Sentinel id in myid option.";
+ memcpy(sentinel.myid,argv[1],CONFIG_RUN_ID_SIZE);
} else if (!strcasecmp(argv[0],"config-epoch") && argc == 3) {
/* config-epoch <name> <epoch> */
ri = sentinelGetMasterByName(argv[1]);
@@ -1423,15 +1702,29 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
(argc == 4 || argc == 5)) {
sentinelRedisInstance *si;
- /* known-sentinel <name> <ip> <port> [runid] */
+ if (argc == 5) { /* Ignore the old form without runid. */
+ /* known-sentinel <name> <ip> <port> [runid] */
+ ri = sentinelGetMasterByName(argv[1]);
+ if (!ri) return "No such master with specified name.";
+ if ((si = createSentinelRedisInstance(argv[4],SRI_SENTINEL,argv[2],
+ atoi(argv[3]), ri->quorum, ri)) == NULL)
+ {
+ return "Wrong hostname or port for sentinel.";
+ }
+ si->runid = sdsnew(argv[4]);
+ sentinelTryConnectionSharing(si);
+ }
+ } else if (!strcasecmp(argv[0],"rename-command") && argc == 4) {
+ /* rename-command <name> <command> <renamed-command> */
ri = sentinelGetMasterByName(argv[1]);
if (!ri) return "No such master with specified name.";
- if ((si = createSentinelRedisInstance(NULL,SRI_SENTINEL,argv[2],
- atoi(argv[3]), ri->quorum, ri)) == NULL)
- {
- return "Wrong hostname or port for sentinel.";
+ sds oldcmd = sdsnew(argv[2]);
+ sds newcmd = sdsnew(argv[3]);
+ if (dictAdd(ri->renamed_commands,oldcmd,newcmd) != DICT_OK) {
+ sdsfree(oldcmd);
+ sdsfree(newcmd);
+ return "Same command renamed multiple times with rename-command.";
}
- if (argc == 5) si->runid = sdsnew(argv[4]);
} else if (!strcasecmp(argv[0],"announce-ip") && argc == 2) {
/* announce-ip <ip-address> */
if (strlen(argv[1]))
@@ -1439,6 +1732,12 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
} else if (!strcasecmp(argv[0],"announce-port") && argc == 2) {
/* announce-port <port> */
sentinel.announce_port = atoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"deny-scripts-reconfig") && argc == 2) {
+ /* deny-scripts-reconfig <yes|no> */
+ if ((sentinel.deny_scripts_reconfig = yesnotoi(argv[1])) == -1) {
+ return "Please specify yes or no for the "
+ "deny-scripts-reconfig options.";
+ }
} else {
return "Unrecognized sentinel configuration statement.";
}
@@ -1455,6 +1754,16 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
dictEntry *de;
sds line;
+ /* sentinel unique ID. */
+ line = sdscatprintf(sdsempty(), "sentinel myid %s", sentinel.myid);
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+
+ /* sentinel deny-scripts-reconfig. */
+ line = sdscatprintf(sdsempty(), "sentinel deny-scripts-reconfig %s",
+ sentinel.deny_scripts_reconfig ? "yes" : "no");
+ rewriteConfigRewriteLine(state,"sentinel",line,
+ sentinel.deny_scripts_reconfig != SENTINEL_DEFAULT_DENY_SCRIPTS_RECONFIG);
+
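
For reference, the two entries emitted above take this shape in the
rewritten file (the ID below is made up):

    sentinel myid 0123456789abcdef0123456789abcdef01234567
    sentinel deny-scripts-reconfig yes

where the second line is only forced out when the value differs from the
compiled-in default.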
/* For every master emit a "sentinel monitor" config entry. */
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -1546,7 +1855,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
slave_addr = master->addr;
line = sdscatprintf(sdsempty(),
"sentinel known-slave %s %s %d",
- master->name, ri->addr->ip, ri->addr->port);
+ master->name, slave_addr->ip, slave_addr->port);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
@@ -1555,11 +1864,22 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) {
di2 = dictGetIterator(master->sentinels);
while((de = dictNext(di2)) != NULL) {
ri = dictGetVal(de);
+ if (ri->runid == NULL) continue;
line = sdscatprintf(sdsempty(),
- "sentinel known-sentinel %s %s %d%s%s",
- master->name, ri->addr->ip, ri->addr->port,
- ri->runid ? " " : "",
- ri->runid ? ri->runid : "");
+ "sentinel known-sentinel %s %s %d %s",
+ master->name, ri->addr->ip, ri->addr->port, ri->runid);
+ rewriteConfigRewriteLine(state,"sentinel",line,1);
+ }
+ dictReleaseIterator(di2);
+
+ /* sentinel rename-command */
+ di2 = dictGetIterator(master->renamed_commands);
+ while((de = dictNext(di2)) != NULL) {
+ sds oldname = dictGetKey(de);
+ sds newname = dictGetVal(de);
+ line = sdscatprintf(sdsempty(),
+ "sentinel rename-command %s %s %s",
+ master->name, oldname, newname);
rewriteConfigRewriteLine(state,"sentinel",line,1);
}
dictReleaseIterator(di2);
@@ -1599,7 +1919,7 @@ void sentinelFlushConfig(void) {
int saved_hz = server.hz;
int rewrite_status;
- server.hz = REDIS_DEFAULT_HZ;
+ server.hz = CONFIG_DEFAULT_HZ;
rewrite_status = rewriteConfig(server.configfile);
server.hz = saved_hz;
@@ -1611,62 +1931,11 @@ void sentinelFlushConfig(void) {
werr:
if (fd != -1) close(fd);
- redisLog(REDIS_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
+ serverLog(LL_WARNING,"WARNING: Sentinel was not able to save the new configuration on disk!!!: %s", strerror(errno));
}
/* ====================== hiredis connection handling ======================= */
-/* Completely disconnect a hiredis link from an instance. */
-void sentinelKillLink(sentinelRedisInstance *ri, redisAsyncContext *c) {
- if (ri->cc == c) {
- ri->cc = NULL;
- ri->pending_commands = 0;
- }
- if (ri->pc == c) ri->pc = NULL;
- c->data = NULL;
- ri->flags |= SRI_DISCONNECTED;
- redisAsyncFree(c);
-}
-
-/* This function takes a hiredis context that is in an error condition
- * and make sure to mark the instance as disconnected performing the
- * cleanup needed.
- *
- * Note: we don't free the hiredis context as hiredis will do it for us
- * for async connections. */
-void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c) {
- sentinelRedisInstance *ri = c->data;
- int pubsub;
-
- if (ri == NULL) return; /* The instance no longer exists. */
-
- pubsub = (ri->pc == c);
- sentinelEvent(REDIS_DEBUG, pubsub ? "-pubsub-link" : "-cmd-link", ri,
- "%@ #%s", c->errstr);
- if (pubsub)
- ri->pc = NULL;
- else
- ri->cc = NULL;
- ri->flags |= SRI_DISCONNECTED;
-}
-
-void sentinelLinkEstablishedCallback(const redisAsyncContext *c, int status) {
- if (status != REDIS_OK) {
- sentinelDisconnectInstanceFromContext(c);
- } else {
- sentinelRedisInstance *ri = c->data;
- int pubsub = (ri->pc == c);
-
- sentinelEvent(REDIS_DEBUG, pubsub ? "+pubsub-link" : "+cmd-link", ri,
- "%@");
- }
-}
-
-void sentinelDisconnectCallback(const redisAsyncContext *c, int status) {
- REDIS_NOTUSED(status);
- sentinelDisconnectInstanceFromContext(c);
-}
-
/* Send the AUTH command with the specified master password if needed.
* Note that for slaves the password set for the master is used.
*
@@ -1678,8 +1947,9 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
ri->master->auth_pass;
if (auth_pass) {
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL, "AUTH %s",
- auth_pass) == REDIS_OK) ri->pending_commands++;
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri, "%s %s",
+ sentinelInstanceMapCommand(ri,"AUTH"),
+ auth_pass) == C_OK) ri->link->pending_commands++;
}
}
@@ -1692,77 +1962,87 @@ void sentinelSendAuthIfNeeded(sentinelRedisInstance *ri, redisAsyncContext *c) {
void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char *type) {
char name[64];
- snprintf(name,sizeof(name),"sentinel-%.8s-%s",server.runid,type);
- if (redisAsyncCommand(c, sentinelDiscardReplyCallback, NULL,
- "CLIENT SETNAME %s", name) == REDIS_OK)
+ snprintf(name,sizeof(name),"sentinel-%.8s-%s",sentinel.myid,type);
+ if (redisAsyncCommand(c, sentinelDiscardReplyCallback, ri,
+ "%s SETNAME %s",
+ sentinelInstanceMapCommand(ri,"CLIENT"),
+ name) == C_OK)
{
- ri->pending_commands++;
+ ri->link->pending_commands++;
}
}
-/* Create the async connections for the specified instance if the instance
- * is disconnected. Note that the SRI_DISCONNECTED flag is set even if just
+/* Create the async connections for the instance link if the link
+ * is disconnected. Note that link->disconnected is true even if just
* one of the two links (commands and pub/sub) is missing. */
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
- if (!(ri->flags & SRI_DISCONNECTED)) return;
+ if (ri->link->disconnected == 0) return;
+ if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
+ instanceLink *link = ri->link;
+ mstime_t now = mstime();
+
+ if (now - ri->link->last_reconn_time < SENTINEL_PING_PERIOD) return;
+ ri->link->last_reconn_time = now;
/* Commands connection. */
- if (ri->cc == NULL) {
- ri->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->cc->err) {
- sentinelEvent(REDIS_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
- ri->cc->errstr);
- sentinelKillLink(ri,ri->cc);
+ if (link->cc == NULL) {
+ link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (link->cc->err) {
+ sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
+ link->cc->errstr);
+ instanceLinkCloseConnection(link,link->cc);
} else {
- ri->cc_conn_time = mstime();
- ri->cc->data = ri;
- redisAeAttach(server.el,ri->cc);
- redisAsyncSetConnectCallback(ri->cc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->cc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->cc);
- sentinelSetClientName(ri,ri->cc,"cmd");
+ link->pending_commands = 0;
+ link->cc_conn_time = mstime();
+ link->cc->data = link;
+ redisAeAttach(server.el,link->cc);
+ redisAsyncSetConnectCallback(link->cc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->cc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->cc);
+ sentinelSetClientName(ri,link->cc,"cmd");
/* Send a PING ASAP when reconnecting. */
sentinelSendPing(ri);
}
}
/* Pub / Sub */
- if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && ri->pc == NULL) {
- ri->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,REDIS_BIND_ADDR);
- if (ri->pc->err) {
- sentinelEvent(REDIS_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
- ri->pc->errstr);
- sentinelKillLink(ri,ri->pc);
+ if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
+ link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
+ if (link->pc->err) {
+ sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
+ link->pc->errstr);
+ instanceLinkCloseConnection(link,link->pc);
} else {
int retval;
- ri->pc_conn_time = mstime();
- ri->pc->data = ri;
- redisAeAttach(server.el,ri->pc);
- redisAsyncSetConnectCallback(ri->pc,
- sentinelLinkEstablishedCallback);
- redisAsyncSetDisconnectCallback(ri->pc,
- sentinelDisconnectCallback);
- sentinelSendAuthIfNeeded(ri,ri->pc);
- sentinelSetClientName(ri,ri->pc,"pubsub");
+ link->pc_conn_time = mstime();
+ link->pc->data = link;
+ redisAeAttach(server.el,link->pc);
+ redisAsyncSetConnectCallback(link->pc,
+ sentinelLinkEstablishedCallback);
+ redisAsyncSetDisconnectCallback(link->pc,
+ sentinelDisconnectCallback);
+ sentinelSendAuthIfNeeded(ri,link->pc);
+ sentinelSetClientName(ri,link->pc,"pubsub");
/* Now we subscribe to the Sentinels "Hello" channel. */
- retval = redisAsyncCommand(ri->pc,
- sentinelReceiveHelloMessages, NULL, "SUBSCRIBE %s",
- SENTINEL_HELLO_CHANNEL);
- if (retval != REDIS_OK) {
+ retval = redisAsyncCommand(link->pc,
+ sentinelReceiveHelloMessages, ri, "%s %s",
+ sentinelInstanceMapCommand(ri,"SUBSCRIBE"),
+ SENTINEL_HELLO_CHANNEL);
+ if (retval != C_OK) {
/* If we can't subscribe, the Pub/Sub connection is useless
* and we can simply disconnect it and try again. */
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(link,link->pc);
return;
}
}
}
- /* Clear the DISCONNECTED flags only if we have both the connections
+ /* Clear the disconnected status only if we have both the connections
* (or just the commands connection if this is a sentinel instance). */
- if (ri->cc && (ri->flags & SRI_SENTINEL || ri->pc))
- ri->flags &= ~SRI_DISCONNECTED;
+ if (link->cc && (ri->flags & SRI_SENTINEL || link->pc))
+ link->disconnected = 0;
}
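
One consequence of the last_reconn_time check at the top of this function,
assuming SENTINEL_PING_PERIOD is the usual 1000 milliseconds (the define is
not part of this hunk): a broken link is retried at most once per second, so
an unreachable or flapping endpoint cannot drive a tight reconnection loop.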
/* ======================== Redis instances pinging ======================== */
@@ -1806,7 +2086,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->runid = sdsnewlen(l+7,40);
} else {
if (strncmp(ri->runid,l+7,40) != 0) {
- sentinelEvent(REDIS_NOTICE,"+reboot",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+reboot",ri,"%@");
sdsfree(ri->runid);
ri->runid = sdsnewlen(l+7,40);
}
@@ -1847,7 +2127,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
if ((slave = createSentinelRedisInstance(NULL,SRI_SLAVE,ip,
atoi(port), ri->quorum, ri)) != NULL)
{
- sentinelEvent(REDIS_NOTICE,"+slave",slave,"%@");
+ sentinelEvent(LL_NOTICE,"+slave",slave,"%@");
+ sentinelFlushConfig();
}
}
}
@@ -1916,7 +2197,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
if (role == SRI_SLAVE) ri->slave_conf_change_time = mstime();
/* Log the event with +role-change if the new role is coherent or
* with -role-change if there is a mismatch with the current config. */
- sentinelEvent(REDIS_VERBOSE,
+ sentinelEvent(LL_VERBOSE,
((ri->flags & (SRI_MASTER|SRI_SLAVE)) == role) ?
"+role-change" : "-role-change",
ri, "%@ new reported role is %s",
@@ -1953,8 +2234,11 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES;
ri->master->failover_state_change_time = mstime();
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+promoted-slave",ri,"%@");
- sentinelEvent(REDIS_WARNING,"+failover-state-reconf-slaves",
+ sentinelEvent(LL_WARNING,"+promoted-slave",ri,"%@");
+ if (sentinel.simfailure_flags &
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION)
+ sentinelSimFailureCrash();
+ sentinelEvent(LL_WARNING,"+failover-state-reconf-slaves",
ri->master,"%@");
sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
"start",ri->master->addr,ri->addr);
@@ -1973,8 +2257,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int retval = sentinelSendSlaveOf(ri,
ri->master->addr->ip,
ri->master->addr->port);
- if (retval == REDIS_OK)
- sentinelEvent(REDIS_NOTICE,"+convert-to-slave",ri,"%@");
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+convert-to-slave",ri,"%@");
}
}
}
@@ -1996,8 +2280,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
int retval = sentinelSendSlaveOf(ri,
ri->master->addr->ip,
ri->master->addr->port);
- if (retval == REDIS_OK)
- sentinelEvent(REDIS_NOTICE,"+fix-slave-config",ri,"%@");
+ if (retval == C_OK)
+ sentinelEvent(LL_NOTICE,"+fix-slave-config",ri,"%@");
}
}
@@ -2015,7 +2299,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
{
ri->flags &= ~SRI_RECONF_SENT;
ri->flags |= SRI_RECONF_INPROG;
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-inprog",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-inprog",ri,"%@");
}
/* SRI_RECONF_INPROG -> SRI_RECONF_DONE */
@@ -2024,42 +2308,41 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
{
ri->flags &= ~SRI_RECONF_INPROG;
ri->flags |= SRI_RECONF_DONE;
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-done",ri,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-done",ri,"%@");
}
}
}
void sentinelInfoReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
- if (r->type == REDIS_REPLY_STRING) {
+ if (r->type == REDIS_REPLY_STRING)
sentinelRefreshInstanceInfo(ri,r->str);
- }
}
/* Just discard the reply. We use this when we are not monitoring the return
* value of the command but its effects directly. */
void sentinelDiscardReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
- REDIS_NOTUSED(reply);
- REDIS_NOTUSED(privdata);
+ instanceLink *link = c->data;
+ UNUSED(reply);
+ UNUSED(privdata);
- if (ri) ri->pending_commands--;
+ if (link) link->pending_commands--;
}
void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
if (r->type == REDIS_REPLY_STATUS ||
@@ -2070,8 +2353,8 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
strncmp(r->str,"LOADING",7) == 0 ||
strncmp(r->str,"MASTERDOWN",10) == 0)
{
- ri->last_avail_time = mstime();
- ri->last_ping_time = 0; /* Flag the pong as received. */
+ link->last_avail_time = mstime();
+ link->act_ping_time = 0; /* Flag the pong as received. */
} else {
/* Send a SCRIPT KILL command if the instance appears to be
* down because of a busy script. */
@@ -2079,26 +2362,29 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
(ri->flags & SRI_S_DOWN) &&
!(ri->flags & SRI_SCRIPT_KILL_SENT))
{
- if (redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL,
- "SCRIPT KILL") == REDIS_OK)
- ri->pending_commands++;
+ if (redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri,
+ "%s KILL",
+ sentinelInstanceMapCommand(ri,"SCRIPT")) == C_OK)
+ {
+ ri->link->pending_commands++;
+ }
ri->flags |= SRI_SCRIPT_KILL_SENT;
}
}
}
- ri->last_pong_time = mstime();
+ link->last_pong_time = mstime();
}
/* This is called when we get the reply about the PUBLISH command we send
* to the master to advertise this sentinel. */
void sentinelPublishReplyCallback(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Only update pub_time if we actually published our message. Otherwise
@@ -2136,25 +2422,39 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (!si) {
/* If not, remove all the sentinels that have the same runid
- * OR the same ip/port, because it's either a restart or a
- * network topology change. */
- removed = removeMatchingSentinelsFromMaster(master,token[0],port,
- token[2]);
+ * because there was an address change, and add the same Sentinel
+ * with the new address back. */
+ removed = removeMatchingSentinelFromMaster(master,token[2]);
if (removed) {
- sentinelEvent(REDIS_NOTICE,"-dup-sentinel",master,
- "%@ #duplicate of %s:%d or %s",
- token[0],port,token[2]);
+ sentinelEvent(LL_NOTICE,"+sentinel-address-switch",master,
+ "%@ ip %s port %d for %s", token[0],port,token[2]);
+ } else {
+ /* Check if there is another Sentinel with the same address this
+ * new one is reporting. If this happens we set its port to 0, to
+ * signal that the address is invalid. We'll update it later if we
+ * get a HELLO message. */
+ sentinelRedisInstance *other =
+ getSentinelRedisInstanceByAddrAndRunID(
+ master->sentinels, token[0],port,NULL);
+ if (other) {
+ sentinelEvent(LL_NOTICE,"+sentinel-invalid-addr",other,"%@");
+ other->addr->port = 0; /* It means: invalid address. */
+ sentinelUpdateSentinelAddressInAllMasters(other);
+ }
}
/* Add the new sentinel. */
- si = createSentinelRedisInstance(NULL,SRI_SENTINEL,
+ si = createSentinelRedisInstance(token[2],SRI_SENTINEL,
token[0],port,master->quorum,master);
+
if (si) {
- sentinelEvent(REDIS_NOTICE,"+sentinel",si,"%@");
+ if (!removed) sentinelEvent(LL_NOTICE,"+sentinel",si,"%@");
/* The runid is NULL after a new instance creation and
* for Sentinels we don't have a later chance to fill it,
* so do it now. */
si->runid = sdsnew(token[2]);
+ sentinelTryConnectionSharing(si);
+ if (removed) sentinelUpdateSentinelAddressInAllMasters(si);
sentinelFlushConfig();
}
}
@@ -2163,20 +2463,20 @@ void sentinelProcessHelloMessage(char *hello, int hello_len) {
if (current_epoch > sentinel.current_epoch) {
sentinel.current_epoch = current_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
}
/* Update master info if received configuration is newer. */
- if (master->config_epoch < master_config_epoch) {
+ if (si && master->config_epoch < master_config_epoch) {
master->config_epoch = master_config_epoch;
if (master_port != master->addr->port ||
strcmp(master->addr->ip, token[5]))
{
sentinelAddr *old_addr;
- sentinelEvent(REDIS_WARNING,"+config-update-from",si,"%@");
- sentinelEvent(REDIS_WARNING,"+switch-master",
+ sentinelEvent(LL_WARNING,"+config-update-from",si,"%@");
+ sentinelEvent(LL_WARNING,"+switch-master",
master,"%s %s %d %s %d",
master->name,
master->addr->ip, master->addr->port,
@@ -2203,9 +2503,9 @@ cleanup:
/* This is our Pub/Sub callback for the Hello channel. It's useful in order
* to discover other sentinels attached at the same master. */
void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
redisReply *r;
- REDIS_NOTUSED(privdata);
+ UNUSED(c);
if (!reply || !ri) return;
r = reply;
@@ -2213,7 +2513,7 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
/* Update the last activity in the pubsub channel. Note that since we
* receive our own messages as well, this timestamp can be used to detect
* if the link is probably disconnected even if it seems otherwise. */
- ri->pc_last_activity = mstime();
+ ri->link->pc_last_activity = mstime();
/* Sanity check in the reply we expect, so that the code that follows
* can avoid to check for details. */
@@ -2225,13 +2525,13 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
strcmp(r->element[0]->str,"message") != 0) return;
/* We are not interested in meeting ourselves */
- if (strstr(r->element[2]->str,server.runid) != NULL) return;
+ if (strstr(r->element[2]->str,sentinel.myid) != NULL) return;
sentinelProcessHelloMessage(r->element[2]->str, r->element[2]->len);
}
/* Send an "Hello" message via Pub/Sub to the specified 'ri' Redis
- * instance in order to broadcast the current configuraiton for this
+ * instance in order to broadcast the current configuration for this
* master, and to advertise the existence of this Sentinel at the same time.
*
* The message has the following format:
@@ -2239,26 +2539,26 @@ void sentinelReceiveHelloMessages(redisAsyncContext *c, void *reply, void *privd
* sentinel_ip,sentinel_port,sentinel_runid,current_epoch,
* master_name,master_ip,master_port,master_config_epoch.
*
- * Returns REDIS_OK if the PUBLISH was queued correctly, otherwise
- * REDIS_ERR is returned. */
+ * Returns C_OK if the PUBLISH was queued correctly, otherwise
+ * C_ERR is returned. */
int sentinelSendHello(sentinelRedisInstance *ri) {
- char ip[REDIS_IP_STR_LEN];
- char payload[REDIS_IP_STR_LEN+1024];
+ char ip[NET_IP_STR_LEN];
+ char payload[NET_IP_STR_LEN+1024];
int retval;
char *announce_ip;
int announce_port;
sentinelRedisInstance *master = (ri->flags & SRI_MASTER) ? ri : ri->master;
sentinelAddr *master_addr = sentinelGetCurrentMasterAddress(master);
- if (ri->flags & SRI_DISCONNECTED) return REDIS_ERR;
+ if (ri->link->disconnected) return C_ERR;
/* Use the announce address if one is specified, otherwise try to
* obtain our own IP address. */
if (sentinel.announce_ip) {
announce_ip = sentinel.announce_ip;
} else {
- if (anetSockName(ri->cc->c.fd,ip,sizeof(ip),NULL) == -1)
- return REDIS_ERR;
+ if (anetSockName(ri->link->cc->c.fd,ip,sizeof(ip),NULL) == -1)
+ return C_ERR;
announce_ip = ip;
}
announce_port = sentinel.announce_port ?
@@ -2268,17 +2568,18 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
snprintf(payload,sizeof(payload),
"%s,%d,%s,%llu," /* Info about this sentinel. */
"%s,%s,%d,%llu", /* Info about current master. */
- announce_ip, announce_port, server.runid,
+ announce_ip, announce_port, sentinel.myid,
(unsigned long long) sentinel.current_epoch,
/* --- */
master->name,master_addr->ip,master_addr->port,
(unsigned long long) master->config_epoch);
- retval = redisAsyncCommand(ri->cc,
- sentinelPublishReplyCallback, NULL, "PUBLISH %s %s",
- SENTINEL_HELLO_CHANNEL,payload);
- if (retval != REDIS_OK) return REDIS_ERR;
- ri->pending_commands++;
- return REDIS_OK;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelPublishReplyCallback, ri, "%s %s %s",
+ sentinelInstanceMapCommand(ri,"PUBLISH"),
+ SENTINEL_HELLO_CHANNEL,payload);
+ if (retval != C_OK) return C_ERR;
+ ri->link->pending_commands++;
+ return C_OK;
}
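
For concreteness, a payload produced by the snprintf() above could look like
the following single line (every value is made up):

    192.168.1.35,26379,0123456789abcdef0123456789abcdef01234567,10,mymaster,192.168.1.50,6379,10

that is, this Sentinel's announced ip, port, 40-character ID and current
epoch, followed by the monitored master's name, address and config epoch.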
/* Reset last_pub_time in all the instances in the specified dictionary
@@ -2305,28 +2606,31 @@ void sentinelForceHelloUpdateDictOfRedisInstances(dict *instances) {
* Sentinel upgrades a configuration it is a good idea to deliver an update
* to the other Sentinels ASAP. */
int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
- if (!(master->flags & SRI_MASTER)) return REDIS_ERR;
+ if (!(master->flags & SRI_MASTER)) return C_ERR;
if (master->last_pub_time >= (SENTINEL_PUBLISH_PERIOD+1))
master->last_pub_time -= (SENTINEL_PUBLISH_PERIOD+1);
sentinelForceHelloUpdateDictOfRedisInstances(master->sentinels);
sentinelForceHelloUpdateDictOfRedisInstances(master->slaves);
- return REDIS_OK;
+ return C_OK;
}
-/* Send a PING to the specified instance and refresh the last_ping_time
+/* Send a PING to the specified instance and refresh the act_ping_time
* if it is zero (that is, if we received a pong for the previous ping).
*
* On error zero is returned, and we can't consider the PING command
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
- int retval = redisAsyncCommand(ri->cc,
- sentinelPingReplyCallback, NULL, "PING");
- if (retval == REDIS_OK) {
- ri->pending_commands++;
- /* We update the ping time only if we received the pong for
- * the previous ping, otherwise we are technically waiting
- * since the first ping that did not received a reply. */
- if (ri->last_ping_time == 0) ri->last_ping_time = mstime();
+ int retval = redisAsyncCommand(ri->link->cc,
+ sentinelPingReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"PING"));
+ if (retval == C_OK) {
+ ri->link->pending_commands++;
+ ri->link->last_ping_time = mstime();
+ /* We update the active ping time only if we received the pong for
+ * the previous ping; otherwise we are technically waiting since the
+ * first ping that never received a reply. */
+ if (ri->link->act_ping_time == 0)
+ ri->link->act_ping_time = ri->link->last_ping_time;
return 1;
} else {
return 0;
@@ -2342,7 +2646,7 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
/* Return ASAP if we already have a PING or INFO pending, or
* in case the instance is not properly connected. */
- if (ri->flags & SRI_DISCONNECTED) return;
+ if (ri->link->disconnected) return;
/* For INFO, PING, PUBLISH that are not critical commands to send we
* also have a limit of SENTINEL_MAX_PENDING_COMMANDS. We don't
@@ -2350,14 +2654,21 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
* properly (note that there is anyway a redundant protection about this:
* the link will be disconnected and reconnected if a long timeout
* condition is detected). */
- if (ri->pending_commands >= SENTINEL_MAX_PENDING_COMMANDS) return;
+ if (ri->link->pending_commands >=
+ SENTINEL_MAX_PENDING_COMMANDS * ri->link->refcount) return;
/* If this is a slave of a master in O_DOWN condition we start sending
* it INFO every second, instead of the usual SENTINEL_INFO_PERIOD
* period. In this state we want to closely monitor slaves in case they
- * are turned into masters by another Sentinel, or by the sysadmin. */
+ * are turned into masters by another Sentinel, or by the sysadmin.
+ *
+ * Similarly we monitor the INFO output more often if the slave reports
+ * being disconnected from the master, so that we can have a fresh
+ * disconnection time figure. */
if ((ri->flags & SRI_SLAVE) &&
- (ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS))) {
+ ((ri->master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS)) ||
+ (ri->master_link_down_time != 0)))
+ {
info_period = 1000;
} else {
info_period = SENTINEL_INFO_PERIOD;
@@ -2369,19 +2680,25 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
ping_period = ri->down_after_period;
if (ping_period > SENTINEL_PING_PERIOD) ping_period = SENTINEL_PING_PERIOD;
+ /* Send INFO to masters and slaves, not sentinels. */
if ((ri->flags & SRI_SENTINEL) == 0 &&
(ri->info_refresh == 0 ||
(now - ri->info_refresh) > info_period))
{
- /* Send INFO to masters and slaves, not sentinels. */
- retval = redisAsyncCommand(ri->cc,
- sentinelInfoReplyCallback, NULL, "INFO");
- if (retval == REDIS_OK) ri->pending_commands++;
- } else if ((now - ri->last_pong_time) > ping_period) {
- /* Send PING to all the three kinds of instances. */
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelInfoReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"INFO"));
+ if (retval == C_OK) ri->link->pending_commands++;
+ }
+
+ /* Send PING to all the three kinds of instances. */
+ if ((now - ri->link->last_pong_time) > ping_period &&
+ (now - ri->link->last_ping_time) > ping_period/2) {
sentinelSendPing(ri);
- } else if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
- /* PUBLISH hello messages to all the three kinds of instances. */
+ }
+
+ /* PUBLISH hello messages to all the three kinds of instances. */
+ if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
sentinelSendHello(ri);
}
}
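
A quick worked example for the refcount-scaled limit above, assuming
SENTINEL_MAX_PENDING_COMMANDS is 100 (its value elsewhere in this file; the
define is not part of this hunk): a link shared by three instance objects
(refcount == 3) tolerates up to 300 pending commands before periodic sends
are skipped, which is fair since all three sharers account their traffic
against the same connection.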
@@ -2402,7 +2719,7 @@ const char *sentinelFailoverStateStr(int state) {
}
/* Redis instance to Redis protocol representation. */
-void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
+void addReplySentinelRedisInstance(client *c, sentinelRedisInstance *ri) {
char *flags = sdsempty();
void *mbl;
int fields = 0;
@@ -2431,7 +2748,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
if (ri->flags & SRI_MASTER) flags = sdscat(flags,"master,");
if (ri->flags & SRI_SLAVE) flags = sdscat(flags,"slave,");
if (ri->flags & SRI_SENTINEL) flags = sdscat(flags,"sentinel,");
- if (ri->flags & SRI_DISCONNECTED) flags = sdscat(flags,"disconnected,");
+ if (ri->link->disconnected) flags = sdscat(flags,"disconnected,");
if (ri->flags & SRI_MASTER_DOWN) flags = sdscat(flags,"master_down,");
if (ri->flags & SRI_FAILOVER_IN_PROGRESS)
flags = sdscat(flags,"failover_in_progress,");
@@ -2445,8 +2762,12 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
sdsfree(flags);
fields++;
- addReplyBulkCString(c,"pending-commands");
- addReplyBulkLongLong(c,ri->pending_commands);
+ addReplyBulkCString(c,"link-pending-commands");
+ addReplyBulkLongLong(c,ri->link->pending_commands);
+ fields++;
+
+ addReplyBulkCString(c,"link-refcount");
+ addReplyBulkLongLong(c,ri->link->refcount);
fields++;
if (ri->flags & SRI_FAILOVER_IN_PROGRESS) {
@@ -2457,15 +2778,15 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
addReplyBulkCString(c,"last-ping-sent");
addReplyBulkLongLong(c,
- ri->last_ping_time ? (mstime() - ri->last_ping_time) : 0);
+ ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
fields++;
addReplyBulkCString(c,"last-ok-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_avail_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_avail_time);
fields++;
addReplyBulkCString(c,"last-ping-reply");
- addReplyBulkLongLong(c,mstime() - ri->last_pong_time);
+ addReplyBulkLongLong(c,mstime() - ri->link->last_pong_time);
fields++;
if (ri->flags & SRI_S_DOWN) {
@@ -2589,7 +2910,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
/* Output a number of instances contained inside a dictionary as
* Redis protocol. */
-void addReplyDictOfRedisInstances(redisClient *c, dict *instances) {
+void addReplyDictOfRedisInstances(client *c, dict *instances) {
dictIterator *di;
dictEntry *de;
@@ -2606,7 +2927,7 @@ void addReplyDictOfRedisInstances(redisClient *c, dict *instances) {
/* Lookup the named master into sentinel.masters.
 * If the master is not found, reply to the client with an error and return
* NULL. */
-sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
+sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(client *c,
robj *name)
{
sentinelRedisInstance *ri;
@@ -2619,7 +2940,32 @@ sentinelRedisInstance *sentinelGetMasterByNameOrReplyError(redisClient *c,
return ri;
}
-void sentinelCommand(redisClient *c) {
+#define SENTINEL_ISQR_OK 0
+#define SENTINEL_ISQR_NOQUORUM (1<<0)
+#define SENTINEL_ISQR_NOAUTH (1<<1)
+int sentinelIsQuorumReachable(sentinelRedisInstance *master, int *usableptr) {
+ dictIterator *di;
+ dictEntry *de;
+ int usable = 1; /* Number of usable Sentinels. Init to 1 to count myself. */
+ int result = SENTINEL_ISQR_OK;
+ int voters = dictSize(master->sentinels)+1; /* Known Sentinels + myself. */
+
+ di = dictGetIterator(master->sentinels);
+ while((de = dictNext(di)) != NULL) {
+ sentinelRedisInstance *ri = dictGetVal(de);
+
+ if (ri->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ usable++;
+ }
+ dictReleaseIterator(di);
+
+ if (usable < (int)master->quorum) result |= SENTINEL_ISQR_NOQUORUM;
+ if (usable < voters/2+1) result |= SENTINEL_ISQR_NOAUTH;
+ if (usableptr) *usableptr = usable;
+ return result;
+}
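
Because the function above packs two independent conditions into one
bitmask, a worked example may help (all numbers made up): with four
other known Sentinels, voters = 5; if two of them are flagged SDOWN,
usable = 1 (myself) + 2 = 3, which still satisfies both the configured
quorum (3 >= 3) and the failover majority (3 >= 5/2+1 = 3). One more
failure drops usable to 2 and raises both flags. A self-contained
check of that arithmetic:

    #include <stdio.h>

    #define ISQR_NOQUORUM (1<<0)   /* mirrors SENTINEL_ISQR_NOQUORUM */
    #define ISQR_NOAUTH   (1<<1)   /* mirrors SENTINEL_ISQR_NOAUTH */

    int main(void) {
        int voters = 5, quorum = 3;
        for (int usable = 3; usable >= 2; usable--) {
            int result = 0;
            if (usable < quorum)     result |= ISQR_NOQUORUM;
            if (usable < voters/2+1) result |= ISQR_NOAUTH;
            printf("usable=%d -> result=%d\n", usable, result); /* 0, then 3 */
        }
        return 0;
    }
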
+
+void sentinelCommand(client *c) {
if (!strcasecmp(c->argv[1]->ptr,"masters")) {
/* SENTINEL MASTERS */
if (c->argc != 2) goto numargserr;
@@ -2649,7 +2995,23 @@ void sentinelCommand(redisClient *c) {
return;
addReplyDictOfRedisInstances(c,ri->sentinels);
} else if (!strcasecmp(c->argv[1]->ptr,"is-master-down-by-addr")) {
- /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>*/
+ /* SENTINEL IS-MASTER-DOWN-BY-ADDR <ip> <port> <current-epoch> <runid>
+ *
+ * Arguments:
+ *
+ * ip and port are the ip and port of the master we want to be
+ * checked by Sentinel. Note that the command will not check by
+ * name but just by address: in theory different Sentinels may monitor
+ * different masters with the same name.
+ *
+ * current-epoch is needed in order to understand if we are allowed
+ * to vote for a failover leader or not. Each Sentinel can vote just
+ * one time per epoch.
+ *
+ * runid is "*" if we are not seeking a vote from the Sentinel
+ * in order to elect the failover leader. Otherwise it is set to the
+ * runid we want the Sentinel to vote for, if it has not already voted.
+ */
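
(Illustrative aside, not part of the patch: under the semantics just
documented, the two uses of the command look roughly like

    SENTINEL IS-MASTER-DOWN-BY-ADDR 127.0.0.1 6379 7 *
    SENTINEL IS-MASTER-DOWN-BY-ADDR 127.0.0.1 6379 7 <candidate-runid>

where the first only asks about the down state and the second also
requests a vote for <candidate-runid> in epoch 7. Address and epoch
are made up.)
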
sentinelRedisInstance *ri;
long long req_epoch;
uint64_t leader_epoch = 0;
@@ -2658,9 +3020,9 @@ void sentinelCommand(redisClient *c) {
int isdown = 0;
if (c->argc != 6) goto numargserr;
- if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != REDIS_OK ||
+ if (getLongFromObjectOrReply(c,c->argv[3],&port,NULL) != C_OK ||
getLongLongFromObjectOrReply(c,c->argv[4],&req_epoch,NULL)
- != REDIS_OK)
+ != C_OK)
return;
ri = getSentinelRedisInstanceByAddrAndRunID(sentinel.masters,
c->argv[2]->ptr,port,NULL);
@@ -2720,7 +3082,7 @@ void sentinelCommand(redisClient *c) {
addReplySds(c,sdsnew("-NOGOODSLAVE No suitable slave to promote\r\n"));
return;
}
- redisLog(REDIS_WARNING,"Executing user requested FAILOVER of '%s'",
+ serverLog(LL_WARNING,"Executing user requested FAILOVER of '%s'",
ri->name);
sentinelStartFailover(ri);
ri->flags |= SRI_FORCE_FAILOVER;
@@ -2734,13 +3096,19 @@ void sentinelCommand(redisClient *c) {
/* SENTINEL MONITOR <name> <ip> <port> <quorum> */
sentinelRedisInstance *ri;
long quorum, port;
- char ip[REDIS_IP_STR_LEN];
+ char ip[NET_IP_STR_LEN];
if (c->argc != 6) goto numargserr;
if (getLongFromObjectOrReply(c,c->argv[5],&quorum,"Invalid quorum")
- != REDIS_OK) return;
+ != C_OK) return;
if (getLongFromObjectOrReply(c,c->argv[4],&port,"Invalid port")
- != REDIS_OK) return;
+ != C_OK) return;
+
+ if (quorum <= 0) {
+ addReplyError(c, "Quorum must be 1 or greater.");
+ return;
+ }
+
/* Make sure the IP field is actually a valid IP before passing it
* to createSentinelRedisInstance(), otherwise we may trigger a
* DNS lookup at runtime. */
@@ -2766,23 +3134,57 @@ void sentinelCommand(redisClient *c) {
}
} else {
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
+ sentinelEvent(LL_WARNING,"+monitor",ri,"%@ quorum %d",ri->quorum);
addReply(c,shared.ok);
}
+ } else if (!strcasecmp(c->argv[1]->ptr,"flushconfig")) {
+ if (c->argc != 2) goto numargserr;
+ sentinelFlushConfig();
+ addReply(c,shared.ok);
+ return;
} else if (!strcasecmp(c->argv[1]->ptr,"remove")) {
/* SENTINEL REMOVE <name> */
sentinelRedisInstance *ri;
+ if (c->argc != 3) goto numargserr;
if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
== NULL) return;
- sentinelEvent(REDIS_WARNING,"-monitor",ri,"%@");
+ sentinelEvent(LL_WARNING,"-monitor",ri,"%@");
dictDelete(sentinel.masters,c->argv[2]->ptr);
sentinelFlushConfig();
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"ckquorum")) {
+ /* SENTINEL CKQUORUM <name> */
+ sentinelRedisInstance *ri;
+ int usable;
+
+ if (c->argc != 3) goto numargserr;
+ if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
+ == NULL) return;
+ int result = sentinelIsQuorumReachable(ri,&usable);
+ if (result == SENTINEL_ISQR_OK) {
+ addReplySds(c, sdscatfmt(sdsempty(),
+ "+OK %i usable Sentinels. Quorum and failover authorization "
+ "can be reached\r\n",usable));
+ } else {
+ sds e = sdscatfmt(sdsempty(),
+ "-NOQUORUM %i usable Sentinels. ",usable);
+ if (result & SENTINEL_ISQR_NOQUORUM)
+ e = sdscat(e,"Not enough available Sentinels to reach the"
+ " specified quorum for this master");
+ if (result & SENTINEL_ISQR_NOAUTH) {
+ if (result & SENTINEL_ISQR_NOQUORUM) e = sdscat(e,". ");
+ e = sdscat(e, "Not enough available Sentinels to reach the"
+ " majority and authorize a failover");
+ }
+ e = sdscat(e,"\r\n");
+ addReplySds(c,e);
+ }
} else if (!strcasecmp(c->argv[1]->ptr,"set")) {
- if (c->argc < 3 || c->argc % 2 == 0) goto numargserr;
+ if (c->argc < 3) goto numargserr;
sentinelSetCommand(c);
} else if (!strcasecmp(c->argv[1]->ptr,"info-cache")) {
+ /* SENTINEL INFO-CACHE <name> */
if (c->argc < 2) goto numargserr;
mstime_t now = mstime();
@@ -2843,6 +3245,33 @@ void sentinelCommand(redisClient *c) {
}
dictReleaseIterator(di);
if (masters_local != sentinel.masters) dictRelease(masters_local);
+ } else if (!strcasecmp(c->argv[1]->ptr,"simulate-failure")) {
+ /* SENTINEL SIMULATE-FAILURE <flag> <flag> ... <flag> */
+ int j;
+
+ sentinel.simfailure_flags = SENTINEL_SIMFAILURE_NONE;
+ for (j = 2; j < c->argc; j++) {
+ if (!strcasecmp(c->argv[j]->ptr,"crash-after-election")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after being successfully elected as failover "
+ "leader");
+ } else if (!strcasecmp(c->argv[j]->ptr,"crash-after-promotion")) {
+ sentinel.simfailure_flags |=
+ SENTINEL_SIMFAILURE_CRASH_AFTER_PROMOTION;
+ serverLog(LL_WARNING,"Failure simulation: this Sentinel "
+ "will crash after promoting the selected slave to master");
+ } else if (!strcasecmp(c->argv[j]->ptr,"help")) {
+ addReplyMultiBulkLen(c,2);
+ addReplyBulkCString(c,"crash-after-election");
+ addReplyBulkCString(c,"crash-after-promotion");
+ } else {
+ addReplyError(c,"Unknown failure simulation specified");
+ return;
+ }
+ }
+ addReply(c,shared.ok);
} else {
addReplyErrorFormat(c,"Unknown sentinel subcommand '%s'",
(char*)c->argv[1]->ptr);
@@ -2854,26 +3283,41 @@ numargserr:
(char*)c->argv[1]->ptr);
}
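
A hedged usage sketch for the SIMULATE-FAILURE branch above, meant for
test environments only (26379 is the conventional Sentinel port, an
assumption here):

    redis-cli -p 26379 SENTINEL SIMULATE-FAILURE help
    redis-cli -p 26379 SENTINEL SIMULATE-FAILURE crash-after-election

As the loop above shows, invoking the subcommand with no flags resets
sentinel.simfailure_flags to SENTINEL_SIMFAILURE_NONE and replies +OK.
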
-/* SENTINEL INFO [section] */
-void sentinelInfoCommand(redisClient *c) {
- char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
- sds info = sdsempty();
- int defsections = !strcasecmp(section,"default");
- int sections = 0;
+#define info_section_from_redis(section_name) do { \
+ if (defsections || allsections || !strcasecmp(section,section_name)) { \
+ sds redissection; \
+ if (sections++) info = sdscat(info,"\r\n"); \
+ redissection = genRedisInfoString(section_name); \
+ info = sdscatlen(info,redissection,sdslen(redissection)); \
+ sdsfree(redissection); \
+ } \
+} while(0)
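
The macro above wraps its body in do { ... } while(0) so that the
multi-statement expansion behaves as a single statement and composes
safely with un-braced if/else. A minimal standalone illustration of why
that matters (hypothetical macro, not from the patch):

    #include <stdio.h>

    /* Without the do/while(0) wrapper this would expand to two
     * statements, and the `else` below would fail to parse. */
    #define LOG_TWICE(msg) do { puts(msg); puts(msg); } while(0)

    int main(void) {
        int verbose = 1;
        if (verbose)
            LOG_TWICE("section");
        else
            puts("quiet");
        return 0;
    }
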
+/* SENTINEL INFO [section] */
+void sentinelInfoCommand(client *c) {
if (c->argc > 2) {
addReply(c,shared.syntaxerr);
return;
}
- if (!strcasecmp(section,"server") || defsections) {
- if (sections++) info = sdscat(info,"\r\n");
- sds serversection = genRedisInfoString("server");
- info = sdscatlen(info,serversection,sdslen(serversection));
- sdsfree(serversection);
+ int defsections = 0, allsections = 0;
+ char *section = c->argc == 2 ? c->argv[1]->ptr : NULL;
+ if (section) {
+ allsections = !strcasecmp(section,"all");
+ defsections = !strcasecmp(section,"default");
+ } else {
+ defsections = 1;
}
- if (!strcasecmp(section,"sentinel") || defsections) {
+ int sections = 0;
+ sds info = sdsempty();
+
+ info_section_from_redis("server");
+ info_section_from_redis("clients");
+ info_section_from_redis("cpu");
+ info_section_from_redis("stats");
+
+ if (defsections || allsections || !strcasecmp(section,"sentinel")) {
dictIterator *di;
dictEntry *de;
int master_id = 0;
@@ -2884,11 +3328,13 @@ void sentinelInfoCommand(redisClient *c) {
"sentinel_masters:%lu\r\n"
"sentinel_tilt:%d\r\n"
"sentinel_running_scripts:%d\r\n"
- "sentinel_scripts_queue_length:%ld\r\n",
+ "sentinel_scripts_queue_length:%ld\r\n"
+ "sentinel_simulate_failure_flags:%lu\r\n",
dictSize(sentinel.masters),
sentinel.tilt,
sentinel.running_scripts,
- listLength(sentinel.scripts_queue));
+ listLength(sentinel.scripts_queue),
+ sentinel.simfailure_flags);
di = dictGetIterator(sentinel.masters);
while((de = dictNext(di)) != NULL) {
@@ -2911,9 +3357,9 @@ void sentinelInfoCommand(redisClient *c) {
addReplyBulkSds(c, info);
}
-/* Implements Sentinel verison of the ROLE command. The output is
+/* Implements Sentinel version of the ROLE command. The output is
* "sentinel" and the list of currently monitored master names. */
-void sentinelRoleCommand(redisClient *c) {
+void sentinelRoleCommand(client *c) {
dictIterator *di;
dictEntry *de;
@@ -2931,42 +3377,61 @@ void sentinelRoleCommand(redisClient *c) {
}
/* SENTINEL SET <mastername> [<option> <value> ...] */
-void sentinelSetCommand(redisClient *c) {
+void sentinelSetCommand(client *c) {
sentinelRedisInstance *ri;
int j, changes = 0;
- char *option, *value;
+ int badarg = 0; /* Bad argument position for error reporting. */
+ char *option;
if ((ri = sentinelGetMasterByNameOrReplyError(c,c->argv[2]))
== NULL) return;
/* Process option - value pairs. */
- for (j = 3; j < c->argc; j += 2) {
+ for (j = 3; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j;
option = c->argv[j]->ptr;
- value = c->argv[j+1]->ptr;
- robj *o = c->argv[j+1];
long long ll;
+ int old_j = j; /* Used to know what to log as an event. */
- if (!strcasecmp(option,"down-after-milliseconds")) {
+ if (!strcasecmp(option,"down-after-milliseconds") && moreargs > 0) {
            /* down-after-milliseconds <milliseconds> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
goto badfmt;
+ }
ri->down_after_period = ll;
sentinelPropagateDownAfterPeriod(ri);
changes++;
- } else if (!strcasecmp(option,"failover-timeout")) {
+ } else if (!strcasecmp(option,"failover-timeout") && moreargs > 0) {
/* failover-timeout <milliseconds> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
goto badfmt;
+ }
ri->failover_timeout = ll;
changes++;
- } else if (!strcasecmp(option,"parallel-syncs")) {
+ } else if (!strcasecmp(option,"parallel-syncs") && moreargs > 0) {
            /* parallel-syncs <numslaves> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
goto badfmt;
+ }
ri->parallel_syncs = ll;
changes++;
- } else if (!strcasecmp(option,"notification-script")) {
+ } else if (!strcasecmp(option,"notification-script") && moreargs > 0) {
/* notification-script <path> */
+ char *value = c->argv[++j]->ptr;
+ if (sentinel.deny_scripts_reconfig) {
+ addReplyError(c,
+ "Reconfiguration of scripts path is denied for "
+ "security reasons. Check the deny-scripts-reconfig "
+ "configuration directive in your Sentinel configuration");
+ return;
+ }
+
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Notification script seems non existing or non executable");
@@ -2976,8 +3441,17 @@ void sentinelSetCommand(redisClient *c) {
sdsfree(ri->notification_script);
ri->notification_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
- } else if (!strcasecmp(option,"client-reconfig-script")) {
+ } else if (!strcasecmp(option,"client-reconfig-script") && moreargs > 0) {
/* client-reconfig-script <path> */
+ char *value = c->argv[++j]->ptr;
+ if (sentinel.deny_scripts_reconfig) {
+ addReplyError(c,
+ "Reconfiguration of scripts path is denied for "
+ "security reasons. Check the deny-scripts-reconfig "
+ "configuration directive in your Sentinel configuration");
+ return;
+ }
+
if (strlen(value) && access(value,X_OK) == -1) {
addReplyError(c,
"Client reconfiguration script seems non existing or "
@@ -2988,24 +3462,65 @@ void sentinelSetCommand(redisClient *c) {
sdsfree(ri->client_reconfig_script);
ri->client_reconfig_script = strlen(value) ? sdsnew(value) : NULL;
changes++;
- } else if (!strcasecmp(option,"auth-pass")) {
+ } else if (!strcasecmp(option,"auth-pass") && moreargs > 0) {
/* auth-pass <password> */
+ char *value = c->argv[++j]->ptr;
sdsfree(ri->auth_pass);
ri->auth_pass = strlen(value) ? sdsnew(value) : NULL;
changes++;
- } else if (!strcasecmp(option,"quorum")) {
+ } else if (!strcasecmp(option,"quorum") && moreargs > 0) {
/* quorum <count> */
- if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0)
+ robj *o = c->argv[++j];
+ if (getLongLongFromObject(o,&ll) == C_ERR || ll <= 0) {
+ badarg = j;
goto badfmt;
+ }
ri->quorum = ll;
changes++;
+ } else if (!strcasecmp(option,"rename-command") && moreargs > 1) {
+ /* rename-command <oldname> <newname> */
+ sds oldname = c->argv[++j]->ptr;
+ sds newname = c->argv[++j]->ptr;
+
+ if ((sdslen(oldname) == 0) || (sdslen(newname) == 0)) {
+ badarg = sdslen(newname) ? j-1 : j;
+ goto badfmt;
+ }
+
+ /* Remove any older renaming for this command. */
+ dictDelete(ri->renamed_commands,oldname);
+
+ /* If the target name is the same as the source name there
+ * is no need to add an entry mapping to itself. */
+ if (!dictSdsKeyCaseCompare(NULL,oldname,newname)) {
+ oldname = sdsdup(oldname);
+ newname = sdsdup(newname);
+ dictAdd(ri->renamed_commands,oldname,newname);
+ }
+ changes++;
} else {
- addReplyErrorFormat(c,"Unknown option '%s' for SENTINEL SET",
- option);
+ addReplyErrorFormat(c,"Unknown option or number of arguments for "
+ "SENTINEL SET '%s'", option);
if (changes) sentinelFlushConfig();
return;
}
- sentinelEvent(REDIS_WARNING,"+set",ri,"%@ %s %s",option,value);
+
+ /* Log the event. */
+ int numargs = j-old_j+1;
+ switch(numargs) {
+ case 2:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s",c->argv[old_j]->ptr,
+ c->argv[old_j+1]->ptr);
+ break;
+ case 3:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s %s %s",c->argv[old_j]->ptr,
+ c->argv[old_j+1]->ptr,
+ c->argv[old_j+2]->ptr);
+ break;
+ default:
+ sentinelEvent(LL_WARNING,"+set",ri,"%@ %s",c->argv[old_j]->ptr);
+ break;
+ }
}
if (changes) sentinelFlushConfig();
@@ -3015,7 +3530,7 @@ void sentinelSetCommand(redisClient *c) {
badfmt: /* Bad format errors */
if (changes) sentinelFlushConfig();
addReplyErrorFormat(c,"Invalid argument '%s' for SENTINEL SET '%s'",
- value, option);
+ (char*)c->argv[badarg]->ptr,option);
}
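
The rewritten option loop above consumes a variable number of value
tokens per option: moreargs guards against reading past the argument
vector, ++j consumes each value, and old_j remembers where the option
started so the +set event logs exactly the tokens it consumed. A
standalone sketch of the pattern (argument vector and options are
illustrative, not the Redis parser):

    #include <stdio.h>
    #include <strings.h>   /* strcasecmp */

    int main(void) {
        char *argv[] = {"sentinel","set","mymaster",
                        "quorum","2","rename-command","CONFIG","GUESSME"};
        int argc = sizeof(argv)/sizeof(argv[0]);

        for (int j = 3; j < argc; j++) {
            int moreargs = (argc-1) - j;  /* tokens left after argv[j] */
            int old_j = j;                /* option start, for logging */
            if (!strcasecmp(argv[j],"quorum") && moreargs > 0) {
                printf("quorum -> %s\n", argv[++j]);      /* one value */
            } else if (!strcasecmp(argv[j],"rename-command") && moreargs > 1) {
                char *from = argv[++j], *to = argv[++j];  /* two values */
                printf("rename %s -> %s\n", from, to);
            } else {
                printf("unknown option or missing value: %s\n", argv[j]);
                return 1;
            }
            printf("logged %d token(s) starting at %d\n", j-old_j+1, old_j);
        }
        return 0;
    }
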
/* Our fake PUBLISH command: it is actually useful only to receive hello messages
@@ -3024,7 +3539,7 @@ badfmt: /* Bad format errors */
*
* Because we have a Sentinel PUBLISH, the code to send hello messages is the same
 * for all the three kinds of instances: masters, slaves, sentinels. */
-void sentinelPublishCommand(redisClient *c) {
+void sentinelPublishCommand(client *c) {
if (strcmp(c->argv[1]->ptr,SENTINEL_HELLO_CHANNEL)) {
addReplyError(c, "Only HELLO messages are accepted by Sentinel instances.");
return;
@@ -3039,8 +3554,10 @@ void sentinelPublishCommand(redisClient *c) {
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = 0;
- if (ri->last_ping_time)
- elapsed = mstime() - ri->last_ping_time;
+ if (ri->link->act_ping_time)
+ elapsed = mstime() - ri->link->act_ping_time;
+ else if (ri->link->disconnected)
+ elapsed = mstime() - ri->link->last_avail_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
@@ -3048,15 +3565,16 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* 1) Check if the command link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
* pending ping for more than half the timeout. */
- if (ri->cc &&
- (mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- ri->last_ping_time != 0 && /* Ther is a pending ping... */
+ if (ri->link->cc &&
+ (mstime() - ri->link->cc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ ri->link->act_ping_time != 0 && /* There is a pending ping... */
            /* The pending ping is delayed, and we did not receive
             * error replies either. */
- (mstime() - ri->last_ping_time) > (ri->down_after_period/2) &&
- (mstime() - ri->last_pong_time) > (ri->down_after_period/2))
+ (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
+ (mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
{
- sentinelKillLink(ri,ri->cc);
+ instanceLinkCloseConnection(ri->link,ri->link->cc);
}
/* 2) Check if the pubsub link seems connected, was connected not less
@@ -3064,11 +3582,12 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
* activity in the Pub/Sub channel for more than
* SENTINEL_PUBLISH_PERIOD * 3.
*/
- if (ri->pc &&
- (mstime() - ri->pc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- (mstime() - ri->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
+ if (ri->link->pc &&
+ (mstime() - ri->link->pc_conn_time) >
+ SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
+ (mstime() - ri->link->pc_last_activity) > (SENTINEL_PUBLISH_PERIOD*3))
{
- sentinelKillLink(ri,ri->pc);
+ instanceLinkCloseConnection(ri->link,ri->link->pc);
}
/* Update the SDOWN flag. We believe the instance is SDOWN if:
@@ -3085,14 +3604,14 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
{
/* Is subjectively down */
if ((ri->flags & SRI_S_DOWN) == 0) {
- sentinelEvent(REDIS_WARNING,"+sdown",ri,"%@");
+ sentinelEvent(LL_WARNING,"+sdown",ri,"%@");
ri->s_down_since_time = mstime();
ri->flags |= SRI_S_DOWN;
}
} else {
/* Is subjectively up */
if (ri->flags & SRI_S_DOWN) {
- sentinelEvent(REDIS_WARNING,"-sdown",ri,"%@");
+ sentinelEvent(LL_WARNING,"-sdown",ri,"%@");
ri->flags &= ~(SRI_S_DOWN|SRI_SCRIPT_KILL_SENT);
}
}
@@ -3126,14 +3645,14 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
/* Set the flag accordingly to the outcome. */
if (odown) {
if ((master->flags & SRI_O_DOWN) == 0) {
- sentinelEvent(REDIS_WARNING,"+odown",master,"%@ #quorum %d/%d",
+ sentinelEvent(LL_WARNING,"+odown",master,"%@ #quorum %d/%d",
quorum, master->quorum);
master->flags |= SRI_O_DOWN;
master->o_down_since_time = mstime();
}
} else {
if (master->flags & SRI_O_DOWN) {
- sentinelEvent(REDIS_WARNING,"-odown",master,"%@");
+ sentinelEvent(LL_WARNING,"-odown",master,"%@");
master->flags &= ~SRI_O_DOWN;
}
}
@@ -3142,12 +3661,12 @@ void sentinelCheckObjectivelyDown(sentinelRedisInstance *master) {
/* Receive the SENTINEL is-master-down-by-addr reply, see the
* sentinelAskMasterStateToOtherSentinels() function for more information. */
void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *privdata) {
- sentinelRedisInstance *ri = c->data;
+ sentinelRedisInstance *ri = privdata;
+ instanceLink *link = c->data;
redisReply *r;
- REDIS_NOTUSED(privdata);
- if (ri) ri->pending_commands--;
- if (!reply || !ri) return;
+ if (!reply || !link) return;
+ link->pending_commands--;
r = reply;
/* Ignore every error or unexpected reply.
@@ -3169,7 +3688,7 @@ void sentinelReceiveIsMasterDownReply(redisAsyncContext *c, void *reply, void *p
* replied with a vote. */
sdsfree(ri->leader);
if ((long long)ri->leader_epoch != r->element[2]->integer)
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"%s voted for %s %llu", ri->name,
r->element[1]->str,
(unsigned long long) r->element[2]->integer);
@@ -3208,29 +3727,37 @@ void sentinelAskMasterStateToOtherSentinels(sentinelRedisInstance *master, int f
* 2) Sentinel is connected.
 * 3) We did not receive the info within SENTINEL_ASK_PERIOD ms. */
if ((master->flags & SRI_S_DOWN) == 0) continue;
- if (ri->flags & SRI_DISCONNECTED) continue;
+ if (ri->link->disconnected) continue;
if (!(flags & SENTINEL_ASK_FORCED) &&
mstime() - ri->last_master_down_reply_time < SENTINEL_ASK_PERIOD)
continue;
/* Ask */
ll2string(port,sizeof(port),master->addr->port);
- retval = redisAsyncCommand(ri->cc,
- sentinelReceiveIsMasterDownReply, NULL,
- "SENTINEL is-master-down-by-addr %s %s %llu %s",
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelReceiveIsMasterDownReply, ri,
+ "%s is-master-down-by-addr %s %s %llu %s",
+ sentinelInstanceMapCommand(ri,"SENTINEL"),
master->addr->ip, port,
sentinel.current_epoch,
(master->failover_state > SENTINEL_FAILOVER_STATE_NONE) ?
- server.runid : "*");
- if (retval == REDIS_OK) ri->pending_commands++;
+ sentinel.myid : "*");
+ if (retval == C_OK) ri->link->pending_commands++;
}
dictReleaseIterator(di);
}
/* =============================== FAILOVER ================================= */
+/* Crash because of user request via SENTINEL simulate-failure command. */
+void sentinelSimFailureCrash(void) {
+ serverLog(LL_WARNING,
+ "Sentinel CRASH because of SENTINEL simulate-failure");
+ exit(99);
+}
+
/* Vote for the sentinel with 'req_runid' or return the old vote if already
- * voted for the specifed 'req_epoch' or one greater.
+ * voted for the specified 'req_epoch' or one greater.
*
* If a vote is not available returns NULL, otherwise return the Sentinel
* runid and populate the leader_epoch with the epoch of the vote. */
@@ -3238,7 +3765,7 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
if (req_epoch > sentinel.current_epoch) {
sentinel.current_epoch = req_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
}
@@ -3248,12 +3775,12 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
master->leader = sdsnew(req_runid);
master->leader_epoch = sentinel.current_epoch;
sentinelFlushConfig();
- sentinelEvent(REDIS_WARNING,"+vote-for-leader",master,"%s %llu",
+ sentinelEvent(LL_WARNING,"+vote-for-leader",master,"%s %llu",
master->leader, (unsigned long long) master->leader_epoch);
        /* If we did not vote for ourselves, set the master failover start
* time to now, in order to force a delay before we can start a
* failover for the same master. */
- if (strcasecmp(master->leader,server.runid))
+ if (strcasecmp(master->leader,sentinel.myid))
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
}
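
Condensed, the vote-once-per-epoch rule above boils down to: grant a
new vote only when the request carries a newer epoch than the last vote
cast, otherwise return the old vote unchanged. A toy approximation of
just that rule (it deliberately ignores config flushing, events and the
failover delay):

    #include <stdio.h>
    #include <stdint.h>

    static char leader[41] = "";
    static uint64_t leader_epoch = 0;

    /* Grant the vote only for a newer epoch; otherwise the previous
     * vote stands. */
    static const char *vote(const char *req_runid, uint64_t req_epoch) {
        if (req_epoch > leader_epoch) {
            snprintf(leader, sizeof(leader), "%s", req_runid);
            leader_epoch = req_epoch;
        }
        return leader[0] ? leader : NULL;
    }

    int main(void) {
        printf("%s\n", vote("runid-a", 5));  /* runid-a: first in epoch 5 */
        printf("%s\n", vote("runid-b", 5));  /* runid-a: already voted */
        printf("%s\n", vote("runid-b", 6));  /* runid-b: newer epoch */
        return 0;
    }
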
@@ -3269,16 +3796,16 @@ struct sentinelLeader {
/* Helper function for sentinelGetLeader, increment the counter
* relative to the specified runid. */
int sentinelLeaderIncr(dict *counters, char *runid) {
- dictEntry *de = dictFind(counters,runid);
+ dictEntry *existing, *de;
uint64_t oldval;
- if (de) {
- oldval = dictGetUnsignedIntegerVal(de);
- dictSetUnsignedIntegerVal(de,oldval+1);
+ de = dictAddRaw(counters,runid,&existing);
+ if (existing) {
+ oldval = dictGetUnsignedIntegerVal(existing);
+ dictSetUnsignedIntegerVal(existing,oldval+1);
return oldval+1;
} else {
- de = dictAddRaw(counters,runid);
- redisAssert(de != NULL);
+ serverAssert(de != NULL);
dictSetUnsignedIntegerVal(de,1);
return 1;
}
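
The new sentinelLeaderIncr above pays for a single hash probe: the
updated dictAddRaw() either inserts runid and returns the fresh entry,
or reports the entry already present through its third parameter,
whereas the old code did a dictFind followed by a dictAddRaw. A toy
standalone analogue of that one-pass counter upsert (a linear table
standing in for the dict; not the real implementation):

    #include <stdio.h>
    #include <string.h>

    struct entry { const char *key; unsigned long val; };

    /* One scan either finds the key or claims the first free slot
     * (the toy assumes a free slot always exists). */
    static unsigned long incr(struct entry *tab, int n, const char *key) {
        struct entry *slot = NULL;
        for (int i = 0; i < n; i++) {
            if (tab[i].key && !strcmp(tab[i].key, key)) return ++tab[i].val;
            if (!tab[i].key && !slot) slot = &tab[i];
        }
        slot->key = key;
        return slot->val = 1;
    }

    int main(void) {
        struct entry votes[8] = {{0}};
        printf("%lu\n", incr(votes, 8, "runid-a"));  /* 1 */
        printf("%lu\n", incr(votes, 8, "runid-a"));  /* 2 */
        printf("%lu\n", incr(votes, 8, "runid-b"));  /* 1 */
        return 0;
    }
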
@@ -3300,10 +3827,10 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
uint64_t leader_epoch;
uint64_t max_votes = 0;
- redisAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
+ serverAssert(master->flags & (SRI_O_DOWN|SRI_FAILOVER_IN_PROGRESS));
counters = dictCreate(&leaderVotesDictType,NULL);
- voters = dictSize(master->sentinels)+1; /* All the other sentinels and me. */
+ voters = dictSize(master->sentinels)+1; /* All the other sentinels and me.*/
/* Count other sentinels votes */
di = dictGetIterator(master->sentinels);
@@ -3334,7 +3861,7 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
if (winner)
myvote = sentinelVoteLeader(master,epoch,winner,&leader_epoch);
else
- myvote = sentinelVoteLeader(master,epoch,server.runid,&leader_epoch);
+ myvote = sentinelVoteLeader(master,epoch,sentinel.myid,&leader_epoch);
if (myvote && leader_epoch == epoch) {
uint64_t votes = sentinelLeaderIncr(counters,myvote);
@@ -3362,8 +3889,8 @@ char *sentinelGetLeader(sentinelRedisInstance *master, uint64_t epoch) {
*
* If Host is NULL the function sends "SLAVEOF NO ONE".
*
- * The command returns REDIS_OK if the SLAVEOF command was accepted for
- * (later) delivery otherwise REDIS_ERR. The command replies are just
+ * The command returns C_OK if the SLAVEOF command was accepted for
+ * (later) delivery otherwise C_ERR. The command replies are just
* discarded. */
int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
char portstr[32];
@@ -3381,56 +3908,62 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
/* In order to send SLAVEOF in a safe way, we send a transaction performing
* the following tasks:
* 1) Reconfigure the instance according to the specified host/port params.
- * 2) Rewrite the configuraiton.
+ * 2) Rewrite the configuration.
 * 3) Disconnect all clients (but this one sending the command) in order
* to trigger the ask-master-on-reconnection protocol for connected
* clients.
*
* Note that we don't check the replies returned by commands, since we
* will observe instead the effects in the next INFO output. */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "MULTI");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
-
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
-
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"MULTI"));
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
+
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "%s %s %s",
+ sentinelInstanceMapCommand(ri,"SLAVEOF"),
+ host, portstr);
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
+
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "%s REWRITE",
+ sentinelInstanceMapCommand(ri,"CONFIG"));
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
/* CLIENT KILL TYPE <type> is only supported starting from Redis 2.8.12,
* however sending it to an instance not understanding this command is not
 * an issue because CLIENT is a variadic command, so Redis will not
 * recognize it as a syntax error, and the transaction will not fail (but
* only the unsupported command will fail). */
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "CLIENT KILL TYPE normal");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
-
- retval = redisAsyncCommand(ri->cc,
- sentinelDiscardReplyCallback, NULL, "EXEC");
- if (retval == REDIS_ERR) return retval;
- ri->pending_commands++;
-
- return REDIS_OK;
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "%s KILL TYPE normal",
+ sentinelInstanceMapCommand(ri,"CLIENT"));
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
+
+ retval = redisAsyncCommand(ri->link->cc,
+ sentinelDiscardReplyCallback, ri, "%s",
+ sentinelInstanceMapCommand(ri,"EXEC"));
+ if (retval == C_ERR) return retval;
+ ri->link->pending_commands++;
+
+ return C_OK;
}
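
Every command name in the transaction above goes through
sentinelInstanceMapCommand(), whose body lies outside this hunk.
Judging from the rename-command branch added to SENTINEL SET earlier in
this patch, it presumably resolves per-instance renames through
ri->renamed_commands and falls back to the original name. A
hypothetical stand-in for that lookup, under that assumption only:

    #include <stdio.h>
    #include <strings.h>   /* strcasecmp */

    struct rename { const char *from, *to; };

    /* Hypothetical analogue: return the per-instance rename if one is
     * configured, otherwise the command name unchanged. */
    static const char *map_command(const struct rename *tab, int n,
                                   const char *cmd) {
        for (int i = 0; i < n; i++)
            if (!strcasecmp(tab[i].from, cmd)) return tab[i].to;
        return cmd;
    }

    int main(void) {
        struct rename tab[] = {{"CONFIG","GUESSME"},{"SLAVEOF","RENAMED"}};
        printf("%s\n", map_command(tab, 2, "CONFIG"));  /* GUESSME */
        printf("%s\n", map_command(tab, 2, "MULTI"));   /* MULTI */
        return 0;
    }
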
/* Setup the master state to start a failover. */
void sentinelStartFailover(sentinelRedisInstance *master) {
- redisAssert(master->flags & SRI_MASTER);
+ serverAssert(master->flags & SRI_MASTER);
master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
master->flags |= SRI_FAILOVER_IN_PROGRESS;
master->failover_epoch = ++sentinel.current_epoch;
- sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
+ sentinelEvent(LL_WARNING,"+new-epoch",master,"%llu",
(unsigned long long) sentinel.current_epoch);
- sentinelEvent(REDIS_WARNING,"+try-failover",master,"%@");
+ sentinelEvent(LL_WARNING,"+try-failover",master,"%@");
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
master->failover_state_change_time = mstime();
}
@@ -3465,7 +3998,7 @@ int sentinelStartFailoverIfNeeded(sentinelRedisInstance *master) {
ctime_r(&clock,ctimebuf);
ctimebuf[24] = '\0'; /* Remove newline. */
master->failover_delay_logged = master->failover_start_time;
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Next failover delay: I will not start a failover before %s",
ctimebuf);
}
@@ -3517,11 +4050,11 @@ int compareSlavesForPromotion(const void *a, const void *b) {
return (*sa)->slave_priority - (*sb)->slave_priority;
/* If priority is the same, select the slave with greater replication
- * offset (processed more data frmo the master). */
+ * offset (processed more data from the master). */
if ((*sa)->slave_repl_offset > (*sb)->slave_repl_offset) {
return -1; /* a < b */
} else if ((*sa)->slave_repl_offset < (*sb)->slave_repl_offset) {
- return 1; /* b > a */
+ return 1; /* a > b */
}
     * If the replication offset is the same, select the slave that has
@@ -3554,8 +4087,9 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
mstime_t info_validity_time;
- if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN|SRI_DISCONNECTED)) continue;
- if (mstime() - slave->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
+ if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
+ if (slave->link->disconnected) continue;
+ if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5) continue;
if (slave->slave_priority == 0) continue;
/* If the master is in SDOWN state we get INFO for slaves every second.
@@ -3586,7 +4120,7 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
/* Check if we are the leader for the failover epoch. */
leader = sentinelGetLeader(ri, ri->failover_epoch);
- isleader = leader && strcasecmp(leader,server.runid) == 0;
+ isleader = leader && strcasecmp(leader,sentinel.myid) == 0;
sdsfree(leader);
/* If I'm not the leader, and it is not a forced failover via
@@ -3600,15 +4134,17 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
election_timeout = ri->failover_timeout;
/* Abort the failover if I'm not the leader after some time. */
if (mstime() - ri->failover_start_time > election_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-not-elected",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-not-elected",ri,"%@");
sentinelAbortFailover(ri);
}
return;
}
- sentinelEvent(REDIS_WARNING,"+elected-leader",ri,"%@");
+ sentinelEvent(LL_WARNING,"+elected-leader",ri,"%@");
+ if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION)
+ sentinelSimFailureCrash();
ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
ri->failover_state_change_time = mstime();
- sentinelEvent(REDIS_WARNING,"+failover-state-select-slave",ri,"%@");
+ sentinelEvent(LL_WARNING,"+failover-state-select-slave",ri,"%@");
}
void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
@@ -3617,15 +4153,15 @@ void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
/* We don't handle the timeout in this state as the function aborts
     * the failover or moves forward to the next state. */
if (slave == NULL) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-no-good-slave",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-no-good-slave",ri,"%@");
sentinelAbortFailover(ri);
} else {
- sentinelEvent(REDIS_WARNING,"+selected-slave",slave,"%@");
+ sentinelEvent(LL_WARNING,"+selected-slave",slave,"%@");
slave->flags |= SRI_PROMOTED;
ri->promoted_slave = slave;
ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
ri->failover_state_change_time = mstime();
- sentinelEvent(REDIS_NOTICE,"+failover-state-send-slaveof-noone",
+ sentinelEvent(LL_NOTICE,"+failover-state-send-slaveof-noone",
slave, "%@");
}
}
@@ -3636,9 +4172,9 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
/* We can't send the command to the promoted slave if it is now
* disconnected. Retry again and again with this state until the timeout
* is reached, then abort the failover. */
- if (ri->promoted_slave->flags & SRI_DISCONNECTED) {
+ if (ri->promoted_slave->link->disconnected) {
if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
sentinelAbortFailover(ri);
}
return;
@@ -3649,8 +4185,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
* really care about the reply. We check if it worked indirectly observing
* if INFO returns a different role (master instead of slave). */
retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
- if (retval != REDIS_OK) return;
- sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
+ if (retval != C_OK) return;
+ sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
ri->promoted_slave,"%@");
ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
ri->failover_state_change_time = mstime();
@@ -3662,7 +4198,7 @@ void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
/* Just handle the timeout. Switching to the next state is handled
* by the function parsing the INFO command of the promoted slave. */
if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
- sentinelEvent(REDIS_WARNING,"-failover-abort-slave-timeout",ri,"%@");
+ sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
sentinelAbortFailover(ri);
}
}
@@ -3694,11 +4230,11 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (elapsed > master->failover_timeout) {
not_reconfigured = 0;
timeout = 1;
- sentinelEvent(REDIS_WARNING,"+failover-end-for-timeout",master,"%@");
+ sentinelEvent(LL_WARNING,"+failover-end-for-timeout",master,"%@");
}
if (not_reconfigured == 0) {
- sentinelEvent(REDIS_WARNING,"+failover-end",master,"%@");
+ sentinelEvent(LL_WARNING,"+failover-end",master,"%@");
master->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG;
master->failover_state_change_time = mstime();
}
@@ -3715,14 +4251,14 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
sentinelRedisInstance *slave = dictGetVal(de);
int retval;
- if (slave->flags &
- (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
+ if (slave->flags & (SRI_RECONF_DONE|SRI_RECONF_SENT)) continue;
+ if (slave->link->disconnected) continue;
retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->ip,
master->promoted_slave->addr->port);
- if (retval == REDIS_OK) {
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
+ if (retval == C_OK) {
+ sentinelEvent(LL_NOTICE,"+slave-reconf-sent-be",slave,"%@");
slave->flags |= SRI_RECONF_SENT;
}
}
@@ -3764,24 +4300,24 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
(mstime() - slave->slave_reconf_sent_time) >
SENTINEL_SLAVE_RECONF_TIMEOUT)
{
- sentinelEvent(REDIS_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
+ sentinelEvent(LL_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
slave->flags &= ~SRI_RECONF_SENT;
slave->flags |= SRI_RECONF_DONE;
}
/* Nothing to do for instances that are disconnected or already
* in RECONF_SENT state. */
- if (slave->flags & (SRI_DISCONNECTED|SRI_RECONF_SENT|SRI_RECONF_INPROG))
- continue;
+ if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) continue;
+ if (slave->link->disconnected) continue;
/* Send SLAVEOF <new master>. */
retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->ip,
master->promoted_slave->addr->port);
- if (retval == REDIS_OK) {
+ if (retval == C_OK) {
slave->flags |= SRI_RECONF_SENT;
slave->slave_reconf_sent_time = mstime();
- sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
+ sentinelEvent(LL_NOTICE,"+slave-reconf-sent",slave,"%@");
in_progress++;
}
}
@@ -3798,7 +4334,7 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
sentinelRedisInstance *ref = master->promoted_slave ?
master->promoted_slave : master;
- sentinelEvent(REDIS_WARNING,"+switch-master",master,"%s %s %d %s %d",
+ sentinelEvent(LL_WARNING,"+switch-master",master,"%s %s %d %s %d",
master->name, master->addr->ip, master->addr->port,
ref->addr->ip, ref->addr->port);
@@ -3806,7 +4342,7 @@ void sentinelFailoverSwitchToPromotedSlave(sentinelRedisInstance *master) {
}
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
- redisAssert(ri->flags & SRI_MASTER);
+ serverAssert(ri->flags & SRI_MASTER);
if (!(ri->flags & SRI_FAILOVER_IN_PROGRESS)) return;
@@ -3835,8 +4371,8 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
* the slave -> master switch. Otherwise the failover can't be aborted and
* will reach its end (possibly by timeout). */
void sentinelAbortFailover(sentinelRedisInstance *ri) {
- redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
- redisAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
+ serverAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
+ serverAssert(ri->failover_state <= SENTINEL_FAILOVER_STATE_WAIT_PROMOTION);
ri->flags &= ~(SRI_FAILOVER_IN_PROGRESS|SRI_FORCE_FAILOVER);
ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
@@ -3866,7 +4402,7 @@ void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
if (sentinel.tilt) {
if (mstime()-sentinel.tilt_start_time < SENTINEL_TILT_PERIOD) return;
sentinel.tilt = 0;
- sentinelEvent(REDIS_WARNING,"-tilt",NULL,"#tilt mode exited");
+ sentinelEvent(LL_WARNING,"-tilt",NULL,"#tilt mode exited");
}
/* Every kind of instance */
@@ -3939,7 +4475,7 @@ void sentinelCheckTiltCondition(void) {
if (delta < 0 || delta > SENTINEL_TILT_TRIGGER) {
sentinel.tilt = 1;
sentinel.tilt_start_time = mstime();
- sentinelEvent(REDIS_WARNING,"+tilt",NULL,"#tilt mode entered");
+ sentinelEvent(LL_WARNING,"+tilt",NULL,"#tilt mode entered");
}
sentinel.previous_time = mstime();
}
@@ -3957,6 +4493,6 @@ void sentinelTimer(void) {
* exactly continue to stay synchronized asking to be voted at the
* same time again and again (resulting in nobody likely winning the
* election because of split brain voting). */
- server.hz = REDIS_DEFAULT_HZ + rand() % REDIS_DEFAULT_HZ;
+ server.hz = CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ;
}
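
With CONFIG_DEFAULT_HZ at its stock value of 10, the line above makes
each Sentinel tick somewhere between 10 and 19 times per second, so
two Sentinels that once voted in lockstep drift apart instead of
splitting the vote election after election. A toy view of the jitter:

    #include <stdio.h>
    #include <stdlib.h>

    #define CONFIG_DEFAULT_HZ 10   /* the stock Redis default */

    int main(void) {
        /* Real Sentinels seed differently by accident of start time;
         * fixed seeds here just show the picked rates diverge. */
        for (unsigned seed = 1; seed <= 3; seed++) {
            srand(seed);
            printf("sentinel %u: hz = %d\n", seed,
                   CONFIG_DEFAULT_HZ + rand() % CONFIG_DEFAULT_HZ);
        }
        return 0;
    }
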
diff --git a/src/redis.c b/src/server.c
index 13df8d28e..b537ee04a 100644
--- a/src/redis.c
+++ b/src/server.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,11 +27,12 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include "cluster.h"
#include "slowlog.h"
#include "bio.h"
#include "latency.h"
+#include "atomicvar.h"
#include <time.h>
#include <signal.h>
@@ -53,7 +54,7 @@
#include <sys/resource.h>
#include <sys/utsname.h>
#include <locale.h>
-#include <sys/sysctl.h>
+#include <sys/socket.h>
/* Our shared "common" objects */
@@ -68,7 +69,8 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan;
/*================================= Globals ================================= */
/* Global vars */
-struct redisServer server; /* server global state */
+struct redisServer server; /* Server global state */
+volatile unsigned long lru_clock; /* Server global current LRU time. */
/* Our command table.
*
@@ -123,6 +125,7 @@ struct redisServer server; /* server global state */
* are not fast commands.
*/
struct redisCommand redisCommandTable[] = {
+ {"module",moduleCommand,-2,"as",0,NULL,0,0,0,0,0},
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
{"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
{"setnx",setnxCommand,3,"wmF",0,NULL,1,1,1,0,0},
@@ -131,23 +134,25 @@ struct redisCommand redisCommandTable[] = {
{"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
{"strlen",strlenCommand,2,"rF",0,NULL,1,1,1,0,0},
{"del",delCommand,-2,"w",0,NULL,1,-1,1,0,0},
- {"exists",existsCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"unlink",unlinkCommand,-2,"wF",0,NULL,1,-1,1,0,0},
+ {"exists",existsCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getbit",getbitCommand,3,"rF",0,NULL,1,1,1,0,0},
+ {"bitfield",bitfieldCommand,-2,"wm",0,NULL,1,1,1,0,0},
{"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
{"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
{"incr",incrCommand,2,"wmF",0,NULL,1,1,1,0,0},
{"decr",decrCommand,2,"wmF",0,NULL,1,1,1,0,0},
- {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
+ {"mget",mgetCommand,-2,"rF",0,NULL,1,-1,1,0,0},
{"rpush",rpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"lpush",lpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
- {"rpushx",rpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
- {"lpushx",lpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
+ {"rpushx",rpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
+ {"lpushx",lpushxCommand,-3,"wmF",0,NULL,1,1,1,0,0},
{"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
{"rpop",rpopCommand,2,"wF",0,NULL,1,1,1,0,0},
{"lpop",lpopCommand,2,"wF",0,NULL,1,1,1,0,0},
- {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
+ {"brpop",brpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
{"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
{"llen",llenCommand,2,"rF",0,NULL,1,1,1,0,0},
@@ -162,7 +167,7 @@ struct redisCommand redisCommandTable[] = {
{"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
{"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"spop",spopCommand,-2,"wRsF",0,NULL,1,1,1,0,0},
+ {"spop",spopCommand,-2,"wRF",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
@@ -193,15 +198,20 @@ struct redisCommand redisCommandTable[] = {
{"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
{"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
- {"hset",hsetCommand,4,"wmF",0,NULL,1,1,1,0,0},
+ {"zpopmin",zpopminCommand,-2,"wF",0,NULL,1,1,1,0,0},
+ {"zpopmax",zpopmaxCommand,-2,"wF",0,NULL,1,1,1,0,0},
+ {"bzpopmin",bzpopminCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
+ {"bzpopmax",bzpopmaxCommand,-2,"wsF",0,NULL,1,-2,1,0,0},
+ {"hset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
{"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0},
- {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
- {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
+ {"hmset",hsetCommand,-4,"wmF",0,NULL,1,1,1,0,0},
+ {"hmget",hmgetCommand,-3,"rF",0,NULL,1,1,1,0,0},
{"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0},
{"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
{"hlen",hlenCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"hstrlen",hstrlenCommand,3,"rF",0,NULL,1,1,1,0,0},
{"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
{"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
@@ -214,7 +224,8 @@ struct redisCommand redisCommandTable[] = {
{"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
{"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
- {"select",selectCommand,2,"rlF",0,NULL,0,0,0,0,0},
+ {"select",selectCommand,2,"lF",0,NULL,0,0,0,0,0},
+ {"swapdb",swapdbCommand,3,"wF",0,NULL,0,0,0,0,0},
{"move",moveCommand,3,"wF",0,NULL,1,1,1,0,0},
{"rename",renameCommand,3,"w",0,NULL,1,2,1,0,0},
{"renamenx",renamenxCommand,3,"wF",0,NULL,1,2,1,0,0},
@@ -225,81 +236,108 @@ struct redisCommand redisCommandTable[] = {
{"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
{"scan",scanCommand,-2,"rR",0,NULL,0,0,0,0,0},
{"dbsize",dbsizeCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"auth",authCommand,2,"rsltF",0,NULL,0,0,0,0,0},
- {"ping",pingCommand,-1,"rtF",0,NULL,0,0,0,0,0},
- {"echo",echoCommand,2,"rF",0,NULL,0,0,0,0,0},
- {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0},
- {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0},
- {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0},
- {"shutdown",shutdownCommand,-1,"arlt",0,NULL,0,0,0,0,0},
- {"lastsave",lastsaveCommand,1,"rRF",0,NULL,0,0,0,0,0},
+ {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0},
+ {"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
+ {"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
+ {"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
+ {"bgsave",bgsaveCommand,-1,"a",0,NULL,0,0,0,0,0},
+ {"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0},
+ {"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0},
+ {"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
{"type",typeCommand,2,"rF",0,NULL,1,1,1,0,0},
- {"multi",multiCommand,1,"rsF",0,NULL,0,0,0,0,0},
+ {"multi",multiCommand,1,"sF",0,NULL,0,0,0,0,0},
{"exec",execCommand,1,"sM",0,NULL,0,0,0,0,0},
- {"discard",discardCommand,1,"rsF",0,NULL,0,0,0,0,0},
+ {"discard",discardCommand,1,"sF",0,NULL,0,0,0,0,0},
{"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
{"psync",syncCommand,3,"ars",0,NULL,0,0,0,0,0},
- {"replconf",replconfCommand,-1,"arslt",0,NULL,0,0,0,0,0},
- {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
- {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
+ {"replconf",replconfCommand,-1,"aslt",0,NULL,0,0,0,0,0},
+ {"flushdb",flushdbCommand,-1,"w",0,NULL,0,0,0,0,0},
+ {"flushall",flushallCommand,-1,"w",0,NULL,0,0,0,0,0},
{"sort",sortCommand,-2,"wm",0,sortGetKeys,1,1,1,0,0},
- {"info",infoCommand,-1,"rlt",0,NULL,0,0,0,0,0},
- {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0},
+ {"info",infoCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"monitor",monitorCommand,1,"as",0,NULL,0,0,0,0,0},
{"ttl",ttlCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"touch",touchCommand,-2,"rF",0,NULL,1,1,1,0,0},
{"pttl",pttlCommand,2,"rF",0,NULL,1,1,1,0,0},
{"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
{"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
{"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
{"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0},
- {"config",configCommand,-2,"art",0,NULL,0,0,0,0,0},
- {"subscribe",subscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
- {"unsubscribe",unsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
- {"psubscribe",psubscribeCommand,-2,"rpslt",0,NULL,0,0,0,0,0},
- {"punsubscribe",punsubscribeCommand,-1,"rpslt",0,NULL,0,0,0,0,0},
- {"publish",publishCommand,3,"pltrF",0,NULL,0,0,0,0,0},
- {"pubsub",pubsubCommand,-2,"pltrR",0,NULL,0,0,0,0,0},
- {"watch",watchCommand,-2,"rsF",0,NULL,1,-1,1,0,0},
- {"unwatch",unwatchCommand,1,"rsF",0,NULL,0,0,0,0,0},
- {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0},
+ {"config",configCommand,-2,"lat",0,NULL,0,0,0,0,0},
+ {"subscribe",subscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
+ {"unsubscribe",unsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
+ {"psubscribe",psubscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
+ {"punsubscribe",punsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
+ {"publish",publishCommand,3,"pltF",0,NULL,0,0,0,0,0},
+ {"pubsub",pubsubCommand,-2,"pltR",0,NULL,0,0,0,0,0},
+ {"watch",watchCommand,-2,"sF",0,NULL,1,-1,1,0,0},
+ {"unwatch",unwatchCommand,1,"sF",0,NULL,0,0,0,0,0},
+ {"cluster",clusterCommand,-2,"a",0,NULL,0,0,0,0,0},
{"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
{"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
- {"migrate",migrateCommand,-6,"w",0,NULL,0,0,0,0,0},
- {"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0},
- {"readonly",readonlyCommand,1,"rF",0,NULL,0,0,0,0,0},
- {"readwrite",readwriteCommand,1,"rF",0,NULL,0,0,0,0,0},
+ {"migrate",migrateCommand,-6,"w",0,migrateGetKeys,0,0,0,0,0},
+ {"asking",askingCommand,1,"F",0,NULL,0,0,0,0,0},
+ {"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0},
+ {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0},
{"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0},
- {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0},
- {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0},
+ {"object",objectCommand,-2,"r",0,NULL,2,2,1,0,0},
+ {"memory",memoryCommand,-2,"r",0,NULL,0,0,0,0,0},
+ {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0},
{"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
{"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
- {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0},
- {"script",scriptCommand,-2,"rs",0,NULL,0,0,0,0,0},
- {"time",timeCommand,1,"rRF",0,NULL,0,0,0,0,0},
+ {"slowlog",slowlogCommand,-2,"a",0,NULL,0,0,0,0,0},
+ {"script",scriptCommand,-2,"s",0,NULL,0,0,0,0,0},
+ {"time",timeCommand,1,"RF",0,NULL,0,0,0,0,0},
{"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
{"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
{"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
- {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0},
- {"command",commandCommand,0,"rlt",0,NULL,0,0,0,0,0},
- {"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0},
+ {"wait",waitCommand,3,"s",0,NULL,0,0,0,0,0},
+ {"command",commandCommand,0,"lt",0,NULL,0,0,0,0,0},
+ {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0},
+ {"georadius",georadiusCommand,-6,"w",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadius_ro",georadiusroCommand,-6,"r",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadiusbymember",georadiusbymemberCommand,-5,"w",0,georadiusGetKeys,1,1,1,0,0},
+ {"georadiusbymember_ro",georadiusbymemberroCommand,-5,"r",0,georadiusGetKeys,1,1,1,0,0},
+ {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0},
+ {"pfselftest",pfselftestCommand,1,"a",0,NULL,0,0,0,0,0},
{"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
- {"pfcount",pfcountCommand,-2,"r",0,NULL,1,1,1,0,0},
+ {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0},
{"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
{"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
- {"latency",latencyCommand,-2,"arslt",0,NULL,0,0,0,0,0}
+ {"xadd",xaddCommand,-5,"wmF",0,NULL,1,1,1,0,0},
+ {"xrange",xrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
+ {"xrevrange",xrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
+ {"xlen",xlenCommand,2,"rF",0,NULL,1,1,1,0,0},
+ {"xread",xreadCommand,-4,"rs",0,xreadGetKeys,1,1,1,0,0},
+ {"xreadgroup",xreadCommand,-7,"ws",0,xreadGetKeys,1,1,1,0,0},
+ {"xgroup",xgroupCommand,-2,"wm",0,NULL,2,2,1,0,0},
+ {"xack",xackCommand,-4,"wF",0,NULL,1,1,1,0,0},
+ {"xpending",xpendingCommand,-3,"r",0,NULL,1,1,1,0,0},
+ {"xclaim",xclaimCommand,-6,"wF",0,NULL,1,1,1,0,0},
+ {"xinfo",xinfoCommand,-2,"r",0,NULL,2,2,1,0,0},
+ {"xdel",xdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
+ {"xtrim",xtrimCommand,-2,"wF",0,NULL,1,1,1,0,0},
+ {"post",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"host:",securityWarningCommand,-1,"lt",0,NULL,0,0,0,0,0},
+ {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0}
};
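
For readers skimming the table churn above, the anatomy of one entry,
using the updated EXISTS row as the example:

    {"exists", existsCommand, -2, "rF", 0, NULL, 1, -1, 1, 0, 0}

The fields are name, handler, arity, sflags, computed flags, optional
key-extraction callback, then first key, last key and key step, with
two trailing runtime counters (microseconds, calls). Arity -2 means
"at least two arguments" (this patch makes EXISTS variadic), "rF"
marks it read-only and fast, and 1,-1,1 says every argument from the
first to the last is a key.
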
-struct evictionPoolEntry *evictionPoolAlloc(void);
-
/*============================ Utility functions ============================ */
+/* We use a private localtime implementation which is fork-safe. The logging
+ * function of Redis may be called from other threads. */
+void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
+
/* Low level logging. To use only for very big messages, otherwise
- * redisLog() is to prefer. */
-void redisLogRaw(int level, const char *msg) {
+ * serverLog() is to prefer. */
+void serverLogRaw(int level, const char *msg) {
const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
const char *c = ".-*#";
FILE *fp;
char buf[64];
- int rawmode = (level & REDIS_LOG_RAW);
+ int rawmode = (level & LL_RAW);
int log_to_stdout = server.logfile[0] == '\0';
level &= 0xff; /* clear flags */
@@ -317,7 +355,9 @@ void redisLogRaw(int level, const char *msg) {
pid_t pid = getpid();
gettimeofday(&tv,NULL);
- off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
+ struct tm tm;
+ nolocks_localtime(&tm,tv.tv_sec,server.timezone,server.daylight_active);
+ off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm);
snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
if (server.sentinel_mode) {
role_char = 'X'; /* Sentinel. */
@@ -335,12 +375,12 @@ void redisLogRaw(int level, const char *msg) {
if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
}
-/* Like redisLogRaw() but with printf-alike support. This is the function that
+/* Like serverLogRaw() but with printf-alike support. This is the function that
* is used across the code. The raw version is only used in order to dump
* the INFO output on crash. */
-void redisLog(int level, const char *fmt, ...) {
+void serverLog(int level, const char *fmt, ...) {
va_list ap;
- char msg[REDIS_MAX_LOGMSG_LEN];
+ char msg[LOG_MAX_LEN];
if ((level&0xff) < server.verbosity) return;
@@ -348,7 +388,7 @@ void redisLog(int level, const char *fmt, ...) {
vsnprintf(msg, sizeof(msg), fmt, ap);
va_end(ap);
- redisLogRaw(level,msg);
+ serverLogRaw(level,msg);
}
/* Log a fixed message without printf-alike capabilities, in a way that is
@@ -356,8 +396,8 @@ void redisLog(int level, const char *fmt, ...) {
*
* We actually use this only for signals that are not fatal from the point
* of view of Redis. Signals that are going to kill the server anyway and
- * where we need printf-alike features are served by redisLog(). */
-void redisLogFromHandler(int level, const char *msg) {
+ * where we need printf-alike features are served by serverLog(). */
+void serverLogFromHandler(int level, const char *msg) {
int fd;
int log_to_stdout = server.logfile[0] == '\0';
char buf[64];
@@ -391,7 +431,7 @@ long long ustime(void) {
}
/* Return the UNIX time in milliseconds */
-long long mstime(void) {
+mstime_t mstime(void) {
return ustime()/1000;
}
@@ -447,11 +487,11 @@ int dictSdsKeyCaseCompare(void *privdata, const void *key1,
return strcasecmp(key1, key2) == 0;
}
-void dictRedisObjectDestructor(void *privdata, void *val)
+void dictObjectDestructor(void *privdata, void *val)
{
DICT_NOTUSED(privdata);
- if (val == NULL) return; /* Values of swapped out keys as set to NULL */
+ if (val == NULL) return; /* Lazy freeing will set value to NULL. */
decrRefCount(val);
}
@@ -469,16 +509,16 @@ int dictObjKeyCompare(void *privdata, const void *key1,
return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
}
-unsigned int dictObjHash(const void *key) {
+uint64_t dictObjHash(const void *key) {
const robj *o = key;
return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
}
-unsigned int dictSdsHash(const void *key) {
+uint64_t dictSdsHash(const void *key) {
return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}
-unsigned int dictSdsCaseHash(const void *key) {
+uint64_t dictSdsCaseHash(const void *key) {
return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key));
}
@@ -488,8 +528,8 @@ int dictEncObjKeyCompare(void *privdata, const void *key1,
robj *o1 = (robj*) key1, *o2 = (robj*) key2;
int cmp;
- if (o1->encoding == REDIS_ENCODING_INT &&
- o2->encoding == REDIS_ENCODING_INT)
+ if (o1->encoding == OBJ_ENCODING_INT &&
+ o2->encoding == OBJ_ENCODING_INT)
return o1->ptr == o2->ptr;
o1 = getDecodedObject(o1);
@@ -500,20 +540,20 @@ int dictEncObjKeyCompare(void *privdata, const void *key1,
return cmp;
}
-unsigned int dictEncObjHash(const void *key) {
+uint64_t dictEncObjHash(const void *key) {
robj *o = (robj*) key;
if (sdsEncodedObject(o)) {
return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
} else {
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
char buf[32];
int len;
len = ll2string(buf,32,(long)o->ptr);
return dictGenHashFunction((unsigned char*)buf, len);
} else {
- unsigned int hash;
+ uint64_t hash;
o = getDecodedObject(o);
hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
@@ -523,23 +563,45 @@ unsigned int dictEncObjHash(const void *key) {
}
}
-/* Sets type hash table */
-dictType setDictType = {
+/* Generic hash table type where keys are Redis Objects and values
+ * are dummy pointers. */
+dictType objectKeyPointerValueDictType = {
dictEncObjHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictObjectDestructor, /* key destructor */
NULL /* val destructor */
};
-/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
-dictType zsetDictType = {
+/* Like objectKeyPointerValueDictType(), but values can be destroyed, if
+ * not NULL, by calling zfree(). */
+dictType objectKeyHeapPointerValueDictType = {
dictEncObjHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictObjectDestructor, /* key destructor */
+ dictVanillaFree /* val destructor */
+};
+
+/* Set dictionary type. Keys are SDS strings, values are not used. */
+dictType setDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
+};
+
+/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
+dictType zsetDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* Note: SDS string shared & freed by skiplist */
NULL /* val destructor */
};
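/* A minimal sketch of how these dictType tables plug into the dict API
 * (dictTypeExample() is a hypothetical helper; dictCreate/dictAdd/dictFind
 * are the dict.c interface used across this codebase): */
void dictTypeExample(void) {
    dict *set = dictCreate(&setDictType, NULL); /* SDS keys, unused values */
    dictAdd(set, sdsnew("member"), NULL);  /* key owned, freed by dictSdsDestructor */
    sds probe = sdsnew("member");
    dictEntry *de = dictFind(set, probe);  /* hashed with dictSdsHash */
    sdsfree(probe);
    if (de != NULL) { /* "member" is present in the set */ }
    dictRelease(set);
}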
@@ -550,7 +612,7 @@ dictType dbDictType = {
NULL, /* val dup */
dictSdsKeyCompare, /* key compare */
dictSdsDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictObjectDestructor /* val destructor */
};
/* server.lua_scripts sha (as sds string) -> scripts (as robj) cache. */
@@ -560,37 +622,37 @@ dictType shaScriptObjectDictType = {
NULL, /* val dup */
dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictObjectDestructor /* val destructor */
};
/* Db->expires */
dictType keyptrDictType = {
- dictSdsHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCompare, /* key compare */
- NULL, /* key destructor */
- NULL /* val destructor */
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
};
/* Command table. sds string -> command struct pointer. */
dictType commandTableDictType = {
- dictSdsCaseHash, /* hash function */
- NULL, /* key dup */
- NULL, /* val dup */
- dictSdsKeyCaseCompare, /* key compare */
- dictSdsDestructor, /* key destructor */
- NULL /* val destructor */
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
};
/* Hash type hash table (note that small hashes are represented with ziplists) */
dictType hashDictType = {
- dictEncObjHash, /* hash function */
+ dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
- dictEncObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
- dictRedisObjectDestructor /* val destructor */
+ dictSdsKeyCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ dictSdsDestructor /* val destructor */
};
/* Keylist hash table type has unencoded redis objects as keys and
@@ -601,7 +663,7 @@ dictType keylistDictType = {
NULL, /* key dup */
NULL, /* val dup */
dictObjKeyCompare, /* key compare */
- dictRedisObjectDestructor, /* key destructor */
+ dictObjectDestructor, /* key destructor */
dictListDestructor /* val destructor */
};
@@ -628,6 +690,18 @@ dictType clusterNodesBlackListDictType = {
NULL /* val destructor */
};
+/* Modules system dictionary type. Keys are module names (SDS strings),
+ * values are pointers to RedisModule structs. */
+dictType modulesDictType = {
+ dictSdsCaseHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCaseCompare, /* key compare */
+ dictSdsDestructor, /* key destructor */
+ NULL /* val destructor */
+};
+
/* Migrate cache dict type. */
dictType migrateCacheDictType = {
dictSdsHash, /* hash function */
@@ -655,11 +729,11 @@ int htNeedsResize(dict *dict) {
size = dictSlots(dict);
used = dictSize(dict);
- return (size && used && size > DICT_HT_INITIAL_SIZE &&
- (used*100/size < REDIS_HT_MINFILL));
+ return (size > DICT_HT_INITIAL_SIZE &&
+ (used*100/size < HASHTABLE_MIN_FILL));
}
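/* Worked example of the condition above, with HASHTABLE_MIN_FILL at its
 * default of 10: for size = 4096 slots and used = 300 keys,
 * 300*100/4096 = 7 < 10, so the table is resized; with used = 500,
 * 500*100/4096 = 12 >= 10 and the table is left as it is. */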
-/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
+/* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL
* we resize the hash table to save memory */
void tryResizeHashTables(int dbid) {
if (htNeedsResize(server.db[dbid].dict))
@@ -704,179 +778,6 @@ void updateDictResizePolicy(void) {
/* ======================= Cron: called every 100 ms ======================== */
-/* Helper function for the activeExpireCycle() function.
- * This function will try to expire the key that is stored in the hash table
- * entry 'de' of the 'expires' hash table of a Redis database.
- *
- * If the key is found to be expired, it is removed from the database and
- * 1 is returned. Otherwise no operation is performed and 0 is returned.
- *
- * When a key is expired, server.stat_expiredkeys is incremented.
- *
- * The parameter 'now' is the current time in milliseconds as is passed
- * to the function to avoid too many gettimeofday() syscalls. */
-int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
- long long t = dictGetSignedIntegerVal(de);
- if (now > t) {
- sds key = dictGetKey(de);
- robj *keyobj = createStringObject(key,sdslen(key));
-
- propagateExpire(db,keyobj);
- dbDelete(db,keyobj);
- notifyKeyspaceEvent(REDIS_NOTIFY_EXPIRED,
- "expired",keyobj,db->id);
- decrRefCount(keyobj);
- server.stat_expiredkeys++;
- return 1;
- } else {
- return 0;
- }
-}
-
-/* Try to expire a few timed out keys. The algorithm used is adaptive and
- * will use few CPU cycles if there are few expiring keys, otherwise
- * it will get more aggressive to avoid that too much memory is used by
- * keys that can be removed from the keyspace.
- *
- * No more than REDIS_DBCRON_DBS_PER_CALL databases are tested at every
- * iteration.
- *
- * This kind of call is used when Redis detects that timelimit_exit is
- * true, so there is more work to do, and we do it more incrementally from
- * the beforeSleep() function of the event loop.
- *
- * Expire cycle type:
- *
- * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
- * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
- * microseconds, and is not repeated again before the same amount of time.
- *
- * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
- * executed, where the time limit is a percentage of the REDIS_HZ period
- * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. */
-
-void activeExpireCycle(int type) {
- /* This function has some global state in order to continue the work
- * incrementally across calls. */
- static unsigned int current_db = 0; /* Last DB tested. */
- static int timelimit_exit = 0; /* Time limit hit in previous call? */
- static long long last_fast_cycle = 0; /* When last fast cycle ran. */
-
- int j, iteration = 0;
- int dbs_per_call = REDIS_DBCRON_DBS_PER_CALL;
- long long start = ustime(), timelimit;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
- /* Don't start a fast cycle if the previous cycle did not exited
- * for time limt. Also don't repeat a fast cycle for the same period
- * as the fast cycle total duration itself. */
- if (!timelimit_exit) return;
- if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
- last_fast_cycle = start;
- }
-
- /* We usually should test REDIS_DBCRON_DBS_PER_CALL per iteration, with
- * two exceptions:
- *
- * 1) Don't test more DBs than we have.
- * 2) If last time we hit the time limit, we want to scan all DBs
- * in this iteration, as there is work to do in some DB and we don't want
- * expired keys to use memory for too much time. */
- if (dbs_per_call > server.dbnum || timelimit_exit)
- dbs_per_call = server.dbnum;
-
- /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
- * per iteration. Since this function gets called with a frequency of
- * server.hz times per second, the following is the max amount of
- * microseconds we can spend in this function. */
- timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
- timelimit_exit = 0;
- if (timelimit <= 0) timelimit = 1;
-
- if (type == ACTIVE_EXPIRE_CYCLE_FAST)
- timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
-
- for (j = 0; j < dbs_per_call; j++) {
- int expired;
- redisDb *db = server.db+(current_db % server.dbnum);
-
- /* Increment the DB now so we are sure if we run out of time
- * in the current DB we'll restart from the next. This allows to
- * distribute the time evenly across DBs. */
- current_db++;
-
- /* Continue to expire if at the end of the cycle more than 25%
- * of the keys were expired. */
- do {
- unsigned long num, slots;
- long long now, ttl_sum;
- int ttl_samples;
-
- /* If there is nothing to expire try next DB ASAP. */
- if ((num = dictSize(db->expires)) == 0) {
- db->avg_ttl = 0;
- break;
- }
- slots = dictSlots(db->expires);
- now = mstime();
-
- /* When there are less than 1% filled slots getting random
- * keys is expensive, so stop here waiting for better times...
- * The dictionary will be resized asap. */
- if (num && slots > DICT_HT_INITIAL_SIZE &&
- (num*100/slots < 1)) break;
-
- /* The main collection cycle. Sample random keys among keys
- * with an expire set, checking for expired ones. */
- expired = 0;
- ttl_sum = 0;
- ttl_samples = 0;
-
- if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
- num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
-
- while (num--) {
- dictEntry *de;
- long long ttl;
-
- if ((de = dictGetRandomKey(db->expires)) == NULL) break;
- ttl = dictGetSignedIntegerVal(de)-now;
- if (activeExpireCycleTryExpire(db,de,now)) expired++;
- if (ttl < 0) ttl = 0;
- ttl_sum += ttl;
- ttl_samples++;
- }
-
- /* Update the average TTL stats for this database. */
- if (ttl_samples) {
- long long avg_ttl = ttl_sum/ttl_samples;
-
- if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
- /* Smooth the value averaging with the previous one. */
- db->avg_ttl = (db->avg_ttl+avg_ttl)/2;
- }
-
- /* We can't block forever here even if there are many keys to
- * expire. So after a given amount of milliseconds return to the
- * caller waiting for the other active expire cycle. */
- iteration++;
- if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
- long long elapsed = ustime()-start;
-
- latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
- if (elapsed > timelimit) timelimit_exit = 1;
- }
- if (timelimit_exit) return;
- /* We don't repeat the cycle if there are less than 25% of keys
- * found expired in the current DB. */
- } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
- }
-}
-
-unsigned int getLRUClock(void) {
- return (mstime()/REDIS_LRU_CLOCK_RESOLUTION) & REDIS_LRU_CLOCK_MAX;
-}
-
/* Add a sample to the operations per second array of samples. */
void trackInstantaneousMetric(int metric, long long current_reading) {
long long t = mstime() - server.inst_metric[metric].last_sample_time;
@@ -889,7 +790,7 @@ void trackInstantaneousMetric(int metric, long long current_reading) {
server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
ops_sec;
server.inst_metric[metric].idx++;
- server.inst_metric[metric].idx %= REDIS_METRIC_SAMPLES;
+ server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
server.inst_metric[metric].last_sample_time = mstime();
server.inst_metric[metric].last_sample_count = current_reading;
}
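/* Worked example of the sampling above: if the previous sample was taken
 * t = 100 ms ago and 1500 more events were counted since then, the stored
 * value is 1500*1000/100 = 15000 ops/sec; getInstantaneousMetric() then
 * averages the last STATS_METRIC_SAMPLES such values. */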
@@ -899,34 +800,42 @@ long long getInstantaneousMetric(int metric) {
int j;
long long sum = 0;
- for (j = 0; j < REDIS_METRIC_SAMPLES; j++)
+ for (j = 0; j < STATS_METRIC_SAMPLES; j++)
sum += server.inst_metric[metric].samples[j];
- return sum / REDIS_METRIC_SAMPLES;
+ return sum / STATS_METRIC_SAMPLES;
}
-/* Check for timeouts. Returns non-zero if the client was terminated */
-int clientsCronHandleTimeout(redisClient *c) {
- time_t now = server.unixtime;
+/* Check for timeouts. Returns non-zero if the client was terminated.
+ * The function gets the current time in milliseconds as argument since
+ * it gets called multiple times in a loop, so calling gettimeofday() for
+ * each iteration would be costly without any actual gain. */
+int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
+ time_t now = now_ms/1000;
if (server.maxidletime &&
- !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
- !(c->flags & REDIS_MASTER) && /* no timeout for masters */
- !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */
- !(c->flags & REDIS_PUBSUB) && /* no timeout for Pub/Sub clients */
+ !(c->flags & CLIENT_SLAVE) && /* no timeout for slaves */
+ !(c->flags & CLIENT_MASTER) && /* no timeout for masters */
+ !(c->flags & CLIENT_BLOCKED) && /* no timeout for BLPOP */
+ !(c->flags & CLIENT_PUBSUB) && /* no timeout for Pub/Sub clients */
(now - c->lastinteraction > server.maxidletime))
{
- redisLog(REDIS_VERBOSE,"Closing idle client");
+ serverLog(LL_VERBOSE,"Closing idle client");
freeClient(c);
return 1;
- } else if (c->flags & REDIS_BLOCKED) {
+ } else if (c->flags & CLIENT_BLOCKED) {
/* Blocked OPS timeout is handled with milliseconds resolution.
* However note that the actual resolution is limited by
* server.hz. */
- mstime_t now_ms = mstime();
if (c->bpop.timeout != 0 && c->bpop.timeout < now_ms) {
+ /* Handle blocking operation specific timeout. */
replyToBlockedClientTimedOut(c);
unblockClient(c);
+ } else if (server.cluster_enabled) {
+ /* Cluster: handle unblock & redirect of clients blocked
+ * into keys no longer served by this server. */
+ if (clusterRedirectBlockedClientIfNeeded(c))
+ unblockClient(c);
}
}
return 0;
@@ -936,41 +845,135 @@ int clientsCronHandleTimeout(redisClient *c) {
* free space not used, this function reclaims space if needed.
*
* The function always returns 0 as it never terminates the client. */
-int clientsCronResizeQueryBuffer(redisClient *c) {
+int clientsCronResizeQueryBuffer(client *c) {
size_t querybuf_size = sdsAllocSize(c->querybuf);
time_t idletime = server.unixtime - c->lastinteraction;
/* There are two conditions to resize the query buffer:
* 1) Query buffer is > BIG_ARG and too big for latest peak.
- * 2) Client is inactive and the buffer is bigger than 1k. */
- if (((querybuf_size > REDIS_MBULK_BIG_ARG) &&
- (querybuf_size/(c->querybuf_peak+1)) > 2) ||
- (querybuf_size > 1024 && idletime > 2))
+ * 2) Query buffer is > BIG_ARG and client is idle. */
+ if (querybuf_size > PROTO_MBULK_BIG_ARG &&
+ ((querybuf_size/(c->querybuf_peak+1)) > 2 ||
+ idletime > 2))
{
- /* Only resize the query buffer if it is actually wasting space. */
- if (sdsavail(c->querybuf) > 1024) {
+ /* Only resize the query buffer if it is actually wasting
+ * at least a few kbytes. */
+ if (sdsavail(c->querybuf) > 1024*4) {
c->querybuf = sdsRemoveFreeSpace(c->querybuf);
}
}
/* Reset the peak again to capture the peak memory usage in the next
* cycle. */
c->querybuf_peak = 0;
+
+ /* Clients representing masters also use a "pending query buffer" that
+ * is the not yet applied part of the stream we are reading. Such a buffer
+ * also needs resizing from time to time, otherwise after a very large
+ * transfer (a huge value or a big MIGRATE operation) it will keep using
+ * a lot of memory. */
+ if (c->flags & CLIENT_MASTER) {
+ /* There are two conditions to resize the pending query buffer:
+ * 1) Pending Query buffer is > LIMIT_PENDING_QUERYBUF.
+ * 2) Used length is smaller than pending_querybuf_size/2 */
+ size_t pending_querybuf_size = sdsAllocSize(c->pending_querybuf);
+ if(pending_querybuf_size > LIMIT_PENDING_QUERYBUF &&
+ sdslen(c->pending_querybuf) < (pending_querybuf_size/2))
+ {
+ c->pending_querybuf = sdsRemoveFreeSpace(c->pending_querybuf);
+ }
+ }
return 0;
}
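/* Worked example, with PROTO_MBULK_BIG_ARG at its default of 32k: a client
 * that once sent a 1 MB bulk argument keeps a ~1 MB querybuf around; if its
 * recent peak is only 2k, 1048576/(2048+1) > 2 holds, so the buffer is
 * trimmed, provided at least 4k of it is actually unused free space. */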
+/* This function is used in order to track clients using the biggest amount
+ * of memory in the latest few seconds. This way we can provide such information
+ * in the INFO output (clients section), without having to do an O(N) scan for
+ * all the clients.
+ *
+ * This is how it works. We have an array of CLIENTS_PEAK_MEM_USAGE_SLOTS slots
+ * where we track, for each, the biggest client output and input buffers we
+ * saw in that slot. Every slot corresponds to one of the latest seconds, since
+ * the array is indexed by doing UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS.
+ *
+ * When we want to know what was recently the peak memory usage, we just
+ * scan those few slots looking for the maximum value. */
+#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8
+size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS];
+size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS];
+
+int clientsCronTrackExpansiveClients(client *c) {
+ size_t in_usage = sdsAllocSize(c->querybuf);
+ size_t out_usage = getClientOutputBufferMemoryUsage(c);
+ int i = server.unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
+ int zeroidx = (i+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS;
+
+ /* Always zero the next sample, so that when we switch to that second, we'll
+ * only register samples that are greater in that second without considering
+ * the history of such slot.
+ *
+ * Note: our index may jump to any random position if serverCron() is not
+ * called for some reason with the normal frequency, for instance because
+ * some slow command is called taking multiple seconds to execute. In that
+ * case our array may end up containing data which is potentially older
+ * than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem
+ * since here we just want to track if "recently" there were very expansive
+ * clients from the POV of memory usage. */
+ ClientsPeakMemInput[zeroidx] = 0;
+ ClientsPeakMemOutput[zeroidx] = 0;
+
+ /* Track the biggest values observed so far in this slot. */
+ if (in_usage > ClientsPeakMemInput[i]) ClientsPeakMemInput[i] = in_usage;
+ if (out_usage > ClientsPeakMemOutput[i]) ClientsPeakMemOutput[i] = out_usage;
+
+ return 0; /* This function never terminates the client. */
+}
+
+/* Return the max samples in the memory usage of clients tracked by
+ * the function clientsCronTrackExpansiveClients(). */
+void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
+ size_t i = 0, o = 0;
+ for (int j = 0; j < CLIENTS_PEAK_MEM_USAGE_SLOTS; j++) {
+ if (ClientsPeakMemInput[j] > i) i = ClientsPeakMemInput[j];
+ if (ClientsPeakMemOutput[j] > o) o = ClientsPeakMemOutput[j];
+ }
+ *in_usage = i;
+ *out_usage = o;
+}
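/* A condensed sketch of the slot machinery above (trackPeak() is a
 * hypothetical stand-alone reduction of clientsCronTrackExpansiveClients):
 * index by the current second modulo the number of slots, zero the *next*
 * slot so stale history never leaks into the upcoming second, and keep
 * only per-slot maxima. */
static size_t peaks[CLIENTS_PEAK_MEM_USAGE_SLOTS];
void trackPeak(time_t unixtime, size_t usage) {
    int i = unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
    peaks[(i+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS] = 0; /* clear the next second */
    if (usage > peaks[i]) peaks[i] = usage;          /* per-slot maximum */
}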
+
+/* This function is called by serverCron() and is used in order to perform
+ * operations on clients that are important to perform constantly. For instance
+ * we use this function in order to disconnect clients after a timeout, including
+ * clients blocked in some blocking command with a non-zero timeout.
+ *
+ * The function makes some effort to process all the clients every second, even
+ * if this cannot be strictly guaranteed, since serverCron() may be called with
+ * an actual frequency lower than server.hz in case of latency events like slow
+ * commands.
+ *
+ * It is very important for this function, and the functions it calls, to be
+ * very fast: sometimes Redis has tens of thousands of connected clients, and the
+ * default server.hz value is 10, so sometimes here we need to process thousands
+ * of clients per second, turning this function into a source of latency.
+ */
+#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
- /* Make sure to process at least 1/(server.hz*10) of clients per call.
- * Since this function is called server.hz times per second we are sure that
- * in the worst case we process all the clients in 10 seconds.
- * In normal conditions (a reasonable number of clients) we process
- * all the clients in a shorter time. */
+ /* Try to process at least numclients/server.hz of clients
+ * per call. Since normally (if there are no big latency events) this
+ * function is called server.hz times per second, in the average case we
+ * process all the clients in 1 second. */
int numclients = listLength(server.clients);
- int iterations = numclients/(server.hz*10);
+ int iterations = numclients/server.hz;
+ mstime_t now = mstime();
+
+ /* Process at least a few clients while we are at it, even if we need
+ * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
+ * of processing each client once per second. */
+ if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
+ iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
+ numclients : CLIENTS_CRON_MIN_ITERATIONS;
- if (iterations < 50)
- iterations = (numclients < 50) ? numclients : 50;
while(listLength(server.clients) && iterations--) {
- redisClient *c;
+ client *c;
listNode *head;
/* Rotate the list, take the current head, process.
@@ -982,8 +985,9 @@ void clientsCron(void) {
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
- if (clientsCronHandleTimeout(c)) continue;
+ if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
+ if (clientsCronTrackExpansiveClients(c)) continue;
}
}
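/* Worked example of the iteration budget above: with 5000 connected clients
 * and server.hz = 10, iterations = 5000/10 = 500 clients per call, so every
 * client is visited about once per second; with only 20 clients, 20/10 = 2
 * is below the minimum and CLIENTS_CRON_MIN_ITERATIONS (5) is used instead. */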
@@ -993,8 +997,15 @@ void clientsCron(void) {
void databasesCron(void) {
/* Expire keys by random sampling. Not required for slaves
* as master will synthesize DELs for us. */
- if (server.active_expire_enabled && server.masterhost == NULL)
+ if (server.active_expire_enabled && server.masterhost == NULL) {
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
+ } else if (server.masterhost != NULL) {
+ expireSlaveKeys();
+ }
+
+ /* Defrag keys gradually. */
+ if (server.active_defrag_enabled)
+ activeDefragCycle();
/* Perform hash tables rehashing if needed, but only if there are no
* other processes saving the DB on disk. Otherwise rehashing is bad
@@ -1005,7 +1016,7 @@ void databasesCron(void) {
* cron loop iteration. */
static unsigned int resize_db = 0;
static unsigned int rehash_db = 0;
- int dbs_per_call = REDIS_DBCRON_DBS_PER_CALL;
+ int dbs_per_call = CRON_DBS_PER_CALL;
int j;
/* Don't test more DBs than we have. */
@@ -1020,12 +1031,15 @@ void databasesCron(void) {
/* Rehash */
if (server.activerehashing) {
for (j = 0; j < dbs_per_call; j++) {
- int work_done = incrementallyRehash(rehash_db % server.dbnum);
- rehash_db++;
+ int work_done = incrementallyRehash(rehash_db);
if (work_done) {
/* If the function did some work, stop here, we'll do
* more at the next cron loop. */
break;
+ } else {
+ /* If this db didn't need rehash, we'll try the next one. */
+ rehash_db++;
+ rehash_db %= server.dbnum;
}
}
}
@@ -1037,8 +1051,17 @@ void databasesCron(void) {
* every object access, and accuracy is not needed. To access a global var is
* a lot faster than calling time(NULL) */
void updateCachedTime(void) {
- server.unixtime = time(NULL);
+ time_t unixtime = time(NULL);
+ atomicSet(server.unixtime,unixtime);
server.mstime = mstime();
+
+ /* To get information about daylight saving time, we need to call localtime_r
+ * and cache the result. However calling localtime_r in this context is safe
+ * since we will never fork() while here, in the main thread. The logging
+ * function will call a thread safe version of localtime that has no locks. */
+ struct tm tm;
+ localtime_r(&server.unixtime,&tm);
+ server.daylight_active = tm.tm_isdst;
}
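/* Since server.unixtime is now written with atomicSet(), other threads are
 * expected to read it through the matching atomicGet() macro from
 * atomicvar.h rather than accessing the field directly; a minimal sketch
 * (readCachedTime() is a hypothetical helper): */
time_t readCachedTime(void) {
    time_t now;
    atomicGet(server.unixtime, now);
    return now;
}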
/* This is our timer interrupt, called server.hz times per second.
@@ -1062,9 +1085,9 @@ void updateCachedTime(void) {
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
int j;
- REDIS_NOTUSED(eventLoop);
- REDIS_NOTUSED(id);
- REDIS_NOTUSED(clientData);
+ UNUSED(eventLoop);
+ UNUSED(id);
+ UNUSED(clientData);
/* Software watchdog: deliver the SIGALRM that will reach the signal
* handler if we don't return here fast enough. */
@@ -1073,15 +1096,30 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Update the time cache. */
updateCachedTime();
+ server.hz = server.config_hz;
+ /* Adapt the server.hz value to the number of configured clients. If we have
+ * many clients, we want to call serverCron() with a higher frequency. */
+ if (server.dynamic_hz) {
+ while (listLength(server.clients) / server.hz >
+ MAX_CLIENTS_PER_CLOCK_TICK)
+ {
+ server.hz *= 2;
+ if (server.hz > CONFIG_MAX_HZ) {
+ server.hz = CONFIG_MAX_HZ;
+ break;
+ }
+ }
+ }
+
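/* Worked example of the doubling loop above, with the defaults
 * MAX_CLIENTS_PER_CLOCK_TICK = 200 and CONFIG_MAX_HZ = 500: with 12000
 * clients and config_hz = 10, 12000/10 = 1200 > 200, so hz doubles to 20,
 * 40, then 80; at hz = 80, 12000/80 = 150 <= 200 and the loop stops. */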
run_with_period(100) {
- trackInstantaneousMetric(REDIS_METRIC_COMMAND,server.stat_numcommands);
- trackInstantaneousMetric(REDIS_METRIC_NET_INPUT,
+ trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);
+ trackInstantaneousMetric(STATS_METRIC_NET_INPUT,
server.stat_net_input_bytes);
- trackInstantaneousMetric(REDIS_METRIC_NET_OUTPUT,
+ trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
server.stat_net_output_bytes);
}
- /* We have just REDIS_LRU_BITS bits per object for LRU information.
+ /* We have just LRU_BITS bits per object for LRU information.
* So we use an (eventually wrapping) LRU clock.
*
* Note that even if the counter wraps it's not a big problem,
@@ -1091,21 +1129,47 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* not likely.
*
* Note that you can change the resolution altering the
- * REDIS_LRU_CLOCK_RESOLUTION define. */
- server.lruclock = getLRUClock();
+ * LRU_CLOCK_RESOLUTION define. */
+ unsigned long lruclock = getLRUClock();
+ atomicSet(server.lruclock,lruclock);
/* Record the max memory used since the server was started. */
if (zmalloc_used_memory() > server.stat_peak_memory)
server.stat_peak_memory = zmalloc_used_memory();
- /* Sample the RSS here since this is a relatively slow call. */
- server.resident_set_size = zmalloc_get_rss();
+ run_with_period(100) {
+ /* Sample the RSS and other metrics here since this is a relatively slow call.
+ * We must sample the zmalloc_used at the same time we take the rss, otherwise
+ * the frag ratio calculation may be off (ratio of two samples at different times) */
+ server.cron_malloc_stats.process_rss = zmalloc_get_rss();
+ server.cron_malloc_stats.zmalloc_used = zmalloc_used_memory();
+ /* Sampling the allocator info can be slow too.
+ * The fragmentation ratio it shows is potentially more accurate, as
+ * it excludes other RSS pages such as: shared libraries, LUA and other
+ * non-zmalloc allocations, and allocator reserved pages that can be
+ * purged (all not actual fragmentation) */
+ zmalloc_get_allocator_info(&server.cron_malloc_stats.allocator_allocated,
+ &server.cron_malloc_stats.allocator_active,
+ &server.cron_malloc_stats.allocator_resident);
+ /* In case the allocator isn't providing these stats, fake them so that
+ * fragmentation info still shows something (inaccurate metrics) */
+ if (!server.cron_malloc_stats.allocator_resident) {
+ /* LUA memory isn't part of zmalloc_used, but it is part of the process RSS,
+ * so we must deduct it in order to compute a correct
+ * "allocator fragmentation" ratio */
+ size_t lua_memory = lua_gc(server.lua,LUA_GCCOUNT,0)*1024LL;
+ server.cron_malloc_stats.allocator_resident = server.cron_malloc_stats.process_rss - lua_memory;
+ }
+ if (!server.cron_malloc_stats.allocator_active)
+ server.cron_malloc_stats.allocator_active = server.cron_malloc_stats.allocator_resident;
+ if (!server.cron_malloc_stats.allocator_allocated)
+ server.cron_malloc_stats.allocator_allocated = server.cron_malloc_stats.zmalloc_used;
+ }
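/* A sketch of how these samples are typically combined into ratios (the
 * names below are illustrative; the INFO memory code reports figures along
 * these lines):
 *   allocator_frag = allocator_active   / allocator_allocated
 *   allocator_rss  = allocator_resident / allocator_active
 *   rss_overhead   = process_rss        / allocator_resident
 * e.g. allocated = 1.0 GB, active = 1.2 GB, resident = 1.3 GB and
 * rss = 1.4 GB give ratios of ~1.20, ~1.08 and ~1.08 respectively. */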
/* We received a SIGTERM, shutting down here in a safe way, as it is
* not ok doing so inside the signal handler. */
if (server.shutdown_asap) {
- if (prepareForShutdown(0) == REDIS_OK) exit(0);
- redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
+ if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) exit(0);
+ serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
server.shutdown_asap = 0;
}
@@ -1118,7 +1182,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
used = dictSize(server.db[j].dict);
vkeys = dictSize(server.db[j].expires);
if (used || vkeys) {
- redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
+ serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
/* dictPrintStats(server.dict); */
}
}
@@ -1127,7 +1191,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Show information about connected clients */
if (!server.sentinel_mode) {
run_with_period(5000) {
- redisLog(REDIS_VERBOSE,
+ serverLog(LL_VERBOSE,
"%lu clients connected (%lu slaves), %zu bytes in use",
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
@@ -1150,7 +1214,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
/* Check if a background saving or AOF rewrite in progress terminated. */
- if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) {
+ if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
+ ldbPendingChildren())
+ {
int statloc;
pid_t pid;
@@ -1160,54 +1226,68 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
- if (pid == server.rdb_child_pid) {
+ if (pid == -1) {
+ serverLog(LL_WARNING,"wait3() returned an error: %s. "
+ "rdb_child_pid = %d, aof_child_pid = %d",
+ strerror(errno),
+ (int) server.rdb_child_pid,
+ (int) server.aof_child_pid);
+ } else if (pid == server.rdb_child_pid) {
backgroundSaveDoneHandler(exitcode,bysignal);
+ if (!bysignal && exitcode == 0) receiveChildInfo();
} else if (pid == server.aof_child_pid) {
backgroundRewriteDoneHandler(exitcode,bysignal);
+ if (!bysignal && exitcode == 0) receiveChildInfo();
} else {
- redisLog(REDIS_WARNING,
- "Warning, detected child with unmatched pid: %ld",
- (long)pid);
+ if (!ldbRemoveChild(pid)) {
+ serverLog(LL_WARNING,
+ "Warning, detected child with unmatched pid: %ld",
+ (long)pid);
+ }
}
updateDictResizePolicy();
+ closeChildInfoPipe();
}
} else {
/* If there is not a background saving/rewrite in progress check if
- * we have to save/rewrite now */
- for (j = 0; j < server.saveparamslen; j++) {
+ * we have to save/rewrite now. */
+ for (j = 0; j < server.saveparamslen; j++) {
struct saveparam *sp = server.saveparams+j;
/* Save if we reached the given amount of changes,
* the given amount of seconds, and if the latest bgsave was
* successful or if, in case of an error, at least
- * REDIS_BGSAVE_RETRY_DELAY seconds already elapsed. */
+ * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
if (server.dirty >= sp->changes &&
server.unixtime-server.lastsave > sp->seconds &&
(server.unixtime-server.lastbgsave_try >
- REDIS_BGSAVE_RETRY_DELAY ||
- server.lastbgsave_status == REDIS_OK))
+ CONFIG_BGSAVE_RETRY_DELAY ||
+ server.lastbgsave_status == C_OK))
{
- redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
+ serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
- rdbSaveBackground(server.rdb_filename);
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ rdbSaveBackground(server.rdb_filename,rsiptr);
break;
}
- }
-
- /* Trigger an AOF rewrite if needed */
- if (server.rdb_child_pid == -1 &&
- server.aof_child_pid == -1 &&
- server.aof_rewrite_perc &&
- server.aof_current_size > server.aof_rewrite_min_size)
- {
+ }
+
+ /* Trigger an AOF rewrite if needed. */
+ if (server.aof_state == AOF_ON &&
+ server.rdb_child_pid == -1 &&
+ server.aof_child_pid == -1 &&
+ server.aof_rewrite_perc &&
+ server.aof_current_size > server.aof_rewrite_min_size)
+ {
long long base = server.aof_rewrite_base_size ?
- server.aof_rewrite_base_size : 1;
+ server.aof_rewrite_base_size : 1;
long long growth = (server.aof_current_size*100/base) - 100;
if (growth >= server.aof_rewrite_perc) {
- redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
+ serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
rewriteAppendOnlyFileBackground();
}
- }
+ }
}
@@ -1220,7 +1300,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* however trying every second is enough in case 'hz' is set to
* a higher frequency. */
run_with_period(1000) {
- if (server.aof_last_write_status == REDIS_ERR)
+ if (server.aof_last_write_status == C_ERR)
flushAppendOnlyFile(0);
}
@@ -1228,10 +1308,10 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
freeClientsInAsyncFreeQueue();
/* Clear the paused clients flag if needed. */
- clientsArePaused(); /* Don't check return value, just use the side effect. */
+ clientsArePaused(); /* Don't check return value, just use the side effect.*/
- /* Replication cron function -- used to reconnect to master and
- * to detect transfer failures. */
+ /* Replication cron function -- used to reconnect to master,
+ * detect transfer failures, start background RDB transfers and so forth. */
run_with_period(1000) replicationCron();
/* Run the Redis Cluster cron. */
@@ -1249,6 +1329,24 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
migrateCloseTimedoutSockets();
}
+ /* Start a scheduled BGSAVE if the corresponding flag is set. This is
+ * useful when we are forced to postpone a BGSAVE because an AOF
+ * rewrite is in progress.
+ *
+ * Note: this code must be after the replicationCron() call above, so
+ * when refactoring this file make sure to keep this order. This is useful
+ * because we want to give priority to RDB savings for replication. */
+ if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
+ server.rdb_bgsave_scheduled &&
+ (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
+ server.lastbgsave_status == C_OK))
+ {
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK)
+ server.rdb_bgsave_scheduled = 0;
+ }
+
server.cronloops++;
return 1000/server.hz;
}
@@ -1257,7 +1355,13 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors. */
void beforeSleep(struct aeEventLoop *eventLoop) {
- REDIS_NOTUSED(eventLoop);
+ UNUSED(eventLoop);
+
+ /* Call the Redis Cluster before sleep function. Note that this function
+ * may change the state of Redis Cluster (from ok to fail or vice versa),
+ * so it's a good idea to call it before serving the unblocked clients
+ * later in this function. */
+ if (server.cluster_enabled) clusterBeforeSleep();
/* Run a fast expire cycle (the called function will return
* ASAP if a fast cycle is not needed). */
@@ -1284,6 +1388,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
if (listLength(server.clients_waiting_acks))
processClientsWaitingReplicas();
+ /* Check if there are clients unblocked by modules that implement
+ * blocking commands. */
+ moduleHandleBlockedClients();
+
/* Try to process pending commands for clients that were just unblocked. */
if (listLength(server.unblocked_clients))
processUnblockedClients();
@@ -1291,8 +1399,21 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
/* Write the AOF buffer on disk */
flushAppendOnlyFile(0);
- /* Call the Redis Cluster before sleep function. */
- if (server.cluster_enabled) clusterBeforeSleep();
+ /* Handle writes with pending output buffers. */
+ handleClientsWithPendingWrites();
+
+ /* Before we are going to sleep, let the threads access the dataset by
+ * releasing the GIL. Redis main thread will not touch anything at this
+ * time. */
+ if (moduleCount()) moduleReleaseGIL();
+}
+
+/* This function is called immediately after the event loop multiplexing
+ * API returned, and control is soon going to return to Redis by invoking
+ * the different event callbacks. */
+void afterSleep(struct aeEventLoop *eventLoop) {
+ UNUSED(eventLoop);
+ if (moduleCount()) moduleAcquireGIL();
}
/* =========================== Server initialization ======================== */
@@ -1300,61 +1421,61 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
void createSharedObjects(void) {
int j;
- shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
- shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
- shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
- shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
- shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
- shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
- shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n"));
- shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
- shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
- shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
- shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
- shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
- shared.emptyscan = createObject(REDIS_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
- shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
+ shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
+ shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
+ shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
+ shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
+ shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
+ shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
+ shared.cnegone = createObject(OBJ_STRING,sdsnew(":-1\r\n"));
+ shared.nullbulk = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
+ shared.nullmultibulk = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
+ shared.emptymultibulk = createObject(OBJ_STRING,sdsnew("*0\r\n"));
+ shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
+ shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
+ shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
+ shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
"-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
- shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
+ shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
"-ERR no such key\r\n"));
- shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
+ shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
"-ERR syntax error\r\n"));
- shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
+ shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
"-ERR source and destination objects are the same\r\n"));
- shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
+ shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
"-ERR index out of range\r\n"));
- shared.noscripterr = createObject(REDIS_STRING,sdsnew(
+ shared.noscripterr = createObject(OBJ_STRING,sdsnew(
"-NOSCRIPT No matching script. Please use EVAL.\r\n"));
- shared.loadingerr = createObject(REDIS_STRING,sdsnew(
+ shared.loadingerr = createObject(OBJ_STRING,sdsnew(
"-LOADING Redis is loading the dataset in memory\r\n"));
- shared.slowscripterr = createObject(REDIS_STRING,sdsnew(
+ shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
"-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
- shared.masterdownerr = createObject(REDIS_STRING,sdsnew(
+ shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
"-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
- shared.bgsaveerr = createObject(REDIS_STRING,sdsnew(
- "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
- shared.roslaveerr = createObject(REDIS_STRING,sdsnew(
+ shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
+ "-MISCONF Redis is configured to save RDB snapshots, but it is currently not able to persist on disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). Please check the Redis logs for details about the RDB error.\r\n"));
+ shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
"-READONLY You can't write against a read only slave.\r\n"));
- shared.noautherr = createObject(REDIS_STRING,sdsnew(
+ shared.noautherr = createObject(OBJ_STRING,sdsnew(
"-NOAUTH Authentication required.\r\n"));
- shared.oomerr = createObject(REDIS_STRING,sdsnew(
+ shared.oomerr = createObject(OBJ_STRING,sdsnew(
"-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
- shared.execaborterr = createObject(REDIS_STRING,sdsnew(
+ shared.execaborterr = createObject(OBJ_STRING,sdsnew(
"-EXECABORT Transaction discarded because of previous errors.\r\n"));
- shared.noreplicaserr = createObject(REDIS_STRING,sdsnew(
+ shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
"-NOREPLICAS Not enough good slaves to write.\r\n"));
- shared.busykeyerr = createObject(REDIS_STRING,sdsnew(
+ shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
"-BUSYKEY Target key name already exists.\r\n"));
- shared.space = createObject(REDIS_STRING,sdsnew(" "));
- shared.colon = createObject(REDIS_STRING,sdsnew(":"));
- shared.plus = createObject(REDIS_STRING,sdsnew("+"));
+ shared.space = createObject(OBJ_STRING,sdsnew(" "));
+ shared.colon = createObject(OBJ_STRING,sdsnew(":"));
+ shared.plus = createObject(OBJ_STRING,sdsnew("+"));
- for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) {
+ for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
char dictid_str[64];
int dictid_len;
dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
- shared.select[j] = createObject(REDIS_STRING,
+ shared.select[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),
"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
dictid_len, dictid_str));
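/* For example, shared.select[5] ends up holding the exact protocol string
 * "*2\r\n$6\r\nSELECT\r\n$1\r\n5\r\n", ready to be written verbatim when
 * switching a replication or AOF stream to DB 5. */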
@@ -1366,150 +1487,192 @@ void createSharedObjects(void) {
shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
shared.del = createStringObject("DEL",3);
+ shared.unlink = createStringObject("UNLINK",6);
shared.rpop = createStringObject("RPOP",4);
shared.lpop = createStringObject("LPOP",4);
shared.lpush = createStringObject("LPUSH",5);
- for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
- shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
- shared.integers[j]->encoding = REDIS_ENCODING_INT;
+ shared.zpopmin = createStringObject("ZPOPMIN",7);
+ shared.zpopmax = createStringObject("ZPOPMAX",7);
+ for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
+ shared.integers[j] =
+ makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
+ shared.integers[j]->encoding = OBJ_ENCODING_INT;
}
- for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) {
- shared.mbulkhdr[j] = createObject(REDIS_STRING,
+ for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
+ shared.mbulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"*%d\r\n",j));
- shared.bulkhdr[j] = createObject(REDIS_STRING,
+ shared.bulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"$%d\r\n",j));
}
/* The following two shared objects, minstring and maxstring, are not
* actually used for their value but as a special object meaning
* respectively the minimum possible string and the maximum possible
* string in string comparisons for the ZRANGEBYLEX command. */
- shared.minstring = createStringObject("minstring",9);
- shared.maxstring = createStringObject("maxstring",9);
+ shared.minstring = sdsnew("minstring");
+ shared.maxstring = sdsnew("maxstring");
}
void initServerConfig(void) {
int j;
- getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE);
+ pthread_mutex_init(&server.next_client_id_mutex,NULL);
+ pthread_mutex_init(&server.lruclock_mutex,NULL);
+ pthread_mutex_init(&server.unixtime_mutex,NULL);
+
+ updateCachedTime();
+ getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
+ server.runid[CONFIG_RUN_ID_SIZE] = '\0';
+ changeReplicationId();
+ clearReplicationId2();
+ server.timezone = timezone; /* Initialized by tzset(). */
server.configfile = NULL;
- server.hz = REDIS_DEFAULT_HZ;
- server.runid[REDIS_RUN_ID_SIZE] = '\0';
+ server.executable = NULL;
+ server.config_hz = CONFIG_DEFAULT_HZ;
+ server.dynamic_hz = CONFIG_DEFAULT_DYNAMIC_HZ;
server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
- server.port = REDIS_SERVERPORT;
- server.tcp_backlog = REDIS_TCP_BACKLOG;
+ server.port = CONFIG_DEFAULT_SERVER_PORT;
+ server.tcp_backlog = CONFIG_DEFAULT_TCP_BACKLOG;
server.bindaddr_count = 0;
server.unixsocket = NULL;
- server.unixsocketperm = REDIS_DEFAULT_UNIX_SOCKET_PERM;
+ server.unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM;
server.ipfd_count = 0;
server.sofd = -1;
- server.dbnum = REDIS_DEFAULT_DBNUM;
- server.verbosity = REDIS_DEFAULT_VERBOSITY;
- server.maxidletime = REDIS_MAXIDLETIME;
- server.tcpkeepalive = REDIS_DEFAULT_TCP_KEEPALIVE;
+ server.protected_mode = CONFIG_DEFAULT_PROTECTED_MODE;
+ server.dbnum = CONFIG_DEFAULT_DBNUM;
+ server.verbosity = CONFIG_DEFAULT_VERBOSITY;
+ server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT;
+ server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE;
server.active_expire_enabled = 1;
- server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN;
+ server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG;
+ server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES;
+ server.active_defrag_threshold_lower = CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER;
+ server.active_defrag_threshold_upper = CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER;
+ server.active_defrag_cycle_min = CONFIG_DEFAULT_DEFRAG_CYCLE_MIN;
+ server.active_defrag_cycle_max = CONFIG_DEFAULT_DEFRAG_CYCLE_MAX;
+ server.active_defrag_max_scan_fields = CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS;
+ server.proto_max_bulk_len = CONFIG_DEFAULT_PROTO_MAX_BULK_LEN;
+ server.client_max_querybuf_len = PROTO_MAX_QUERYBUF_LEN;
server.saveparams = NULL;
server.loading = 0;
- server.logfile = zstrdup(REDIS_DEFAULT_LOGFILE);
- server.syslog_enabled = REDIS_DEFAULT_SYSLOG_ENABLED;
- server.syslog_ident = zstrdup(REDIS_DEFAULT_SYSLOG_IDENT);
+ server.logfile = zstrdup(CONFIG_DEFAULT_LOGFILE);
+ server.syslog_enabled = CONFIG_DEFAULT_SYSLOG_ENABLED;
+ server.syslog_ident = zstrdup(CONFIG_DEFAULT_SYSLOG_IDENT);
server.syslog_facility = LOG_LOCAL0;
- server.daemonize = REDIS_DEFAULT_DAEMONIZE;
+ server.daemonize = CONFIG_DEFAULT_DAEMONIZE;
server.supervised = 0;
- server.supervised_mode = REDIS_SUPERVISED_NONE;
- server.aof_state = REDIS_AOF_OFF;
- server.aof_fsync = REDIS_DEFAULT_AOF_FSYNC;
- server.aof_no_fsync_on_rewrite = REDIS_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
- server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC;
- server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE;
+ server.supervised_mode = SUPERVISED_NONE;
+ server.aof_state = AOF_OFF;
+ server.aof_fsync = CONFIG_DEFAULT_AOF_FSYNC;
+ server.aof_no_fsync_on_rewrite = CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
+ server.aof_rewrite_perc = AOF_REWRITE_PERC;
+ server.aof_rewrite_min_size = AOF_REWRITE_MIN_SIZE;
server.aof_rewrite_base_size = 0;
server.aof_rewrite_scheduled = 0;
server.aof_last_fsync = time(NULL);
server.aof_rewrite_time_last = -1;
server.aof_rewrite_time_start = -1;
- server.aof_lastbgrewrite_status = REDIS_OK;
+ server.aof_lastbgrewrite_status = C_OK;
server.aof_delayed_fsync = 0;
server.aof_fd = -1;
server.aof_selected_db = -1; /* Make sure the first time will not match */
server.aof_flush_postponed_start = 0;
- server.aof_rewrite_incremental_fsync = REDIS_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
- server.aof_load_truncated = REDIS_DEFAULT_AOF_LOAD_TRUNCATED;
+ server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
+ server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC;
+ server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
+ server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE;
server.pidfile = NULL;
- server.rdb_filename = zstrdup(REDIS_DEFAULT_RDB_FILENAME);
- server.aof_filename = zstrdup(REDIS_DEFAULT_AOF_FILENAME);
+ server.rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME);
+ server.aof_filename = zstrdup(CONFIG_DEFAULT_AOF_FILENAME);
server.requirepass = NULL;
- server.rdb_compression = REDIS_DEFAULT_RDB_COMPRESSION;
- server.rdb_checksum = REDIS_DEFAULT_RDB_CHECKSUM;
- server.stop_writes_on_bgsave_err = REDIS_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR;
- server.activerehashing = REDIS_DEFAULT_ACTIVE_REHASHING;
+ server.rdb_compression = CONFIG_DEFAULT_RDB_COMPRESSION;
+ server.rdb_checksum = CONFIG_DEFAULT_RDB_CHECKSUM;
+ server.stop_writes_on_bgsave_err = CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR;
+ server.activerehashing = CONFIG_DEFAULT_ACTIVE_REHASHING;
+ server.active_defrag_running = 0;
server.notify_keyspace_events = 0;
- server.maxclients = REDIS_MAX_CLIENTS;
- server.bpop_blocked_clients = 0;
- server.maxmemory = REDIS_DEFAULT_MAXMEMORY;
- server.maxmemory_policy = REDIS_DEFAULT_MAXMEMORY_POLICY;
- server.maxmemory_samples = REDIS_DEFAULT_MAXMEMORY_SAMPLES;
- server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES;
- server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE;
- server.list_max_ziplist_size = REDIS_LIST_MAX_ZIPLIST_SIZE;
- server.list_compress_depth = REDIS_LIST_COMPRESS_DEPTH;
- server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES;
- server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES;
- server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE;
- server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES;
+ server.maxclients = CONFIG_DEFAULT_MAX_CLIENTS;
+ server.blocked_clients = 0;
+ memset(server.blocked_clients_by_type,0,
+ sizeof(server.blocked_clients_by_type));
+ server.maxmemory = CONFIG_DEFAULT_MAXMEMORY;
+ server.maxmemory_policy = CONFIG_DEFAULT_MAXMEMORY_POLICY;
+ server.maxmemory_samples = CONFIG_DEFAULT_MAXMEMORY_SAMPLES;
+ server.lfu_log_factor = CONFIG_DEFAULT_LFU_LOG_FACTOR;
+ server.lfu_decay_time = CONFIG_DEFAULT_LFU_DECAY_TIME;
+ server.hash_max_ziplist_entries = OBJ_HASH_MAX_ZIPLIST_ENTRIES;
+ server.hash_max_ziplist_value = OBJ_HASH_MAX_ZIPLIST_VALUE;
+ server.list_max_ziplist_size = OBJ_LIST_MAX_ZIPLIST_SIZE;
+ server.list_compress_depth = OBJ_LIST_COMPRESS_DEPTH;
+ server.set_max_intset_entries = OBJ_SET_MAX_INTSET_ENTRIES;
+ server.zset_max_ziplist_entries = OBJ_ZSET_MAX_ZIPLIST_ENTRIES;
+ server.zset_max_ziplist_value = OBJ_ZSET_MAX_ZIPLIST_VALUE;
+ server.hll_sparse_max_bytes = CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES;
+ server.stream_node_max_bytes = OBJ_STREAM_NODE_MAX_BYTES;
+ server.stream_node_max_entries = OBJ_STREAM_NODE_MAX_ENTRIES;
server.shutdown_asap = 0;
- server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD;
- server.repl_timeout = REDIS_REPL_TIMEOUT;
- server.repl_min_slaves_to_write = REDIS_DEFAULT_MIN_SLAVES_TO_WRITE;
- server.repl_min_slaves_max_lag = REDIS_DEFAULT_MIN_SLAVES_MAX_LAG;
server.cluster_enabled = 0;
- server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
- server.cluster_migration_barrier = REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER;
- server.cluster_slave_validity_factor = REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY;
- server.cluster_require_full_coverage = REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
- server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE);
- server.lua_caller = NULL;
- server.lua_time_limit = REDIS_LUA_TIME_LIMIT;
- server.lua_client = NULL;
- server.lua_timedout = 0;
+ server.cluster_node_timeout = CLUSTER_DEFAULT_NODE_TIMEOUT;
+ server.cluster_migration_barrier = CLUSTER_DEFAULT_MIGRATION_BARRIER;
+ server.cluster_slave_validity_factor = CLUSTER_DEFAULT_SLAVE_VALIDITY;
+ server.cluster_require_full_coverage = CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
+ server.cluster_slave_no_failover = CLUSTER_DEFAULT_SLAVE_NO_FAILOVER;
+ server.cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
+ server.cluster_announce_ip = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP;
+ server.cluster_announce_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT;
+ server.cluster_announce_bus_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT;
server.migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL);
server.next_client_id = 1; /* Client IDs start from 1. */
server.loading_process_events_interval_bytes = (1024*1024*2);
-
- server.lruclock = getLRUClock();
+ server.lazyfree_lazy_eviction = CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION;
+ server.lazyfree_lazy_expire = CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE;
+ server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL;
+ server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO;
+ server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT;
+
+ unsigned int lruclock = getLRUClock();
+ atomicSet(server.lruclock,lruclock);
resetServerSaveParams();
appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
+
/* Replication related */
server.masterauth = NULL;
server.masterhost = NULL;
server.masterport = 6379;
server.master = NULL;
server.cached_master = NULL;
- server.repl_master_initial_offset = -1;
- server.repl_state = REDIS_REPL_NONE;
- server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT;
- server.repl_serve_stale_data = REDIS_DEFAULT_SLAVE_SERVE_STALE_DATA;
- server.repl_slave_ro = REDIS_DEFAULT_SLAVE_READ_ONLY;
+ server.master_initial_offset = -1;
+ server.repl_state = REPL_STATE_NONE;
+ server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
+ server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA;
+ server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY;
+ server.repl_slave_lazy_flush = CONFIG_DEFAULT_SLAVE_LAZY_FLUSH;
server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
- server.repl_disable_tcp_nodelay = REDIS_DEFAULT_REPL_DISABLE_TCP_NODELAY;
- server.repl_diskless_sync = REDIS_DEFAULT_REPL_DISKLESS_SYNC;
- server.repl_diskless_sync_delay = REDIS_DEFAULT_REPL_DISKLESS_SYNC_DELAY;
- server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY;
+ server.repl_disable_tcp_nodelay = CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY;
+ server.repl_diskless_sync = CONFIG_DEFAULT_REPL_DISKLESS_SYNC;
+ server.repl_diskless_sync_delay = CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY;
+ server.repl_ping_slave_period = CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD;
+ server.repl_timeout = CONFIG_DEFAULT_REPL_TIMEOUT;
+ server.repl_min_slaves_to_write = CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE;
+ server.repl_min_slaves_max_lag = CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG;
+ server.slave_priority = CONFIG_DEFAULT_SLAVE_PRIORITY;
+ server.slave_announce_ip = CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP;
+ server.slave_announce_port = CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT;
server.master_repl_offset = 0;
/* Replication partial resync backlog */
server.repl_backlog = NULL;
- server.repl_backlog_size = REDIS_DEFAULT_REPL_BACKLOG_SIZE;
+ server.repl_backlog_size = CONFIG_DEFAULT_REPL_BACKLOG_SIZE;
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
server.repl_backlog_off = 0;
- server.repl_backlog_time_limit = REDIS_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
+ server.repl_backlog_time_limit = CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
server.repl_no_slaves_since = time(NULL);
/* Client output buffer limits */
- for (j = 0; j < REDIS_CLIENT_TYPE_COUNT; j++)
+ for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
/* Double constants initialization */
@@ -1529,13 +1692,20 @@ void initServerConfig(void) {
server.lpushCommand = lookupCommandByCString("lpush");
server.lpopCommand = lookupCommandByCString("lpop");
server.rpopCommand = lookupCommandByCString("rpop");
+ server.zpopminCommand = lookupCommandByCString("zpopmin");
+ server.zpopmaxCommand = lookupCommandByCString("zpopmax");
+ server.sremCommand = lookupCommandByCString("srem");
+ server.execCommand = lookupCommandByCString("exec");
+ server.expireCommand = lookupCommandByCString("expire");
+ server.pexpireCommand = lookupCommandByCString("pexpire");
+ server.xclaimCommand = lookupCommandByCString("xclaim");
/* Slow log */
- server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN;
- server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN;
+ server.slowlog_log_slower_than = CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN;
+ server.slowlog_max_len = CONFIG_DEFAULT_SLOWLOG_MAX_LEN;
/* Latency monitor */
- server.latency_monitor_threshold = REDIS_DEFAULT_LATENCY_MONITOR_THRESHOLD;
+ server.latency_monitor_threshold = CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD;
/* Debugging */
server.assert_failed = "<no assertion failed>";
@@ -1545,22 +1715,88 @@ void initServerConfig(void) {
server.watchdog_period = 0;
}
+extern char **environ;
+
+/* Restart the server, executing the same executable that started this
+ * instance, with the same arguments and configuration file.
+ *
+ * The function is designed to directly call execve() so that the new
+ * server instance will retain the PID of the previous one.
+ *
+ * The following flags, which may be bitwise ORed together, alter the
+ * behavior of this function:
+ *
+ * RESTART_SERVER_NONE No flags.
+ * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting.
+ * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting.
+ *
+ * On success the function does not return, because the current process is
+ * replaced by the new one. On error C_ERR is returned. */
+int restartServer(int flags, mstime_t delay) {
+ int j;
+
+ /* Check if we still have access to the executable that started this
+ * server instance. */
+ if (access(server.executable,X_OK) == -1) {
+ serverLog(LL_WARNING,"Can't restart: this process has no "
+ "permissions to execute %s", server.executable);
+ return C_ERR;
+ }
+
+ /* Config rewriting. */
+ if (flags & RESTART_SERVER_CONFIG_REWRITE &&
+ server.configfile &&
+ rewriteConfig(server.configfile) == -1)
+ {
+ serverLog(LL_WARNING,"Can't restart: configuration rewrite process "
+ "failed");
+ return C_ERR;
+ }
+
+ /* Perform a proper shutdown. */
+ if (flags & RESTART_SERVER_GRACEFULLY &&
+ prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK)
+ {
+ serverLog(LL_WARNING,"Can't restart: error preparing for shutdown");
+ return C_ERR;
+ }
+
+ /* Close all file descriptors, with the exception of stdin, stdout and
+ * stderr, which are useful if we restart a Redis server which is not
+ * daemonized. */
+ for (j = 3; j < (int)server.maxclients + 1024; j++) {
+ /* Test the descriptor validity before closing it, otherwise
+ * Valgrind issues a warning on close(). */
+ if (fcntl(j,F_GETFD) != -1) close(j);
+ }
+
+ /* Execute the server with the original command line. */
+ if (delay) usleep(delay*1000);
+ zfree(server.exec_argv[0]);
+ server.exec_argv[0] = zstrdup(server.executable);
+ execve(server.executable,server.exec_argv,environ);
+
+ /* If an error occurred here, there is nothing we can do, but exit. */
+ _exit(1);
+
+ return C_ERR; /* Never reached. */
+}
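/* Usage illustration (a minimal sketch, hypothetical caller): a handler in
 * the style of DEBUG RESTART might combine the flags and only needs to
 * handle the failure case, since success never returns:
 *
 *   if (restartServer(RESTART_SERVER_GRACEFULLY |
 *                     RESTART_SERVER_CONFIG_REWRITE, 0) == C_ERR)
 *   {
 *       addReplyError(c,"Error trying to restart the server");
 *   }
 *
 * If execve() succeeds the reply is never sent: the new process image has
 * already taken over this PID. */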
+
/* This function will try to raise the max number of open files according to
* the configured max number of clients. It also reserves a number of file
- * descriptors (REDIS_MIN_RESERVED_FDS) for extra operations of
+ * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
* persistence, listening sockets, log files and so forth.
*
* If it is not possible to set the limit according to the configured max
* number of clients, the function will do the reverse, setting
* server.maxclients to the value that we can actually handle. */
void adjustOpenFilesLimit(void) {
- rlim_t maxfiles = server.maxclients+REDIS_MIN_RESERVED_FDS;
+ rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
struct rlimit limit;
if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
- redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
+ serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
strerror(errno));
- server.maxclients = 1024-REDIS_MIN_RESERVED_FDS;
+ server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
} else {
rlim_t oldlimit = limit.rlim_cur;
@@ -1592,31 +1828,34 @@ void adjustOpenFilesLimit(void) {
if (bestlimit < oldlimit) bestlimit = oldlimit;
if (bestlimit < maxfiles) {
- int old_maxclients = server.maxclients;
- server.maxclients = bestlimit-REDIS_MIN_RESERVED_FDS;
- if (server.maxclients < 1) {
- redisLog(REDIS_WARNING,"Your current 'ulimit -n' "
- "of %llu is not enough for Redis to start. "
+ unsigned int old_maxclients = server.maxclients;
+ server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
+ /* maxclients is unsigned so may overflow: in order
+ * to check if maxclients is now logically less than 1
+ * we test indirectly via bestlimit. */
+ if (bestlimit <= CONFIG_MIN_RESERVED_FDS) {
+ serverLog(LL_WARNING,"Your current 'ulimit -n' "
+ "of %llu is not enough for the server to start. "
"Please increase your open file limit to at least "
"%llu. Exiting.",
(unsigned long long) oldlimit,
(unsigned long long) maxfiles);
exit(1);
}
- redisLog(REDIS_WARNING,"You requested maxclients of %d "
+ serverLog(LL_WARNING,"You requested maxclients of %d "
"requiring at least %llu max file descriptors.",
old_maxclients,
(unsigned long long) maxfiles);
- redisLog(REDIS_WARNING,"Redis can't set maximum open files "
+ serverLog(LL_WARNING,"Server can't set maximum open files "
"to %llu because of OS error: %s.",
(unsigned long long) maxfiles, strerror(setrlimit_error));
- redisLog(REDIS_WARNING,"Current maximum open files is %llu. "
+ serverLog(LL_WARNING,"Current maximum open files is %llu. "
"maxclients has been reduced to %d to compensate for "
"low ulimit. "
"If you need higher maxclients increase 'ulimit -n'.",
(unsigned long long) bestlimit, server.maxclients);
} else {
- redisLog(REDIS_NOTICE,"Increased maximum number of open files "
+ serverLog(LL_NOTICE,"Increased maximum number of open files "
"to %llu (it was originally set to %llu).",
(unsigned long long) maxfiles,
(unsigned long long) oldlimit);
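/* The limit-raising logic above, reduced to a standalone sketch (the
 * incremental fallback that retries with progressively smaller values is
 * omitted here):
 *
 *   struct rlimit limit;
 *   rlim_t maxfiles = server.maxclients + CONFIG_MIN_RESERVED_FDS;
 *   if (getrlimit(RLIMIT_NOFILE,&limit) == 0 && limit.rlim_cur < maxfiles) {
 *       limit.rlim_cur = maxfiles;
 *       if (limit.rlim_max < maxfiles) limit.rlim_max = maxfiles;
 *       setrlimit(RLIMIT_NOFILE,&limit);
 *   }
 *
 * The real function additionally walks the requested value downward when
 * setrlimit() fails and keeps the best limit it managed to obtain. */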
@@ -1635,7 +1874,7 @@ void checkTcpBacklogSettings(void) {
if (fgets(buf,sizeof(buf),fp) != NULL) {
int somaxconn = atoi(buf);
if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
- redisLog(REDIS_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
+ serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
}
}
fclose(fp);
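/* Operational note: on Linux the kernel silently caps every listen()
 * backlog at net.core.somaxconn, which is what the warning above detects.
 * The usual remedy is applied outside of Redis, for example:
 *
 *   sysctl -w net.core.somaxconn=511
 *
 * so that the kernel limit is at least as large as tcp-backlog. */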
@@ -1653,9 +1892,9 @@ void checkTcpBacklogSettings(void) {
* contains no specific addresses to bind, this function will try to
* bind * (all addresses) for both the IPv4 and IPv6 protocols.
*
- * On success the function returns REDIS_OK.
+ * On success the function returns C_OK.
*
- * On error the function returns REDIS_ERR. For the function to be on
+ * On error the function returns C_ERR. For the function to be on
* error, at least one of the server.bindaddr addresses was
* impossible to bind, or no bind addresses were specified in the server
* configuration but the function is not able to bind * for at least
@@ -1668,6 +1907,7 @@ int listenToPort(int port, int *fds, int *count) {
if (server.bindaddr_count == 0) server.bindaddr[0] = NULL;
for (j = 0; j < server.bindaddr_count || j == 0; j++) {
if (server.bindaddr[j] == NULL) {
+ int unsupported = 0;
/* Bind * for both IPv6 and IPv4, we enter here only if
* server.bindaddr_count == 0. */
fds[*count] = anetTcp6Server(server.neterr,port,NULL,
@@ -1675,17 +1915,27 @@ int listenToPort(int port, int *fds, int *count) {
if (fds[*count] != ANET_ERR) {
anetNonBlock(NULL,fds[*count]);
(*count)++;
+ } else if (errno == EAFNOSUPPORT) {
+ unsupported++;
+ serverLog(LL_WARNING,"Not listening to IPv6: unsupported");
}
- fds[*count] = anetTcpServer(server.neterr,port,NULL,
- server.tcp_backlog);
- if (fds[*count] != ANET_ERR) {
- anetNonBlock(NULL,fds[*count]);
- (*count)++;
+
+ if (*count == 1 || unsupported) {
+ /* Bind the IPv4 address as well. */
+ fds[*count] = anetTcpServer(server.neterr,port,NULL,
+ server.tcp_backlog);
+ if (fds[*count] != ANET_ERR) {
+ anetNonBlock(NULL,fds[*count]);
+ (*count)++;
+ } else if (errno == EAFNOSUPPORT) {
+ unsupported++;
+ serverLog(LL_WARNING,"Not listening to IPv4: unsupported");
+ }
}
- /* Exit the loop if we were able to bind * on IPv4 or IPv6,
+ /* Exit the loop if we were able to bind * on IPv4 and IPv6,
* otherwise fds[*count] will be ANET_ERR and we'll print an
* error and return to the caller with an error. */
- if (*count) break;
+ if (*count + unsupported == 2) break;
} else if (strchr(server.bindaddr[j],':')) {
/* Bind IPv6 address. */
fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j],
@@ -1696,16 +1946,16 @@ int listenToPort(int port, int *fds, int *count) {
server.tcp_backlog);
}
if (fds[*count] == ANET_ERR) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Creating Server TCP listening socket %s:%d: %s",
server.bindaddr[j] ? server.bindaddr[j] : "*",
port, server.neterr);
- return REDIS_ERR;
+ return C_ERR;
}
anetNonBlock(NULL,fds[*count]);
(*count)++;
}
- return REDIS_OK;
+ return C_OK;
}
/* Resets the stats that we expose via INFO or other means that we want
@@ -1717,16 +1967,23 @@ void resetServerStats(void) {
server.stat_numcommands = 0;
server.stat_numconnections = 0;
server.stat_expiredkeys = 0;
+ server.stat_expired_stale_perc = 0;
+ server.stat_expired_time_cap_reached_count = 0;
server.stat_evictedkeys = 0;
server.stat_keyspace_misses = 0;
server.stat_keyspace_hits = 0;
+ server.stat_active_defrag_hits = 0;
+ server.stat_active_defrag_misses = 0;
+ server.stat_active_defrag_key_hits = 0;
+ server.stat_active_defrag_key_misses = 0;
+ server.stat_active_defrag_scanned = 0;
server.stat_fork_time = 0;
server.stat_fork_rate = 0;
server.stat_rejected_conn = 0;
server.stat_sync_full = 0;
server.stat_sync_partial_ok = 0;
server.stat_sync_partial_err = 0;
- for (j = 0; j < REDIS_METRIC_COUNT; j++) {
+ for (j = 0; j < STATS_METRIC_COUNT; j++) {
server.inst_metric[j].idx = 0;
server.inst_metric[j].last_sample_time = mstime();
server.inst_metric[j].last_sample_count = 0;
@@ -1735,6 +1992,7 @@ void resetServerStats(void) {
}
server.stat_net_input_bytes = 0;
server.stat_net_output_bytes = 0;
+ server.aof_delayed_fsync = 0;
}
void initServer(void) {
@@ -1749,12 +2007,15 @@ void initServer(void) {
server.syslog_facility);
}
+ server.hz = server.config_hz;
server.pid = getpid();
server.current_client = NULL;
server.clients = listCreate();
+ server.clients_index = raxNew();
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
+ server.clients_pending_write = listCreate();
server.slaveseldb = -1; /* Force to emit the first SELECT command. */
server.unblocked_clients = listCreate();
server.ready_keys = listCreate();
@@ -1765,12 +2026,18 @@ void initServer(void) {
createSharedObjects();
adjustOpenFilesLimit();
- server.el = aeCreateEventLoop(server.maxclients+REDIS_EVENTLOOP_FDSET_INCR);
+ server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
+ if (server.el == NULL) {
+ serverLog(LL_WARNING,
+ "Failed creating the event loop. Error message: '%s'",
+ strerror(errno));
+ exit(1);
+ }
server.db = zmalloc(sizeof(redisDb)*server.dbnum);
/* Open the TCP listening socket for the user commands. */
if (server.port != 0 &&
- listenToPort(server.port,server.ipfd,&server.ipfd_count) == REDIS_ERR)
+ listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
exit(1);
/* Open the listening Unix domain socket. */
@@ -1779,7 +2046,7 @@ void initServer(void) {
server.sofd = anetUnixServer(server.neterr,server.unixsocket,
server.unixsocketperm, server.tcp_backlog);
if (server.sofd == ANET_ERR) {
- redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr);
+ serverLog(LL_WARNING, "Opening Unix socket: %s", server.neterr);
exit(1);
}
anetNonBlock(NULL,server.sofd);
@@ -1787,7 +2054,7 @@ void initServer(void) {
/* Abort if there are no listening sockets at all. */
if (server.ipfd_count == 0 && server.sofd < 0) {
- redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting.");
+ serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
exit(1);
}
@@ -1796,12 +2063,13 @@ void initServer(void) {
server.db[j].dict = dictCreate(&dbDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].ready_keys = dictCreate(&setDictType,NULL);
+ server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL);
server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
- server.db[j].eviction_pool = evictionPoolAlloc();
server.db[j].id = j;
server.db[j].avg_ttl = 0;
+ server.db[j].defrag_later = listCreate();
}
+ evictionPoolAlloc(); /* Initialize the LRU keys pool. */
server.pubsub_channels = dictCreate(&keylistDictType,NULL);
server.pubsub_patterns = listCreate();
listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
@@ -1809,7 +2077,11 @@ void initServer(void) {
server.cronloops = 0;
server.rdb_child_pid = -1;
server.aof_child_pid = -1;
- server.rdb_child_type = REDIS_RDB_CHILD_TYPE_NONE;
+ server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_bgsave_scheduled = 0;
+ server.child_info_pipe[0] = -1;
+ server.child_info_pipe[1] = -1;
+ server.child_info_data.magic = 0;
aofRewriteBufferReset();
server.aof_buf = sdsempty();
server.lastsave = time(NULL); /* At startup we consider the DB saved. */
@@ -1821,17 +2093,23 @@ void initServer(void) {
/* A few stats we don't want to reset: server startup time, and peak mem. */
server.stat_starttime = time(NULL);
server.stat_peak_memory = 0;
- server.resident_set_size = 0;
- server.lastbgsave_status = REDIS_OK;
- server.aof_last_write_status = REDIS_OK;
+ server.stat_rdb_cow_bytes = 0;
+ server.stat_aof_cow_bytes = 0;
+ server.cron_malloc_stats.zmalloc_used = 0;
+ server.cron_malloc_stats.process_rss = 0;
+ server.cron_malloc_stats.allocator_allocated = 0;
+ server.cron_malloc_stats.allocator_active = 0;
+ server.cron_malloc_stats.allocator_resident = 0;
+ server.lastbgsave_status = C_OK;
+ server.aof_last_write_status = C_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
- updateCachedTime();
- /* Create the serverCron() time event, that's our main way to process
- * background operations. */
- if(aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
- redisPanic("Can't create the serverCron time event.");
+ /* Create the timer callback, this is our way to process many background
+ * operations incrementally, like clients timeout, eviction of unaccessed
+ * expired keys and so forth. */
+ if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
+ serverPanic("Can't create event loop timers.");
exit(1);
}
@@ -1841,19 +2119,29 @@ void initServer(void) {
if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
acceptTcpHandler,NULL) == AE_ERR)
{
- redisPanic(
+ serverPanic(
"Unrecoverable error creating server.ipfd file event.");
}
}
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
- acceptUnixHandler,NULL) == AE_ERR) redisPanic("Unrecoverable error creating server.sofd file event.");
+ acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
+
+
+ /* Register a readable event for the pipe used to awake the event loop
+ * when a blocked client in a module needs attention. */
+ if (aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE,
+ moduleBlockedClientPipeReadable,NULL) == AE_ERR) {
+ serverPanic(
+ "Error registering the readable event for the module "
+ "blocked clients subsystem.");
+ }
/* Open the AOF file if needed. */
- if (server.aof_state == REDIS_AOF_ON) {
+ if (server.aof_state == AOF_ON) {
server.aof_fd = open(server.aof_filename,
O_WRONLY|O_APPEND|O_CREAT,0644);
if (server.aof_fd == -1) {
- redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
+ serverLog(LL_WARNING, "Can't open the append-only file: %s",
strerror(errno));
exit(1);
}
@@ -1864,17 +2152,18 @@ void initServer(void) {
* at 3 GB using maxmemory with 'noeviction' policy. This avoids
* useless crashes of the Redis instance due to out of memory. */
if (server.arch_bits == 32 && server.maxmemory == 0) {
- redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
+ serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
- server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION;
+ server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
}
if (server.cluster_enabled) clusterInit();
replicationScriptCacheInit();
- scriptingInit();
+ scriptingInit(1);
slowlogInit();
latencyMonitorInit();
bioInit();
+ server.initial_memory_usage = zmalloc_used_memory();
}
/* Populates the Redis Command Table starting from the hard coded list
@@ -1890,20 +2179,20 @@ void populateCommandTable(void) {
while(*f != '\0') {
switch(*f) {
- case 'w': c->flags |= REDIS_CMD_WRITE; break;
- case 'r': c->flags |= REDIS_CMD_READONLY; break;
- case 'm': c->flags |= REDIS_CMD_DENYOOM; break;
- case 'a': c->flags |= REDIS_CMD_ADMIN; break;
- case 'p': c->flags |= REDIS_CMD_PUBSUB; break;
- case 's': c->flags |= REDIS_CMD_NOSCRIPT; break;
- case 'R': c->flags |= REDIS_CMD_RANDOM; break;
- case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break;
- case 'l': c->flags |= REDIS_CMD_LOADING; break;
- case 't': c->flags |= REDIS_CMD_STALE; break;
- case 'M': c->flags |= REDIS_CMD_SKIP_MONITOR; break;
- case 'k': c->flags |= REDIS_CMD_ASKING; break;
- case 'F': c->flags |= REDIS_CMD_FAST; break;
- default: redisPanic("Unsupported command flag"); break;
+ case 'w': c->flags |= CMD_WRITE; break;
+ case 'r': c->flags |= CMD_READONLY; break;
+ case 'm': c->flags |= CMD_DENYOOM; break;
+ case 'a': c->flags |= CMD_ADMIN; break;
+ case 'p': c->flags |= CMD_PUBSUB; break;
+ case 's': c->flags |= CMD_NOSCRIPT; break;
+ case 'R': c->flags |= CMD_RANDOM; break;
+ case 'S': c->flags |= CMD_SORT_FOR_SCRIPT; break;
+ case 'l': c->flags |= CMD_LOADING; break;
+ case 't': c->flags |= CMD_STALE; break;
+ case 'M': c->flags |= CMD_SKIP_MONITOR; break;
+ case 'k': c->flags |= CMD_ASKING; break;
+ case 'F': c->flags |= CMD_FAST; break;
+ default: serverPanic("Unsupported command flag"); break;
}
f++;
}
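/* For reference, the flag string parsed above comes from the hard coded
 * command table; a typical entry (sketch, following the table layout used
 * in this file) maps "rF" to CMD_READONLY|CMD_FAST:
 *
 *   {"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
 */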
@@ -1912,20 +2201,23 @@ void populateCommandTable(void) {
/* Populate an additional dictionary that will be unaffected
* by rename-command statements in redis.conf. */
retval2 = dictAdd(server.orig_commands, sdsnew(c->name), c);
- redisAssert(retval1 == DICT_OK && retval2 == DICT_OK);
+ serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
}
}
void resetCommandTableStats(void) {
- int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
- int j;
-
- for (j = 0; j < numcommands; j++) {
- struct redisCommand *c = redisCommandTable+j;
+ struct redisCommand *c;
+ dictEntry *de;
+ dictIterator *di;
+ di = dictGetSafeIterator(server.commands);
+ while((de = dictNext(di)) != NULL) {
+ c = (struct redisCommand *) dictGetVal(de);
c->microseconds = 0;
c->calls = 0;
}
+ dictReleaseIterator(di);
+
}
/* ========================== Redis OP Array API ============================ */
@@ -1998,37 +2290,113 @@ struct redisCommand *lookupCommandOrOriginal(sds name) {
* to AOF and Slaves.
*
* flags are a bitwise OR of:
- * + REDIS_PROPAGATE_NONE (no propagation of command at all)
- * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled)
- * + REDIS_PROPAGATE_REPL (propagate into the replication link)
+ * + PROPAGATE_NONE (no propagation of command at all)
+ * + PROPAGATE_AOF (propagate into the AOF file if it is enabled)
+ * + PROPAGATE_REPL (propagate into the replication link)
+ *
+ * This should not be used inside command implementations. Use
+ * alsoPropagate(), preventCommandPropagation() or forceCommandPropagation()
+ * instead.
*/
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
{
- if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF)
+ if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)
feedAppendOnlyFile(cmd,dbid,argv,argc);
- if (flags & REDIS_PROPAGATE_REPL)
+ if (flags & PROPAGATE_REPL)
replicationFeedSlaves(server.slaves,dbid,argv,argc);
}
/* Used inside commands to schedule the propagation of additional commands
- * after the current command is propagated to AOF / Replication. */
+ * after the current command is propagated to AOF / Replication.
+ *
+ * 'cmd' must be a pointer to the Redis command to replicate, dbid is the
+ * database ID the command should be propagated into.
+ * Arguments of the command to propagate are passed as an array of Redis
+ * object pointers of length 'argc', using the 'argv' vector.
+ *
+ * The function does not take a reference to the passed 'argv' vector,
+ * so it is up to the caller to release the passed argv (but it is usually
+ * stack allocated). The function automatically increments the ref count of
+ * the passed objects, so the caller does not need to. */
void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int target)
{
- redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target);
+ robj **argvcopy;
+ int j;
+
+ if (server.loading) return; /* No propagation during loading. */
+
+ argvcopy = zmalloc(sizeof(robj*)*argc);
+ for (j = 0; j < argc; j++) {
+ argvcopy[j] = argv[j];
+ incrRefCount(argv[j]);
+ }
+ redisOpArrayAppend(&server.also_propagate,cmd,dbid,argvcopy,argc,target);
}
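/* Usage illustration (hypothetical sketch, assuming the cached
 * server.delCommand pointer): a command that wants to replicate an effect
 * different from its verbatim form can build argv on the stack and hand it
 * to alsoPropagate(), which takes care of the reference counts:
 *
 *   robj *argv[2];
 *   argv[0] = createStringObject("DEL",3);
 *   argv[1] = key;
 *   alsoPropagate(server.delCommand,c->db->id,argv,2,
 *                 PROPAGATE_AOF|PROPAGATE_REPL);
 *   decrRefCount(argv[0]);
 */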
/* It is possible to call the function forceCommandPropagation() inside a
* Redis command implementation in order to force the propagation of a
* specific command execution into AOF / Replication. */
-void forceCommandPropagation(redisClient *c, int flags) {
- if (flags & REDIS_PROPAGATE_REPL) c->flags |= REDIS_FORCE_REPL;
- if (flags & REDIS_PROPAGATE_AOF) c->flags |= REDIS_FORCE_AOF;
+void forceCommandPropagation(client *c, int flags) {
+ if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
+ if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
+}
+
+/* Avoid that the executed command is propagated at all. This way we
+ * are free to just propagate what we want using the alsoPropagate()
+ * API. */
+void preventCommandPropagation(client *c) {
+ c->flags |= CLIENT_PREVENT_PROP;
}
-/* Call() is the core of Redis execution of a command */
-void call(redisClient *c, int flags) {
+/* AOF specific version of preventCommandPropagation(). */
+void preventCommandAOF(client *c) {
+ c->flags |= CLIENT_PREVENT_AOF_PROP;
+}
+
+/* Replication specific version of preventCommandPropagation(). */
+void preventCommandReplication(client *c) {
+ c->flags |= CLIENT_PREVENT_REPL_PROP;
+}
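/* Usage illustration (sketch): a command with a random effect can suppress
 * its own propagation and replicate a deterministic rewrite instead, by
 * combining the two APIs:
 *
 *   preventCommandPropagation(c);
 *   alsoPropagate(cmd,c->db->id,argv,argc,PROPAGATE_AOF|PROPAGATE_REPL);
 *
 * This is the pattern used, for instance, by SPOP, which replicates the
 * equivalent SREM / DEL operations. */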
+
+/* Call() is the core of Redis execution of a command.
+ *
+ * The following flags can be passed:
+ * CMD_CALL_NONE No flags.
+ * CMD_CALL_SLOWLOG Check command speed and log in the slow log if needed.
+ * CMD_CALL_STATS Populate command stats.
+ * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
+ * or if the client flags are forcing propagation.
+ * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
+ * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
+ *
+ * The exact propagation behavior depends on the client flags.
+ * Specifically:
+ *
+ * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
+ * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
+ * in the call flags, then the command is propagated even if the
+ * dataset was not affected by the command.
+ * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
+ * are set, the propagation into AOF or to slaves is not performed even
+ * if the command modified the dataset.
+ *
+ * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
+ * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
+ * slaves propagation will never occur.
+ *
+ * Client flags are modified by the implementation of a given command
+ * using the following API:
+ *
+ * forceCommandPropagation(client *c, int flags);
+ * preventCommandPropagation(client *c);
+ * preventCommandAOF(client *c);
+ * preventCommandReplication(client *c);
+ *
+ */
+void call(client *c, int flags) {
long long dirty, start, duration;
int client_old_flags = c->flags;
@@ -2036,14 +2404,18 @@ void call(redisClient *c, int flags) {
* not generated from reading an AOF. */
if (listLength(server.monitors) &&
!server.loading &&
- !(c->cmd->flags & (REDIS_CMD_SKIP_MONITOR|REDIS_CMD_ADMIN)))
+ !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
{
replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
}
- /* Call the command. */
- c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL);
+ /* Initialization: clear the flags that must be set by the command on
+ * demand, and initialize the array for additional commands propagation. */
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+ redisOpArray prev_also_propagate = server.also_propagate;
redisOpArrayInit(&server.also_propagate);
+
+ /* Call the command. */
dirty = server.dirty;
start = ustime();
c->cmd->proc(c);
@@ -2053,61 +2425,91 @@ void call(redisClient *c, int flags) {
/* When EVAL is called loading the AOF we don't want commands called
* from Lua to go into the slowlog or to populate statistics. */
- if (server.loading && c->flags & REDIS_LUA_CLIENT)
- flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS);
+ if (server.loading && c->flags & CLIENT_LUA)
+ flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS);
/* If the caller is Lua, we want to force the EVAL caller to propagate
* the script if the command flag or client flag are forcing the
* propagation. */
- if (c->flags & REDIS_LUA_CLIENT && server.lua_caller) {
- if (c->flags & REDIS_FORCE_REPL)
- server.lua_caller->flags |= REDIS_FORCE_REPL;
- if (c->flags & REDIS_FORCE_AOF)
- server.lua_caller->flags |= REDIS_FORCE_AOF;
+ if (c->flags & CLIENT_LUA && server.lua_caller) {
+ if (c->flags & CLIENT_FORCE_REPL)
+ server.lua_caller->flags |= CLIENT_FORCE_REPL;
+ if (c->flags & CLIENT_FORCE_AOF)
+ server.lua_caller->flags |= CLIENT_FORCE_AOF;
}
/* Log the command into the Slow log if needed, and populate the
* per-command statistics that we show in INFO commandstats. */
- if (flags & REDIS_CALL_SLOWLOG && c->cmd->proc != execCommand) {
- char *latency_event = (c->cmd->flags & REDIS_CMD_FAST) ?
+ if (flags & CMD_CALL_SLOWLOG && c->cmd->proc != execCommand) {
+ char *latency_event = (c->cmd->flags & CMD_FAST) ?
"fast-command" : "command";
latencyAddSampleIfNeeded(latency_event,duration/1000);
- slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
+ slowlogPushEntryIfNeeded(c,c->argv,c->argc,duration);
}
- if (flags & REDIS_CALL_STATS) {
- c->cmd->microseconds += duration;
- c->cmd->calls++;
+ if (flags & CMD_CALL_STATS) {
+ c->lastcmd->microseconds += duration;
+ c->lastcmd->calls++;
}
/* Propagate the command into the AOF and replication link */
- if (flags & REDIS_CALL_PROPAGATE) {
- int flags = REDIS_PROPAGATE_NONE;
-
- if (c->flags & REDIS_FORCE_REPL) flags |= REDIS_PROPAGATE_REPL;
- if (c->flags & REDIS_FORCE_AOF) flags |= REDIS_PROPAGATE_AOF;
- if (dirty)
- flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF);
- if (flags != REDIS_PROPAGATE_NONE)
- propagate(c->cmd,c->db->id,c->argv,c->argc,flags);
+ if (flags & CMD_CALL_PROPAGATE &&
+ (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP)
+ {
+ int propagate_flags = PROPAGATE_NONE;
+
+ /* Check if the command operated changes in the data set. If so
+ * set for replication / AOF propagation. */
+ if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
+
+ /* If the client forced AOF / replication of the command, set
+ * the flags regardless of the command effects on the data set. */
+ if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
+ if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
+
+ /* However prevent AOF / replication propagation if the command
+ * implementation called preventCommandPropagation() or similar,
+ * or if we don't have the call() flags to do so. */
+ if (c->flags & CLIENT_PREVENT_REPL_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_REPL))
+ propagate_flags &= ~PROPAGATE_REPL;
+ if (c->flags & CLIENT_PREVENT_AOF_PROP ||
+ !(flags & CMD_CALL_PROPAGATE_AOF))
+ propagate_flags &= ~PROPAGATE_AOF;
+
+ /* Call propagate() only if at least one of AOF / replication
+ * propagation is needed. Note that modules commands handle replication
+ * in an explicit way, so we never replicate them automatically. */
+ if (propagate_flags != PROPAGATE_NONE && !(c->cmd->flags & CMD_MODULE))
+ propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags);
}
- /* Restore the old FORCE_AOF/REPL flags, since call can be executed
+ /* Restore the old replication flags, since call() can be executed
* recursively. */
- c->flags &= ~(REDIS_FORCE_AOF|REDIS_FORCE_REPL);
- c->flags |= client_old_flags & (REDIS_FORCE_AOF|REDIS_FORCE_REPL);
+ c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
+ c->flags |= client_old_flags &
+ (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
/* Handle the alsoPropagate() API to handle commands that want to propagate
- * multiple separated commands. */
+ * multiple separated commands. Note that alsoPropagate() is not affected
+ * by CLIENT_PREVENT_PROP flag. */
if (server.also_propagate.numops) {
int j;
redisOp *rop;
- for (j = 0; j < server.also_propagate.numops; j++) {
- rop = &server.also_propagate.ops[j];
- propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target);
+ if (flags & CMD_CALL_PROPAGATE) {
+ for (j = 0; j < server.also_propagate.numops; j++) {
+ rop = &server.also_propagate.ops[j];
+ int target = rop->target;
+ /* Whatever the command wishes, we honor the call() flags. */
+ if (!(flags&CMD_CALL_PROPAGATE_AOF)) target &= ~PROPAGATE_AOF;
+ if (!(flags&CMD_CALL_PROPAGATE_REPL)) target &= ~PROPAGATE_REPL;
+ if (target)
+ propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,target);
+ }
}
redisOpArrayFree(&server.also_propagate);
}
+ server.also_propagate = prev_also_propagate;
server.stat_numcommands++;
}
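/* Worked example of the propagation rules implemented above: a command
 * that made the dataset dirty, executed for a client with
 * CLIENT_PREVENT_AOF_PROP set, under flags == CMD_CALL_FULL:
 *
 *   dirty != 0               -> propagate_flags = PROPAGATE_AOF|PROPAGATE_REPL
 *   CLIENT_PREVENT_AOF_PROP  -> propagate_flags &= ~PROPAGATE_AOF
 *   final                    -> propagate() runs with PROPAGATE_REPL only:
 *                               slaves receive the command, the AOF does not.
 */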
@@ -2116,18 +2518,18 @@ void call(redisClient *c, int flags) {
* processCommand() execute the command or prepare the
* server for a bulk read from the client.
*
- * If 1 is returned the client is still alive and valid and
+ * If C_OK is returned the client is still alive and valid and
* other operations can be performed by the caller. Otherwise
- * if 0 is returned the client was destroyed (i.e. after QUIT). */
-int processCommand(redisClient *c) {
+ * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
+int processCommand(client *c) {
/* The QUIT command is handled separately. Normal command procs go
* through replication checks, and QUIT would cause trouble when
* FORCE_REPLICATION is enabled if it were implemented as a regular
* command proc. */
if (!strcasecmp(c->argv[0]->ptr,"quit")) {
addReply(c,shared.ok);
- c->flags |= REDIS_CLOSE_AFTER_REPLY;
- return REDIS_ERR;
+ c->flags |= CLIENT_CLOSE_AFTER_REPLY;
+ return C_ERR;
}
/* Now lookup the command and check ASAP about trivial error conditions
@@ -2135,15 +2537,20 @@ int processCommand(redisClient *c) {
c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
if (!c->cmd) {
flagTransaction(c);
- addReplyErrorFormat(c,"unknown command '%s'",
- (char*)c->argv[0]->ptr);
- return REDIS_OK;
+ sds args = sdsempty();
+ int i;
+ for (i=1; i < c->argc && sdslen(args) < 128; i++)
+ args = sdscatprintf(args, "`%.*s`, ", 128-(int)sdslen(args), (char*)c->argv[i]->ptr);
+ addReplyErrorFormat(c,"unknown command `%s`, with args beginning with: %s",
+ (char*)c->argv[0]->ptr, args);
+ sdsfree(args);
+ return C_OK;
} else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
(c->argc < -c->cmd->arity)) {
flagTransaction(c);
addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
c->cmd->name);
- return REDIS_OK;
+ return C_OK;
}
/* Check if the user is authenticated */
@@ -2151,7 +2558,7 @@ int processCommand(redisClient *c) {
{
flagTransaction(c);
addReply(c,shared.noautherr);
- return REDIS_OK;
+ return C_OK;
}
/* If cluster is enabled perform the cluster redirection here.
@@ -2159,39 +2566,24 @@ int processCommand(redisClient *c) {
* 1) The sender of this command is our master.
* 2) The command has no key arguments. */
if (server.cluster_enabled &&
- !(c->flags & REDIS_MASTER) &&
- !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0))
+ !(c->flags & CLIENT_MASTER) &&
+ !(c->flags & CLIENT_LUA &&
+ server.lua_caller->flags & CLIENT_MASTER) &&
+ !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0 &&
+ c->cmd->proc != execCommand))
{
int hashslot;
-
- if (server.cluster->state != REDIS_CLUSTER_OK) {
- flagTransaction(c);
- addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down. Use CLUSTER INFO for more information\r\n"));
- return REDIS_OK;
- } else {
- int error_code;
- clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&error_code);
- if (n == NULL) {
- flagTransaction(c);
- if (error_code == REDIS_CLUSTER_REDIR_CROSS_SLOT) {
- addReplySds(c,sdsnew("-CROSSSLOT Keys in request don't hash to the same slot\r\n"));
- } else if (error_code == REDIS_CLUSTER_REDIR_UNSTABLE) {
- /* The request spawns mutliple keys in the same slot,
- * but the slot is not "stable" currently as there is
- * a migration or import in progress. */
- addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
- } else {
- redisPanic("getNodeByQuery() unknown error.");
- }
- return REDIS_OK;
- } else if (n != server.cluster->myself) {
+ int error_code;
+ clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
+ &hashslot,&error_code);
+ if (n == NULL || n != server.cluster->myself) {
+ if (c->cmd->proc == execCommand) {
+ discardTransaction(c);
+ } else {
flagTransaction(c);
- addReplySds(c,sdscatprintf(sdsempty(),
- "-%s %d %s:%d\r\n",
- (error_code == REDIS_CLUSTER_REDIR_ASK) ? "ASK" : "MOVED",
- hashslot,n->ip,n->port));
- return REDIS_OK;
}
+ clusterRedirectClient(c,n,hashslot,error_code);
+ return C_OK;
}
}
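/* For reference, clusterRedirectClient() emits the same wire format that
 * the inlined code used to build by hand, e.g. for a key hashing to a slot
 * served by another node:
 *
 *   -MOVED 3999 127.0.0.1:6381
 *
 * or -ASK <slot> <ip>:<port> while the slot is being migrated. */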
@@ -2202,32 +2594,36 @@ int processCommand(redisClient *c) {
* is returning an error. */
if (server.maxmemory) {
int retval = freeMemoryIfNeeded();
- if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) {
+ /* freeMemoryIfNeeded may flush slave output buffers. This may result
+ * into a slave, that may be the active client, to be freed. */
+ if (server.current_client == NULL) return C_ERR;
+
+ /* It was impossible to free enough memory, and the command the client
+ * is trying to execute is denied during OOM conditions? Error. */
+ if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
flagTransaction(c);
addReply(c, shared.oomerr);
- return REDIS_OK;
+ return C_OK;
}
}
/* Don't accept write commands if there are problems persisting on disk
* and if this is a master instance. */
- if (((server.stop_writes_on_bgsave_err &&
- server.saveparamslen > 0 &&
- server.lastbgsave_status == REDIS_ERR) ||
- server.aof_last_write_status == REDIS_ERR) &&
+ int deny_write_type = writeCommandsDeniedByDiskError();
+ if (deny_write_type != DISK_ERROR_TYPE_NONE &&
server.masterhost == NULL &&
- (c->cmd->flags & REDIS_CMD_WRITE ||
+ (c->cmd->flags & CMD_WRITE ||
c->cmd->proc == pingCommand))
{
flagTransaction(c);
- if (server.aof_last_write_status == REDIS_OK)
+ if (deny_write_type == DISK_ERROR_TYPE_RDB)
addReply(c, shared.bgsaveerr);
else
addReplySds(c,
sdscatprintf(sdsempty(),
"-MISCONF Errors writing to the AOF file: %s\r\n",
strerror(server.aof_last_write_errno)));
- return REDIS_OK;
+ return C_OK;
}
/* Don't accept write commands if there are not enough good slaves and
@@ -2235,51 +2631,52 @@ int processCommand(redisClient *c) {
if (server.masterhost == NULL &&
server.repl_min_slaves_to_write &&
server.repl_min_slaves_max_lag &&
- c->cmd->flags & REDIS_CMD_WRITE &&
+ c->cmd->flags & CMD_WRITE &&
server.repl_good_slaves_count < server.repl_min_slaves_to_write)
{
flagTransaction(c);
addReply(c, shared.noreplicaserr);
- return REDIS_OK;
+ return C_OK;
}
/* Don't accept write commands if this is a read only slave. But
* accept write commands if this is our master. */
if (server.masterhost && server.repl_slave_ro &&
- !(c->flags & REDIS_MASTER) &&
- c->cmd->flags & REDIS_CMD_WRITE)
+ !(c->flags & CLIENT_MASTER) &&
+ c->cmd->flags & CMD_WRITE)
{
addReply(c, shared.roslaveerr);
- return REDIS_OK;
+ return C_OK;
}
/* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
- if (c->flags & REDIS_PUBSUB &&
+ if (c->flags & CLIENT_PUBSUB &&
c->cmd->proc != pingCommand &&
c->cmd->proc != subscribeCommand &&
c->cmd->proc != unsubscribeCommand &&
c->cmd->proc != psubscribeCommand &&
c->cmd->proc != punsubscribeCommand) {
- addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context");
- return REDIS_OK;
+ addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / PING / QUIT allowed in this context");
+ return C_OK;
}
- /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
- * we are a slave with a broken link with master. */
- if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED &&
+ /* Only allow commands with flag "t", such as INFO, SLAVEOF and so on,
+ * when slave-serve-stale-data is no and we are a slave with a broken
+ * link with master. */
+ if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
server.repl_serve_stale_data == 0 &&
- !(c->cmd->flags & REDIS_CMD_STALE))
+ !(c->cmd->flags & CMD_STALE))
{
flagTransaction(c);
addReply(c, shared.masterdownerr);
- return REDIS_OK;
+ return C_OK;
}
/* Loading DB? Return an error if the command has not the
- * REDIS_CMD_LOADING flag. */
- if (server.loading && !(c->cmd->flags & REDIS_CMD_LOADING)) {
+ * CMD_LOADING flag. */
+ if (server.loading && !(c->cmd->flags & CMD_LOADING)) {
addReply(c, shared.loadingerr);
- return REDIS_OK;
+ return C_OK;
}
/* Lua script too slow? Only allow a limited number of commands. */
@@ -2295,23 +2692,23 @@ int processCommand(redisClient *c) {
{
flagTransaction(c);
addReply(c, shared.slowscripterr);
- return REDIS_OK;
+ return C_OK;
}
/* Exec the command */
- if (c->flags & REDIS_MULTI &&
+ if (c->flags & CLIENT_MULTI &&
c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
{
queueMultiCommand(c);
addReply(c,shared.queued);
} else {
- call(c,REDIS_CALL_FULL);
+ call(c,CMD_CALL_FULL);
c->woff = server.master_repl_offset;
if (listLength(server.ready_keys))
- handleClientsBlockedOnLists();
+ handleClientsBlockedOnKeys();
}
- return REDIS_OK;
+ return C_OK;
}
/*================================== Shutdown =============================== */
@@ -2326,68 +2723,111 @@ void closeListeningSockets(int unlink_unix_socket) {
if (server.cluster_enabled)
for (j = 0; j < server.cfd_count; j++) close(server.cfd[j]);
if (unlink_unix_socket && server.unixsocket) {
- redisLog(REDIS_NOTICE,"Removing the unix socket file.");
+ serverLog(LL_NOTICE,"Removing the unix socket file.");
unlink(server.unixsocket); /* don't care if this fails */
}
}
int prepareForShutdown(int flags) {
- int save = flags & REDIS_SHUTDOWN_SAVE;
- int nosave = flags & REDIS_SHUTDOWN_NOSAVE;
+ int save = flags & SHUTDOWN_SAVE;
+ int nosave = flags & SHUTDOWN_NOSAVE;
+
+ serverLog(LL_WARNING,"User requested shutdown...");
+
+ /* Kill all the Lua debugger forked sessions. */
+ ldbKillForkedSessions();
- redisLog(REDIS_WARNING,"User requested shutdown...");
/* Kill the saving child if there is a background saving in progress.
We want to avoid race conditions, for instance our saving child may
overwrite the synchronous saving did by SHUTDOWN. */
if (server.rdb_child_pid != -1) {
- redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!");
+ serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
kill(server.rdb_child_pid,SIGUSR1);
rdbRemoveTempFile(server.rdb_child_pid);
}
- if (server.aof_state != REDIS_AOF_OFF) {
+
+ if (server.aof_state != AOF_OFF) {
/* Kill the AOF saving child as the AOF we already have may be longer
* but contains the full dataset anyway. */
if (server.aof_child_pid != -1) {
/* If we have AOF enabled but haven't written the AOF yet, don't
* shutdown or else the dataset will be lost. */
- if (server.aof_state == REDIS_AOF_WAIT_REWRITE) {
- redisLog(REDIS_WARNING, "Writing initial AOF, can't exit.");
- return REDIS_ERR;
+ if (server.aof_state == AOF_WAIT_REWRITE) {
+ serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
+ return C_ERR;
}
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"There is a child rewriting the AOF. Killing it!");
kill(server.aof_child_pid,SIGUSR1);
}
- /* Append only file: fsync() the AOF and exit */
- redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");
- aof_fsync(server.aof_fd);
+ /* Append only file: flush buffers and fsync() the AOF at exit */
+ serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
+ flushAppendOnlyFile(1);
+ redis_fsync(server.aof_fd);
}
+
+ /* Create a new RDB file before exiting. */
if ((server.saveparamslen > 0 && !nosave) || save) {
- redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting.");
+ serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
/* Snapshotting. Perform a SYNC SAVE and exit */
- if (rdbSave(server.rdb_filename) != REDIS_OK) {
+ rdbSaveInfo rsi, *rsiptr;
+ rsiptr = rdbPopulateSaveInfo(&rsi);
+ if (rdbSave(server.rdb_filename,rsiptr) != C_OK) {
/* Ooops.. error saving! The best we can do is to continue
* operating. Note that if there was a background saving process,
* in the next cron() Redis will be notified that the background
* saving aborted, handling special stuff like slaves pending for
* synchronization... */
- redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit.");
- return REDIS_ERR;
+ serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
+ return C_ERR;
}
}
+
+ /* Remove the pid file if possible and needed. */
if (server.daemonize || server.pidfile) {
- redisLog(REDIS_NOTICE,"Removing the pid file.");
+ serverLog(LL_NOTICE,"Removing the pid file.");
unlink(server.pidfile);
}
+
+ /* Best effort flush of slave output buffers, so that we hopefully
+ * send them pending writes. */
+ flushSlavesOutputBuffers();
+
/* Close the listening sockets. Apparently this allows faster restarts. */
closeListeningSockets(1);
- redisLog(REDIS_WARNING,"%s is now ready to exit, bye bye...",
+ serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
server.sentinel_mode ? "Sentinel" : "Redis");
- return REDIS_OK;
+ return C_OK;
}
/*================================== Commands =============================== */
+/* Sometimes Redis cannot accept write commands because there is a persistence
+ * error with the RDB or AOF file, and Redis is configured to stop accepting
+ * writes in such a situation. This function returns whether such a
+ * condition is active, and the type of the condition.
+ *
+ * Function return values:
+ *
+ * DISK_ERROR_TYPE_NONE: No problems, we can accept writes.
+ * DISK_ERROR_TYPE_AOF: Don't accept writes: AOF errors.
+ * DISK_ERROR_TYPE_RDB: Don't accept writes: RDB errors.
+ */
+int writeCommandsDeniedByDiskError(void) {
+ if (server.stop_writes_on_bgsave_err &&
+ server.saveparamslen > 0 &&
+ server.lastbgsave_status == C_ERR)
+ {
+ return DISK_ERROR_TYPE_RDB;
+ } else if (server.aof_state != AOF_OFF &&
+ server.aof_last_write_status == C_ERR)
+ {
+ return DISK_ERROR_TYPE_AOF;
+ } else {
+ return DISK_ERROR_TYPE_NONE;
+ }
+}
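/* Usage sketch: callers map the returned type to a client error, as
 * processCommand() does above:
 *
 *   int deny = writeCommandsDeniedByDiskError();
 *   if (deny == DISK_ERROR_TYPE_RDB)
 *       addReply(c, shared.bgsaveerr);
 *   else if (deny == DISK_ERROR_TYPE_AOF)
 *       addReplySds(c, sdscatprintf(sdsempty(),
 *           "-MISCONF Errors writing to the AOF file: %s\r\n",
 *           strerror(server.aof_last_write_errno)));
 */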
+
/* Return zero if strings are the same, non-zero if they are not.
* The comparison is performed in a way that prevents an attacker to obtain
* information about the nature of the strings just monitoring the execution
@@ -2398,7 +2838,7 @@ int prepareForShutdown(int flags) {
* possible branch misprediction related leak.
*/
int time_independent_strcmp(char *a, char *b) {
- char bufa[REDIS_AUTHPASS_MAX_LEN], bufb[REDIS_AUTHPASS_MAX_LEN];
+ char bufa[CONFIG_AUTHPASS_MAX_LEN], bufb[CONFIG_AUTHPASS_MAX_LEN];
/* The above two strlen perform len(a) + len(b) operations where either
* a or b are fixed (our password) length, and the difference is only
* relative to the length of the user provided string, so no information
@@ -2430,7 +2870,7 @@ int time_independent_strcmp(char *a, char *b) {
return diff; /* If zero strings are the same. */
}
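/* The elided body above follows the classic constant-time comparison
 * pattern; a minimal standalone sketch of the idea:
 *
 *   unsigned int diff = 0;
 *   int j;
 *   for (j = 0; j < CONFIG_AUTHPASS_MAX_LEN; j++)
 *       diff |= (bufa[j] ^ bufb[j]);
 *   return diff;
 *
 * Every byte is always inspected, so the running time does not depend on
 * where the first mismatch occurs. */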
-void authCommand(redisClient *c) {
+void authCommand(client *c) {
if (!server.requirepass) {
addReplyError(c,"Client sent AUTH, but no password is set");
} else if (!time_independent_strcmp(c->argv[1]->ptr, server.requirepass)) {
@@ -2444,7 +2884,7 @@ void authCommand(redisClient *c) {
/* The PING command. It works in a different way if the client is in
* in Pub/Sub mode. */
-void pingCommand(redisClient *c) {
+void pingCommand(client *c) {
/* The command takes zero or one arguments. */
if (c->argc > 2) {
addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
@@ -2452,7 +2892,7 @@ void pingCommand(redisClient *c) {
return;
}
- if (c->flags & REDIS_PUBSUB) {
+ if (c->flags & CLIENT_PUBSUB) {
addReply(c,shared.mbulkhdr[2]);
addReplyBulkCBuffer(c,"pong",4);
if (c->argc == 1)
@@ -2467,11 +2907,11 @@ void pingCommand(redisClient *c) {
}
}
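/* Reply shapes, for reference: outside Pub/Sub, PING answers +PONG, or
 * echoes its single argument as a bulk string. Inside Pub/Sub it answers
 * a two element multi bulk, so clients can tell it apart from message
 * deliveries:
 *
 *   *2
 *   $4
 *   pong
 *   $0
 *   <empty bulk payload>
 */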
-void echoCommand(redisClient *c) {
+void echoCommand(client *c) {
addReplyBulk(c,c->argv[1]);
}
-void timeCommand(redisClient *c) {
+void timeCommand(client *c) {
struct timeval tv;
/* gettimeofday() can only fail if &tv is a bad address so we
@@ -2483,7 +2923,7 @@ void timeCommand(redisClient *c) {
}
/* Helper function for addReplyCommand() to output flags. */
-int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *reply) {
+int addReplyCommandFlag(client *c, struct redisCommand *cmd, int f, char *reply) {
if (cmd->flags & f) {
addReplyStatus(c, reply);
return 1;
@@ -2492,7 +2932,7 @@ int addReplyCommandFlag(redisClient *c, struct redisCommand *cmd, int f, char *r
}
/* Output the representation of a Redis command. Used by the COMMAND command. */
-void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
+void addReplyCommand(client *c, struct redisCommand *cmd) {
if (!cmd) {
addReply(c, shared.nullbulk);
} else {
@@ -2503,20 +2943,22 @@ void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
int flagcount = 0;
void *flaglen = addDeferredMultiBulkLength(c);
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_WRITE, "write");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_READONLY, "readonly");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_DENYOOM, "denyoom");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_ADMIN, "admin");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_PUBSUB, "pubsub");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_NOSCRIPT, "noscript");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_RANDOM, "random");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_SORT_FOR_SCRIPT,"sort_for_script");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_LOADING, "loading");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_STALE, "stale");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_SKIP_MONITOR, "skip_monitor");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_ASKING, "asking");
- flagcount += addReplyCommandFlag(c,cmd,REDIS_CMD_FAST, "fast");
- if (cmd->getkeys_proc) {
+ flagcount += addReplyCommandFlag(c,cmd,CMD_WRITE, "write");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_READONLY, "readonly");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_DENYOOM, "denyoom");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_ADMIN, "admin");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_PUBSUB, "pubsub");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_NOSCRIPT, "noscript");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_RANDOM, "random");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_SORT_FOR_SCRIPT,"sort_for_script");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_LOADING, "loading");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_STALE, "stale");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_SKIP_MONITOR, "skip_monitor");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_ASKING, "asking");
+ flagcount += addReplyCommandFlag(c,cmd,CMD_FAST, "fast");
+ if ((cmd->getkeys_proc && !(cmd->flags & CMD_MODULE)) ||
+ cmd->flags & CMD_MODULE_GETKEYS)
+ {
addReplyStatus(c, "movablekeys");
flagcount += 1;
}
@@ -2529,11 +2971,20 @@ void addReplyCommand(redisClient *c, struct redisCommand *cmd) {
}
/* COMMAND <subcommand> <args> */
-void commandCommand(redisClient *c) {
+void commandCommand(client *c) {
dictIterator *di;
dictEntry *de;
- if (c->argc == 1) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"(no subcommand) -- Return details about all Redis commands.",
+"COUNT -- Return the total number of commands in this Redis server.",
+"GETKEYS <full-command> -- Return the keys from a full Redis command.",
+"INFO [command-name ...] -- Return details about multiple Redis commands.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (c->argc == 1) {
addReplyMultiBulkLen(c, dictSize(server.commands));
di = dictGetIterator(server.commands);
while ((de = dictNext(di)) != NULL) {
@@ -2553,7 +3004,10 @@ void commandCommand(redisClient *c) {
int *keys, numkeys, j;
if (!cmd) {
- addReplyErrorFormat(c,"Invalid command specified");
+ addReplyError(c,"Invalid command specified");
+ return;
+ } else if (cmd->getkeys_proc == NULL && cmd->firstkey == 0) {
+ addReplyError(c,"The command has no key arguments");
return;
} else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
((c->argc-2) < -cmd->arity))
@@ -2563,12 +3017,15 @@ void commandCommand(redisClient *c) {
}
keys = getKeysFromCommand(cmd,c->argv+2,c->argc-2,&numkeys);
- addReplyMultiBulkLen(c,numkeys);
- for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
- getKeysFreeResult(keys);
+ if (!keys) {
+ addReplyError(c,"Invalid arguments specified for command");
+ } else {
+ addReplyMultiBulkLen(c,numkeys);
+ for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
+ getKeysFreeResult(keys);
+ }
} else {
- addReplyError(c, "Unknown subcommand or wrong number of arguments.");
- return;
+ addReplySubcommandSyntaxError(c);
}
}
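/* Usage illustration, from the client side (example command and key names
 * are arbitrary):
 *
 *   127.0.0.1:6379> COMMAND GETKEYS SET foo bar
 *   1) "foo"
 *
 * while calling COMMAND GETKEYS on a command without key arguments now
 * yields the "The command has no key arguments" error added above. */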
@@ -2580,7 +3037,6 @@ void bytesToHuman(char *s, unsigned long long n) {
if (n < 1024) {
/* Bytes */
sprintf(s,"%lluB",n);
- return;
} else if (n < (1024*1024)) {
d = (double)n/(1024);
sprintf(s,"%.2fK",d);
@@ -2608,9 +3064,8 @@ void bytesToHuman(char *s, unsigned long long n) {
sds genRedisInfoString(char *section) {
sds info = sdsempty();
time_t uptime = server.unixtime-server.stat_starttime;
- int j, numcommands;
+ int j;
struct rusage self_ru, c_ru;
- unsigned long lol, bib;
int allsections = 0, defsections = 0;
int sections = 0;
@@ -2620,7 +3075,6 @@ sds genRedisInfoString(char *section) {
getrusage(RUSAGE_SELF, &self_ru);
getrusage(RUSAGE_CHILDREN, &c_ru);
- getClientsMaxBuffers(&lol,&bib);
/* Server */
if (allsections || defsections || !strcasecmp(section,"server")) {
@@ -2640,6 +3094,8 @@ sds genRedisInfoString(char *section) {
call_uname = 0;
}
+ unsigned int lruclock;
+ atomicGet(server.lruclock,lruclock);
info = sdscatprintf(info,
"# Server\r\n"
"redis_version:%s\r\n"
@@ -2650,6 +3106,7 @@ sds genRedisInfoString(char *section) {
"os:%s %s %s\r\n"
"arch_bits:%d\r\n"
"multiplexing_api:%s\r\n"
+ "atomicvar_api:%s\r\n"
"gcc_version:%d.%d.%d\r\n"
"process_id:%ld\r\n"
"run_id:%s\r\n"
@@ -2657,7 +3114,9 @@ sds genRedisInfoString(char *section) {
"uptime_in_seconds:%jd\r\n"
"uptime_in_days:%jd\r\n"
"hz:%d\r\n"
+ "configured_hz:%d\r\n"
"lru_clock:%ld\r\n"
+ "executable:%s\r\n"
"config_file:%s\r\n",
REDIS_VERSION,
redisGitSHA1(),
@@ -2667,6 +3126,7 @@ sds genRedisInfoString(char *section) {
name.sysname, name.release, name.machine,
server.arch_bits,
aeGetApiName(),
+ REDIS_ATOMIC_API,
#ifdef __GNUC__
__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
#else
@@ -2678,22 +3138,26 @@ sds genRedisInfoString(char *section) {
(intmax_t)uptime,
(intmax_t)(uptime/(3600*24)),
server.hz,
- (unsigned long) server.lruclock,
+ server.config_hz,
+ (unsigned long) lruclock,
+ server.executable ? server.executable : "",
server.configfile ? server.configfile : "");
}
/* Clients */
if (allsections || defsections || !strcasecmp(section,"clients")) {
+ size_t maxin, maxout;
+ getExpansiveClientsInfo(&maxin,&maxout);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# Clients\r\n"
"connected_clients:%lu\r\n"
- "client_longest_output_list:%lu\r\n"
- "client_biggest_input_buf:%lu\r\n"
+ "client_recent_max_input_buffer:%zu\r\n"
+ "client_recent_max_output_buffer:%zu\r\n"
"blocked_clients:%d\r\n",
listLength(server.clients)-listLength(server.slaves),
- lol, bib,
- server.bpop_blocked_clients);
+ maxin, maxout,
+ server.blocked_clients);
}
/* Memory */
@@ -2701,9 +3165,15 @@ sds genRedisInfoString(char *section) {
char hmem[64];
char peak_hmem[64];
char total_system_hmem[64];
+ char used_memory_lua_hmem[64];
+ char used_memory_scripts_hmem[64];
+ char used_memory_rss_hmem[64];
+ char maxmemory_hmem[64];
size_t zmalloc_used = zmalloc_used_memory();
size_t total_system_mem = server.system_memory_size;
- char *evict_policy = maxmemoryToString();
+ const char *evict_policy = evictPolicyToString();
+ long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024;
+ struct redisMemOverhead *mh = getMemoryOverheadData();
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
@@ -2715,6 +3185,10 @@ sds genRedisInfoString(char *section) {
bytesToHuman(hmem,zmalloc_used);
bytesToHuman(peak_hmem,server.stat_peak_memory);
bytesToHuman(total_system_hmem,total_system_mem);
+ bytesToHuman(used_memory_lua_hmem,memory_lua);
+ bytesToHuman(used_memory_scripts_hmem,server.lua_scripts_mem);
+ bytesToHuman(used_memory_rss_hmem,server.cron_malloc_stats.process_rss);
+ bytesToHuman(maxmemory_hmem,server.maxmemory);
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
@@ -2722,26 +3196,85 @@ sds genRedisInfoString(char *section) {
"used_memory:%zu\r\n"
"used_memory_human:%s\r\n"
"used_memory_rss:%zu\r\n"
+ "used_memory_rss_human:%s\r\n"
"used_memory_peak:%zu\r\n"
"used_memory_peak_human:%s\r\n"
+ "used_memory_peak_perc:%.2f%%\r\n"
+ "used_memory_overhead:%zu\r\n"
+ "used_memory_startup:%zu\r\n"
+ "used_memory_dataset:%zu\r\n"
+ "used_memory_dataset_perc:%.2f%%\r\n"
+ "allocator_allocated:%zu\r\n"
+ "allocator_active:%zu\r\n"
+ "allocator_resident:%zu\r\n"
"total_system_memory:%lu\r\n"
"total_system_memory_human:%s\r\n"
"used_memory_lua:%lld\r\n"
+ "used_memory_lua_human:%s\r\n"
+ "used_memory_scripts:%lld\r\n"
+ "used_memory_scripts_human:%s\r\n"
+ "number_of_cached_scripts:%lu\r\n"
+ "maxmemory:%lld\r\n"
+ "maxmemory_human:%s\r\n"
+ "maxmemory_policy:%s\r\n"
+ "allocator_frag_ratio:%.2f\r\n"
+ "allocator_frag_bytes:%zu\r\n"
+ "allocator_rss_ratio:%.2f\r\n"
+ "allocator_rss_bytes:%zu\r\n"
+ "rss_overhead_ratio:%.2f\r\n"
+ "rss_overhead_bytes:%zu\r\n"
"mem_fragmentation_ratio:%.2f\r\n"
+ "mem_fragmentation_bytes:%zu\r\n"
+ "mem_not_counted_for_evict:%zu\r\n"
+ "mem_replication_backlog:%zu\r\n"
+ "mem_clients_slaves:%zu\r\n"
+ "mem_clients_normal:%zu\r\n"
+ "mem_aof_buffer:%zu\r\n"
"mem_allocator:%s\r\n"
- "maxmemory_policy:%s\r\n",
+ "active_defrag_running:%d\r\n"
+ "lazyfree_pending_objects:%zu\r\n",
zmalloc_used,
hmem,
- server.resident_set_size,
+ server.cron_malloc_stats.process_rss,
+ used_memory_rss_hmem,
server.stat_peak_memory,
peak_hmem,
+ mh->peak_perc,
+ mh->overhead_total,
+ mh->startup_allocated,
+ mh->dataset,
+ mh->dataset_perc,
+ server.cron_malloc_stats.allocator_allocated,
+ server.cron_malloc_stats.allocator_active,
+ server.cron_malloc_stats.allocator_resident,
(unsigned long)total_system_mem,
total_system_hmem,
- ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL,
- zmalloc_get_fragmentation_ratio(server.resident_set_size),
+ memory_lua,
+ used_memory_lua_hmem,
+ server.lua_scripts_mem,
+ used_memory_scripts_hmem,
+ dictSize(server.lua_scripts),
+ server.maxmemory,
+ maxmemory_hmem,
+ evict_policy,
+ mh->allocator_frag,
+ mh->allocator_frag_bytes,
+ mh->allocator_rss,
+ mh->allocator_rss_bytes,
+ mh->rss_extra,
+ mh->rss_extra_bytes,
+ mh->total_frag, /* this is the total RSS overhead, including fragmentation, */
+ mh->total_frag_bytes, /* named so for backwards compatibility */
+ freeMemoryGetNotCountedMemory(),
+ mh->repl_backlog,
+ mh->clients_slaves,
+ mh->clients_normal,
+ mh->aof_buffer,
ZMALLOC_LIB,
- evict_policy
- );
+ server.active_defrag_running,
+ lazyfreeGetPendingObjectsCount()
+ );
+ freeMemoryOverheadData(mh);
}
/* Persistence */
@@ -2756,31 +3289,35 @@ sds genRedisInfoString(char *section) {
"rdb_last_bgsave_status:%s\r\n"
"rdb_last_bgsave_time_sec:%jd\r\n"
"rdb_current_bgsave_time_sec:%jd\r\n"
+ "rdb_last_cow_size:%zu\r\n"
"aof_enabled:%d\r\n"
"aof_rewrite_in_progress:%d\r\n"
"aof_rewrite_scheduled:%d\r\n"
"aof_last_rewrite_time_sec:%jd\r\n"
"aof_current_rewrite_time_sec:%jd\r\n"
"aof_last_bgrewrite_status:%s\r\n"
- "aof_last_write_status:%s\r\n",
+ "aof_last_write_status:%s\r\n"
+ "aof_last_cow_size:%zu\r\n",
server.loading,
server.dirty,
server.rdb_child_pid != -1,
(intmax_t)server.lastsave,
- (server.lastbgsave_status == REDIS_OK) ? "ok" : "err",
+ (server.lastbgsave_status == C_OK) ? "ok" : "err",
(intmax_t)server.rdb_save_time_last,
(intmax_t)((server.rdb_child_pid == -1) ?
-1 : time(NULL)-server.rdb_save_time_start),
- server.aof_state != REDIS_AOF_OFF,
+ server.stat_rdb_cow_bytes,
+ server.aof_state != AOF_OFF,
server.aof_child_pid != -1,
server.aof_rewrite_scheduled,
(intmax_t)server.aof_rewrite_time_last,
(intmax_t)((server.aof_child_pid == -1) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
- (server.aof_lastbgrewrite_status == REDIS_OK) ? "ok" : "err",
- (server.aof_last_write_status == REDIS_OK) ? "ok" : "err");
+ (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
+ (server.aof_last_write_status == C_OK) ? "ok" : "err",
+ server.stat_aof_cow_bytes);
- if (server.aof_state != REDIS_AOF_OFF) {
+ if (server.aof_state != AOF_OFF) {
info = sdscatprintf(info,
"aof_current_size:%lld\r\n"
"aof_base_size:%lld\r\n"
@@ -2794,7 +3331,7 @@ sds genRedisInfoString(char *section) {
server.aof_rewrite_scheduled,
sdslen(server.aof_buf),
aofRewriteBufferSize(),
- bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC),
+ bioPendingJobsOfType(BIO_AOF_FSYNC),
server.aof_delayed_fsync);
}
@@ -2847,32 +3384,46 @@ sds genRedisInfoString(char *section) {
"sync_partial_ok:%lld\r\n"
"sync_partial_err:%lld\r\n"
"expired_keys:%lld\r\n"
+ "expired_stale_perc:%.2f\r\n"
+ "expired_time_cap_reached_count:%lld\r\n"
"evicted_keys:%lld\r\n"
"keyspace_hits:%lld\r\n"
"keyspace_misses:%lld\r\n"
"pubsub_channels:%ld\r\n"
"pubsub_patterns:%lu\r\n"
"latest_fork_usec:%lld\r\n"
- "migrate_cached_sockets:%ld\r\n",
+ "migrate_cached_sockets:%ld\r\n"
+ "slave_expires_tracked_keys:%zu\r\n"
+ "active_defrag_hits:%lld\r\n"
+ "active_defrag_misses:%lld\r\n"
+ "active_defrag_key_hits:%lld\r\n"
+ "active_defrag_key_misses:%lld\r\n",
server.stat_numconnections,
server.stat_numcommands,
- getInstantaneousMetric(REDIS_METRIC_COMMAND),
+ getInstantaneousMetric(STATS_METRIC_COMMAND),
server.stat_net_input_bytes,
server.stat_net_output_bytes,
- (float)getInstantaneousMetric(REDIS_METRIC_NET_INPUT)/1024,
- (float)getInstantaneousMetric(REDIS_METRIC_NET_OUTPUT)/1024,
+ (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
+ (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
server.stat_rejected_conn,
server.stat_sync_full,
server.stat_sync_partial_ok,
server.stat_sync_partial_err,
server.stat_expiredkeys,
+ server.stat_expired_stale_perc*100,
+ server.stat_expired_time_cap_reached_count,
server.stat_evictedkeys,
server.stat_keyspace_hits,
server.stat_keyspace_misses,
dictSize(server.pubsub_channels),
listLength(server.pubsub_patterns),
server.stat_fork_time,
- dictSize(server.migrate_cached_sockets));
+ dictSize(server.migrate_cached_sockets),
+ getSlaveKeyWithExpireCount(),
+ server.stat_active_defrag_hits,
+ server.stat_active_defrag_misses,
+ server.stat_active_defrag_key_hits,
+ server.stat_active_defrag_key_misses);
}
/* Replication */
@@ -2899,15 +3450,15 @@ sds genRedisInfoString(char *section) {
"slave_repl_offset:%lld\r\n"
,server.masterhost,
server.masterport,
- (server.repl_state == REDIS_REPL_CONNECTED) ?
+ (server.repl_state == REPL_STATE_CONNECTED) ?
"up" : "down",
server.master ?
((int)(server.unixtime-server.master->lastinteraction)) : -1,
- server.repl_state == REDIS_REPL_TRANSFER,
+ server.repl_state == REPL_STATE_TRANSFER,
slave_repl_offset
);
- if (server.repl_state == REDIS_REPL_TRANSFER) {
+ if (server.repl_state == REPL_STATE_TRANSFER) {
info = sdscatprintf(info,
"master_sync_left_bytes:%lld\r\n"
"master_sync_last_io_seconds_ago:%d\r\n"
@@ -2917,7 +3468,7 @@ sds genRedisInfoString(char *section) {
);
}
- if (server.repl_state != REDIS_REPL_CONNECTED) {
+ if (server.repl_state != REPL_STATE_CONNECTED) {
info = sdscatprintf(info,
"master_link_down_since_seconds:%jd\r\n",
(intmax_t)server.unixtime-server.repl_down_since);
@@ -2949,44 +3500,54 @@ sds genRedisInfoString(char *section) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
+ client *slave = listNodeValue(ln);
char *state = NULL;
- char ip[REDIS_IP_STR_LEN];
+ char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
int port;
long lag = 0;
- if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1) continue;
+ if (slaveip[0] == '\0') {
+ if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1)
+ continue;
+ slaveip = ip;
+ }
switch(slave->replstate) {
- case REDIS_REPL_WAIT_BGSAVE_START:
- case REDIS_REPL_WAIT_BGSAVE_END:
+ case SLAVE_STATE_WAIT_BGSAVE_START:
+ case SLAVE_STATE_WAIT_BGSAVE_END:
state = "wait_bgsave";
break;
- case REDIS_REPL_SEND_BULK:
+ case SLAVE_STATE_SEND_BULK:
state = "send_bulk";
break;
- case REDIS_REPL_ONLINE:
+ case SLAVE_STATE_ONLINE:
state = "online";
break;
}
if (state == NULL) continue;
- if (slave->replstate == REDIS_REPL_ONLINE)
+ if (slave->replstate == SLAVE_STATE_ONLINE)
lag = time(NULL) - slave->repl_ack_time;
info = sdscatprintf(info,
"slave%d:ip=%s,port=%d,state=%s,"
"offset=%lld,lag=%ld\r\n",
- slaveid,ip,slave->slave_listening_port,state,
+ slaveid,slaveip,slave->slave_listening_port,state,
slave->repl_ack_off, lag);
slaveid++;
}
}
info = sdscatprintf(info,
+ "master_replid:%s\r\n"
+ "master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
+ "second_repl_offset:%lld\r\n"
"repl_backlog_active:%d\r\n"
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
+ server.replid,
+ server.replid2,
server.master_repl_offset,
+ server.second_replid_offset,
server.repl_backlog != NULL,
server.repl_backlog_size,
server.repl_backlog_off,
@@ -2998,30 +3559,34 @@ sds genRedisInfoString(char *section) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# CPU\r\n"
- "used_cpu_sys:%.2f\r\n"
- "used_cpu_user:%.2f\r\n"
- "used_cpu_sys_children:%.2f\r\n"
- "used_cpu_user_children:%.2f\r\n",
- (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
- (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
- (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
- (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
+ "used_cpu_sys:%ld.%06ld\r\n"
+ "used_cpu_user:%ld.%06ld\r\n"
+ "used_cpu_sys_children:%ld.%06ld\r\n"
+ "used_cpu_user_children:%ld.%06ld\r\n",
+ (long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec,
+ (long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec,
+ (long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec,
+ (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec);
}
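
The CPU hunk above trades float sums for %ld.%06ld integer formatting. The motivation, sketched below: a float carries roughly seven significant digits, so once the accumulated seconds grow large the microsecond component is rounded away entirely, while integer formatting keeps full precision. The sample values are illustrative:

    #include <stdio.h>

    int main(void) {
        long sec = 100000000, usec = 123456;  /* large accumulated CPU time */
        /* Old style: the float sum silently drops the microseconds. */
        printf("%.2f\n", (float)sec + (float)usec/1000000);  /* 100000000.00 */
        /* New style: integers printed as seconds.microseconds. */
        printf("%ld.%06ld\n", sec, usec);  /* 100000000.123456 */
        return 0;
    }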
- /* cmdtime */
+ /* Command statistics */
if (allsections || !strcasecmp(section,"commandstats")) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Commandstats\r\n");
- numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
- for (j = 0; j < numcommands; j++) {
- struct redisCommand *c = redisCommandTable+j;
+ struct redisCommand *c;
+ dictEntry *de;
+ dictIterator *di;
+ di = dictGetSafeIterator(server.commands);
+ while((de = dictNext(di)) != NULL) {
+ c = (struct redisCommand *) dictGetVal(de);
if (!c->calls) continue;
info = sdscatprintf(info,
"cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
c->name, c->calls, c->microseconds,
(c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
}
+ dictReleaseIterator(di);
}
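
The Commandstats hunk above moves from walking the static command table to iterating the server.commands dict. Condensed below is the pattern it relies on; to my reading of the dict API, a *safe* iterator may be held while entries are read (it pauses incremental rehashing for its lifetime), and the one hard rule is that it must be released:

    dictIterator *di = dictGetSafeIterator(server.commands);
    dictEntry *de;
    while ((de = dictNext(di)) != NULL) {
        struct redisCommand *cmd = dictGetVal(de);
        /* ... read cmd->calls / cmd->microseconds ... */
    }
    dictReleaseIterator(di);  /* never leak a safe iterator */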
/* Cluster */
@@ -3052,7 +3617,7 @@ sds genRedisInfoString(char *section) {
return info;
}
-void infoCommand(redisClient *c) {
+void infoCommand(client *c) {
char *section = c->argc == 2 ? c->argv[1]->ptr : "default";
if (c->argc > 2) {
@@ -3062,310 +3627,15 @@ void infoCommand(redisClient *c) {
addReplyBulkSds(c, genRedisInfoString(section));
}
-void monitorCommand(redisClient *c) {
+void monitorCommand(client *c) {
/* ignore MONITOR if already slave or in monitor mode */
- if (c->flags & REDIS_SLAVE) return;
+ if (c->flags & CLIENT_SLAVE) return;
- c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
+ c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
listAddNodeTail(server.monitors,c);
addReply(c,shared.ok);
}
-/* ============================ Maxmemory directive ======================== */
-
-/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
- * file to limit the max memory used by the server, before processing a
- * command.
- *
- * The goal of the function is to free enough memory to keep Redis under the
- * configured memory limit.
- *
- * The function starts calculating how many bytes should be freed to keep
- * Redis under the limit, and enters a loop selecting the best keys to
- * evict accordingly to the configured policy.
- *
- * If all the bytes needed to return back under the limit were freed the
- * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller
- * should block the execution of commands that will result in more memory
- * used by the server.
- *
- * ------------------------------------------------------------------------
- *
- * LRU approximation algorithm
- *
- * Redis uses an approximation of the LRU algorithm that runs in constant
- * memory. Every time there is a key to expire, we sample N keys (with
- * N very small, usually in around 5) to populate a pool of best keys to
- * evict of M keys (the pool size is defined by REDIS_EVICTION_POOL_SIZE).
- *
- * The N keys sampled are added in the pool of good keys to expire (the one
- * with an old access time) if they are better than one of the current keys
- * in the pool.
- *
- * After the pool is populated, the best key we have in the pool is expired.
- * However note that we don't remove keys from the pool when they are deleted
- * so the pool may contain keys that no longer exist.
- *
- * When we try to evict a key, and all the entries in the pool don't exist
- * we populate it again. This time we'll be sure that the pool has at least
- * one key that can be evicted, if there is at least one key that can be
- * evicted in the whole database. */
-
-/* Create a new eviction pool. */
-struct evictionPoolEntry *evictionPoolAlloc(void) {
- struct evictionPoolEntry *ep;
- int j;
-
- ep = zmalloc(sizeof(*ep)*REDIS_EVICTION_POOL_SIZE);
- for (j = 0; j < REDIS_EVICTION_POOL_SIZE; j++) {
- ep[j].idle = 0;
- ep[j].key = NULL;
- }
- return ep;
-}
-
-/* This is an helper function for freeMemoryIfNeeded(), it is used in order
- * to populate the evictionPool with a few entries every time we want to
- * expire a key. Keys with idle time smaller than one of the current
- * keys are added. Keys are always added if there are free entries.
- *
- * We insert keys on place in ascending order, so keys with the smaller
- * idle time are on the left, and keys with the higher idle time on the
- * right. */
-
-#define EVICTION_SAMPLES_ARRAY_SIZE 16
-void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
- int j, k, count;
- dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE];
- dictEntry **samples;
-
- /* Try to use a static buffer: this function is a big hit...
- * Note: it was actually measured that this helps. */
- if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) {
- samples = _samples;
- } else {
- samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
- }
-
-#if 1 /* Use bulk get by default. */
- count = dictGetRandomKeys(sampledict,samples,server.maxmemory_samples);
-#else
- count = server.maxmemory_samples;
- for (j = 0; j < count; j++) samples[j] = dictGetRandomKey(sampledict);
-#endif
-
- for (j = 0; j < count; j++) {
- unsigned long long idle;
- sds key;
- robj *o;
- dictEntry *de;
-
- de = samples[j];
- key = dictGetKey(de);
- /* If the dictionary we are sampling from is not the main
- * dictionary (but the expires one) we need to lookup the key
- * again in the key dictionary to obtain the value object. */
- if (sampledict != keydict) de = dictFind(keydict, key);
- o = dictGetVal(de);
- idle = estimateObjectIdleTime(o);
-
- /* Insert the element inside the pool.
- * First, find the first empty bucket or the first populated
- * bucket that has an idle time smaller than our idle time. */
- k = 0;
- while (k < REDIS_EVICTION_POOL_SIZE &&
- pool[k].key &&
- pool[k].idle < idle) k++;
- if (k == 0 && pool[REDIS_EVICTION_POOL_SIZE-1].key != NULL) {
- /* Can't insert if the element is < the worst element we have
- * and there are no empty buckets. */
- continue;
- } else if (k < REDIS_EVICTION_POOL_SIZE && pool[k].key == NULL) {
- /* Inserting into empty position. No setup needed before insert. */
- } else {
- /* Inserting in the middle. Now k points to the first element
- * greater than the element to insert. */
- if (pool[REDIS_EVICTION_POOL_SIZE-1].key == NULL) {
- /* Free space on the right? Insert at k shifting
- * all the elements from k to end to the right. */
- memmove(pool+k+1,pool+k,
- sizeof(pool[0])*(REDIS_EVICTION_POOL_SIZE-k-1));
- } else {
- /* No free space on right? Insert at k-1 */
- k--;
- /* Shift all elements on the left of k (included) to the
- * left, so we discard the element with smaller idle time. */
- sdsfree(pool[0].key);
- memmove(pool,pool+1,sizeof(pool[0])*k);
- }
- }
- pool[k].key = sdsdup(key);
- pool[k].idle = idle;
- }
- if (samples != _samples) zfree(samples);
-}
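
For reference, the invariant the removed evictionPoolPopulate() maintains is that pool entries stay ordered by ascending idle time, so the best eviction candidate is always at the right end. A toy restatement of that insertion logic, with illustrative names and no key-memory management:

    #include <string.h>

    #define POOL_SIZE 16
    struct pool_entry { unsigned long long idle; char *key; };

    static void pool_insert(struct pool_entry *pool,
                            unsigned long long idle, char *key) {
        int k = 0;
        /* Find the first empty slot, or the first entry not older than us. */
        while (k < POOL_SIZE && pool[k].key && pool[k].idle < idle) k++;
        if (k == 0 && pool[POOL_SIZE-1].key != NULL) {
            return; /* worse than every entry and no free slot: drop it */
        } else if (k < POOL_SIZE && pool[k].key == NULL) {
            /* empty slot found: insert in place */
        } else if (pool[POOL_SIZE-1].key == NULL) {
            /* free space on the right: shift the tail one slot right */
            memmove(pool+k+1, pool+k, sizeof(pool[0])*(POOL_SIZE-k-1));
        } else {
            /* pool full: discard the smallest-idle entry on the left */
            k--;
            memmove(pool, pool+1, sizeof(pool[0])*k);
        }
        pool[k].idle = idle;
        pool[k].key = key;
    }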
-
-int freeMemoryIfNeeded(void) {
- size_t mem_used, mem_tofree, mem_freed;
- int slaves = listLength(server.slaves);
- mstime_t latency;
-
- /* Remove the size of slaves output buffers and AOF buffer from the
- * count of used memory. */
- mem_used = zmalloc_used_memory();
- if (slaves) {
- listIter li;
- listNode *ln;
-
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- redisClient *slave = listNodeValue(ln);
- unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
- if (obuf_bytes > mem_used)
- mem_used = 0;
- else
- mem_used -= obuf_bytes;
- }
- }
- if (server.aof_state != REDIS_AOF_OFF) {
- mem_used -= sdslen(server.aof_buf);
- mem_used -= aofRewriteBufferSize();
- }
-
- /* Check if we are over the memory limit. */
- if (mem_used <= server.maxmemory) return REDIS_OK;
-
- if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION)
- return REDIS_ERR; /* We need to free memory, but policy forbids. */
-
- /* Compute how much memory we need to free. */
- mem_tofree = mem_used - server.maxmemory;
- mem_freed = 0;
- latencyStartMonitor(latency);
- while (mem_freed < mem_tofree) {
- int j, k, keys_freed = 0;
-
- for (j = 0; j < server.dbnum; j++) {
- long bestval = 0; /* just to prevent warning */
- sds bestkey = NULL;
- dictEntry *de;
- redisDb *db = server.db+j;
- dict *dict;
-
- if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
- server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM)
- {
- dict = server.db[j].dict;
- } else {
- dict = server.db[j].expires;
- }
- if (dictSize(dict) == 0) continue;
-
- /* volatile-random and allkeys-random policy */
- if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM ||
- server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM)
- {
- de = dictGetRandomKey(dict);
- bestkey = dictGetKey(de);
- }
-
- /* volatile-lru and allkeys-lru policy */
- else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU ||
- server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU)
- {
- struct evictionPoolEntry *pool = db->eviction_pool;
-
- while(bestkey == NULL) {
- evictionPoolPopulate(dict, db->dict, db->eviction_pool);
- /* Go backward from best to worst element to evict. */
- for (k = REDIS_EVICTION_POOL_SIZE-1; k >= 0; k--) {
- if (pool[k].key == NULL) continue;
- de = dictFind(dict,pool[k].key);
-
- /* Remove the entry from the pool. */
- sdsfree(pool[k].key);
- /* Shift all elements on its right to left. */
- memmove(pool+k,pool+k+1,
- sizeof(pool[0])*(REDIS_EVICTION_POOL_SIZE-k-1));
- /* Clear the element on the right which is empty
- * since we shifted one position to the left. */
- pool[REDIS_EVICTION_POOL_SIZE-1].key = NULL;
- pool[REDIS_EVICTION_POOL_SIZE-1].idle = 0;
-
- /* If the key exists, is our pick. Otherwise it is
- * a ghost and we need to try the next element. */
- if (de) {
- bestkey = dictGetKey(de);
- break;
- } else {
- /* Ghost... */
- continue;
- }
- }
- }
- }
-
- /* volatile-ttl */
- else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) {
- for (k = 0; k < server.maxmemory_samples; k++) {
- sds thiskey;
- long thisval;
-
- de = dictGetRandomKey(dict);
- thiskey = dictGetKey(de);
- thisval = (long) dictGetVal(de);
-
- /* Expire sooner (minor expire unix timestamp) is better
- * candidate for deletion */
- if (bestkey == NULL || thisval < bestval) {
- bestkey = thiskey;
- bestval = thisval;
- }
- }
- }
-
- /* Finally remove the selected key. */
- if (bestkey) {
- long long delta;
-
- robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
- propagateExpire(db,keyobj);
- /* We compute the amount of memory freed by dbDelete() alone.
- * It is possible that actually the memory needed to propagate
- * the DEL in AOF and replication link is greater than the one
- * we are freeing removing the key, but we can't account for
- * that otherwise we would never exit the loop.
- *
- * AOF and Output buffer memory will be freed eventually so
- * we only care about memory used by the key space. */
- delta = (long long) zmalloc_used_memory();
- dbDelete(db,keyobj);
- delta -= (long long) zmalloc_used_memory();
- mem_freed += delta;
- server.stat_evictedkeys++;
- notifyKeyspaceEvent(REDIS_NOTIFY_EVICTED, "evicted",
- keyobj, db->id);
- decrRefCount(keyobj);
- keys_freed++;
-
- /* When the memory to free starts to be big enough, we may
- * start spending so much time here that is impossible to
- * deliver data to the slaves fast enough, so we force the
- * transmission here inside the loop. */
- if (slaves) flushSlavesOutputBuffers();
- }
- }
- if (!keys_freed) {
- latencyEndMonitor(latency);
- latencyAddSampleIfNeeded("eviction-cycle",latency);
- return REDIS_ERR; /* nothing to free... */
- }
- }
- latencyEndMonitor(latency);
- latencyAddSampleIfNeeded("eviction-cycle",latency);
- return REDIS_OK;
-}
-
/* =================================== Main! ================================ */
#ifdef __linux__
@@ -3385,10 +3655,10 @@ int linuxOvercommitMemoryValue(void) {
void linuxMemoryWarnings(void) {
if (linuxOvercommitMemoryValue() == 0) {
- redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
+ serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
}
if (THPIsEnabled()) {
- redisLog(REDIS_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled.");
+ serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled.");
}
}
#endif /* __linux__ */
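
For context, a plausible shape of linuxOvercommitMemoryValue(), inferred from the warning text above rather than from the function itself: read the current integer setting from procfs.

    #include <stdio.h>
    #include <stdlib.h>

    /* Assumed sketch: returns /proc/sys/vm/overcommit_memory, or -1 on
     * error. A value of 0 triggers the warning logged above. */
    static int overcommit_memory_value(void) {
        FILE *fp = fopen("/proc/sys/vm/overcommit_memory", "r");
        char buf[64];
        if (!fp) return -1;
        if (!fgets(buf, sizeof(buf), fp)) { fclose(fp); return -1; }
        fclose(fp);
        return atoi(buf);
    }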
@@ -3396,7 +3666,7 @@ void linuxMemoryWarnings(void) {
void createPidFile(void) {
/* If pidfile requested, but no pidfile defined, use
* default pidfile path */
- if (!server.pidfile) server.pidfile = zstrdup(REDIS_DEFAULT_PID_FILE);
+ if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
/* Try to write the pid file in a best-effort way. */
FILE *fp = fopen(server.pidfile,"w");
@@ -3460,15 +3730,18 @@ void redisAsciiArt(void) {
else if (server.sentinel_mode) mode = "sentinel";
else mode = "standalone";
- if (server.syslog_enabled) {
- redisLog(REDIS_NOTICE,
- "Redis %s (%s/%d) %s bit, %s mode, port %d, pid %ld ready to start.",
- REDIS_VERSION,
- redisGitSHA1(),
- strtol(redisGitDirty(),NULL,10) > 0,
- (sizeof(long) == 8) ? "64" : "32",
- mode, server.port,
- (long) getpid()
+ /* Show the ASCII logo if: log file is stdout AND stdout is a
+ * tty AND syslog logging is disabled. Also show logo if the user
+ * forced us to do so via redis.conf. */
+ int show_logo = ((!server.syslog_enabled &&
+ server.logfile[0] == '\0' &&
+ isatty(fileno(stdout))) ||
+ server.always_show_logo);
+
+ if (!show_logo) {
+ serverLog(LL_NOTICE,
+ "Running mode=%s, port=%d.",
+ mode, server.port
);
} else {
snprintf(buf,1024*16,ascii_logo,
@@ -3479,7 +3752,7 @@ void redisAsciiArt(void) {
mode, server.port,
(long) getpid()
);
- redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf);
+ serverLogRaw(LL_NOTICE|LL_RAW,buf);
}
zfree(buf);
}
@@ -3503,14 +3776,14 @@ static void sigShutdownHandler(int sig) {
* the user really wanting to quit ASAP without waiting to persist
* on disk. */
if (server.shutdown_asap && sig == SIGINT) {
- redisLogFromHandler(REDIS_WARNING, "You insist... exiting now.");
+ serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
rdbRemoveTempFile(getpid());
exit(1); /* Exit with an error since this was not a clean shutdown. */
} else if (server.loading) {
exit(0);
}
- redisLogFromHandler(REDIS_WARNING, msg);
+ serverLogFromHandler(LL_WARNING, msg);
server.shutdown_asap = 1;
}
@@ -3540,7 +3813,7 @@ void setupSignalHandlers(void) {
void memtest(size_t megabytes, int passes);
/* Returns 1 if there is --sentinel among the arguments or if
- * argv[0] is exactly "redis-sentinel". */
+ * argv[0] contains "redis-sentinel". */
int checkForSentinelMode(int argc, char **argv) {
int j;
@@ -3553,24 +3826,43 @@ int checkForSentinelMode(int argc, char **argv) {
/* Function called at startup to load RDB or AOF file in memory. */
void loadDataFromDisk(void) {
long long start = ustime();
- if (server.aof_state == REDIS_AOF_ON) {
- if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK)
- redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
+ if (server.aof_state == AOF_ON) {
+ if (loadAppendOnlyFile(server.aof_filename) == C_OK)
+ serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
} else {
- if (rdbLoad(server.rdb_filename) == REDIS_OK) {
- redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds",
+ rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ if (rdbLoad(server.rdb_filename,&rsi) == C_OK) {
+ serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
(float)(ustime()-start)/1000000);
+
+ /* Restore the replication ID / offset from the RDB file. */
+ if (server.masterhost &&
+ rsi.repl_id_is_set &&
+ rsi.repl_offset != -1 &&
+                /* Note that older implementations may incorrectly save a
+                 * repl_stream_db of -1 inside the RDB file; see the function
+                 * rdbPopulateSaveInfo() for more information. */
+ rsi.repl_stream_db != -1)
+ {
+ memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
+ server.master_repl_offset = rsi.repl_offset;
+ /* If we are a slave, create a cached master from this
+ * information, in order to allow partial resynchronizations
+ * with masters. */
+ replicationCacheMasterUsingMyself();
+ selectDb(server.cached_master,rsi.repl_stream_db);
+ }
} else if (errno != ENOENT) {
- redisLog(REDIS_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
+ serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
exit(1);
}
}
}
void redisOutOfMemoryHandler(size_t allocation_size) {
- redisLog(REDIS_WARNING,"Out Of Memory allocating %zu bytes!",
+ serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
allocation_size);
- redisPanic("Redis aborting for OUT OF MEMORY");
+ serverPanic("Redis aborting for OUT OF MEMORY");
}
void redisSetProcTitle(char *title) {
@@ -3585,7 +3877,7 @@ void redisSetProcTitle(char *title) {
server.port,
server_mode);
#else
- REDIS_NOTUSED(title);
+ UNUSED(title);
#endif
}
@@ -3597,12 +3889,12 @@ int redisSupervisedUpstart(void) {
const char *upstart_job = getenv("UPSTART_JOB");
if (!upstart_job) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"upstart supervision requested, but UPSTART_JOB not found");
return 0;
}
- redisLog(REDIS_NOTICE, "supervised by upstart, will stop to signal readyness");
+ serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness");
raise(SIGSTOP);
unsetenv("UPSTART_JOB");
return 1;
@@ -3617,7 +3909,7 @@ int redisSupervisedSystemd(void) {
int sendto_flags = 0;
if (!notify_socket) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"systemd supervision requested, but NOTIFY_SOCKET not found");
return 0;
}
@@ -3626,9 +3918,9 @@ int redisSupervisedSystemd(void) {
return 0;
}
- redisLog(REDIS_NOTICE, "supervised by systemd, will signal readyness");
+ serverLog(LL_NOTICE, "supervised by systemd, will signal readiness");
if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Can't connect to systemd socket %s", notify_socket);
return 0;
}
@@ -3657,7 +3949,7 @@ int redisSupervisedSystemd(void) {
sendto_flags |= MSG_NOSIGNAL;
#endif
if (sendmsg(fd, &hdr, sendto_flags) < 0) {
- redisLog(REDIS_WARNING, "Can't send notification to systemd");
+ serverLog(LL_WARNING, "Can't send notification to systemd");
close(fd);
return 0;
}
@@ -3666,7 +3958,7 @@ int redisSupervisedSystemd(void) {
}
int redisIsSupervised(int mode) {
- if (mode == REDIS_SUPERVISED_AUTODETECT) {
+ if (mode == SUPERVISED_AUTODETECT) {
const char *upstart_job = getenv("UPSTART_JOB");
const char *notify_socket = getenv("NOTIFY_SOCKET");
@@ -3675,9 +3967,9 @@ int redisIsSupervised(int mode) {
} else if (notify_socket) {
redisSupervisedSystemd();
}
- } else if (mode == REDIS_SUPERVISED_UPSTART) {
+ } else if (mode == SUPERVISED_UPSTART) {
return redisSupervisedUpstart();
- } else if (mode == REDIS_SUPERVISED_SYSTEMD) {
+ } else if (mode == SUPERVISED_SYSTEMD) {
return redisSupervisedSystemd();
}
@@ -3687,6 +3979,7 @@ int redisIsSupervised(int mode) {
int main(int argc, char **argv) {
struct timeval tv;
+ int j;
#ifdef REDIS_TEST
if (argc == 3 && !strcasecmp(argv[1], "test")) {
@@ -3719,13 +4012,24 @@ int main(int argc, char **argv) {
spt_init(argc, argv);
#endif
setlocale(LC_COLLATE,"");
- zmalloc_enable_thread_safeness();
+ tzset(); /* Populates 'timezone' global. */
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
- dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
+
+ char hashseed[16];
+ getRandomHexChars(hashseed,sizeof(hashseed));
+ dictSetHashFunctionSeed((uint8_t*)hashseed);
server.sentinel_mode = checkForSentinelMode(argc,argv);
initServerConfig();
+ moduleInitModulesSystem();
+
+ /* Store the executable path and arguments in a safe place in order
+ * to be able to restart the server later. */
+ server.executable = getAbsolutePath(argv[0]);
+ server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
+ server.exec_argv[argc] = NULL;
+ for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
/* We need to init sentinel right now as parsing the configuration file
* in sentinel mode will have the effect of populating the sentinel
@@ -3735,8 +4039,16 @@ int main(int argc, char **argv) {
initSentinel();
}
+    /* Check if we need to start in redis-check-rdb/aof mode. We just execute
+     * the program's main(). However, the program is part of the Redis
+     * executable so that we can easily run an RDB check on loading errors. */
+ if (strstr(argv[0],"redis-check-rdb") != NULL)
+ redis_check_rdb_main(argc,argv,NULL);
+ else if (strstr(argv[0],"redis-check-aof") != NULL)
+ redis_check_aof_main(argc,argv);
+
if (argc >= 2) {
- int j = 1; /* First option to parse in argv[] */
+ j = 1; /* First option to parse in argv[] */
sds options = sdsempty();
char *configfile = NULL;
@@ -3757,8 +4069,16 @@ int main(int argc, char **argv) {
}
/* First argument is the config file name? */
- if (argv[j][0] != '-' || argv[j][1] != '-')
- configfile = argv[j++];
+ if (argv[j][0] != '-' || argv[j][1] != '-') {
+ configfile = argv[j];
+ server.configfile = getAbsolutePath(configfile);
+ /* Replace the config file in server.exec_argv with
+ * its absolute path. */
+ zfree(server.exec_argv[j]);
+ server.exec_argv[j] = zstrdup(server.configfile);
+ j++;
+ }
+
/* All the other options are parsed and conceptually appended to the
* configuration file. For instance --port 6380 will generate the
* string "port 6380\n" to be parsed after the actual file name
@@ -3766,6 +4086,11 @@ int main(int argc, char **argv) {
while(j != argc) {
if (argv[j][0] == '-' && argv[j][1] == '-') {
/* Option name */
+ if (!strcmp(argv[j], "--check-rdb")) {
+                    /* This argument takes no option value, so skip it while parsing. */
+ j++;
+ continue;
+ }
if (sdslen(options)) options = sdscat(options,"\n");
options = sdscat(options,argv[j]+2);
options = sdscat(options," ");
@@ -3777,58 +4102,73 @@ int main(int argc, char **argv) {
j++;
}
if (server.sentinel_mode && configfile && *configfile == '-') {
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel config from STDIN not allowed.");
- redisLog(REDIS_WARNING,
+ serverLog(LL_WARNING,
"Sentinel needs config file on disk to save state. Exiting...");
exit(1);
}
- if (configfile) server.configfile = getAbsolutePath(configfile);
resetServerSaveParams();
loadServerConfig(configfile,options);
sdsfree(options);
+ }
+
+ serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
+ serverLog(LL_WARNING,
+ "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
+ REDIS_VERSION,
+ (sizeof(long) == 8) ? 64 : 32,
+ redisGitSHA1(),
+ strtol(redisGitDirty(),NULL,10) > 0,
+ (int)getpid());
+
+ if (argc == 1) {
+ serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
} else {
- redisLog(REDIS_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
+ serverLog(LL_WARNING, "Configuration loaded");
}
server.supervised = redisIsSupervised(server.supervised_mode);
int background = server.daemonize && !server.supervised;
if (background) daemonize();
+
initServer();
if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
redisAsciiArt();
+ checkTcpBacklogSettings();
if (!server.sentinel_mode) {
/* Things not needed when running in Sentinel mode. */
- redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION);
+ serverLog(LL_WARNING,"Server initialized");
#ifdef __linux__
linuxMemoryWarnings();
#endif
- checkTcpBacklogSettings();
+ moduleLoadFromQueue();
loadDataFromDisk();
if (server.cluster_enabled) {
- if (verifyClusterConfigWithData() == REDIS_ERR) {
- redisLog(REDIS_WARNING,
+ if (verifyClusterConfigWithData() == C_ERR) {
+ serverLog(LL_WARNING,
"You can't have keys in a DB different than DB 0 when in "
"Cluster mode. Exiting.");
exit(1);
}
}
if (server.ipfd_count > 0)
- redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
+ serverLog(LL_NOTICE,"Ready to accept connections");
if (server.sofd > 0)
- redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
+ serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
} else {
sentinelIsRunning();
}
/* Warning the user about suspicious maxmemory setting. */
if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
- redisLog(REDIS_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
+ serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
}
aeSetBeforeSleepProc(server.el,beforeSleep);
+ aeSetAfterSleepProc(server.el,afterSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
return 0;
diff --git a/src/server.h b/src/server.h
new file mode 100644
index 000000000..186d08250
--- /dev/null
+++ b/src/server.h
@@ -0,0 +1,2136 @@
+/*
+ * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_H
+#define __REDIS_H
+
+#include "fmacros.h"
+#include "config.h"
+#include "solarisfixes.h"
+#include "rio.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <syslog.h>
+#include <netinet/in.h>
+#include <lua.h>
+#include <signal.h>
+
+typedef long long mstime_t; /* millisecond time type. */
+
+#include "ae.h" /* Event driven programming library */
+#include "sds.h" /* Dynamic safe strings */
+#include "dict.h" /* Hash tables */
+#include "adlist.h" /* Linked lists */
+#include "zmalloc.h" /* total memory usage aware version of malloc/free */
+#include "anet.h" /* Networking the easy way */
+#include "ziplist.h" /* Compact list data structure */
+#include "intset.h" /* Compact integer set structure */
+#include "version.h" /* Version macro */
+#include "util.h" /* Misc functions useful in many places */
+#include "latency.h" /* Latency monitor API */
+#include "sparkline.h" /* ASCII graphs API */
+#include "quicklist.h" /* Lists are encoded as linked lists of
+                           N-element flat arrays */
+#include "rax.h" /* Radix tree */
+
+/* Following includes allow test functions to be called from Redis main() */
+#include "zipmap.h"
+#include "sha1.h"
+#include "endianconv.h"
+#include "crc64.h"
+
+/* Error codes */
+#define C_OK 0
+#define C_ERR -1
+
+/* Static server configuration */
+#define CONFIG_DEFAULT_DYNAMIC_HZ 1 /* Adapt hz to # of clients. */
+#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
+#define CONFIG_MIN_HZ 1
+#define CONFIG_MAX_HZ 500
+#define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
+#define CONFIG_DEFAULT_SERVER_PORT 6379 /* TCP port. */
+#define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. */
+#define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */
+#define CONFIG_DEFAULT_DBNUM 16
+#define CONFIG_MAX_LINE 1024
+#define CRON_DBS_PER_CALL 16
+#define NET_MAX_WRITES_PER_EVENT (1024*64)
+#define PROTO_SHARED_SELECT_CMDS 10
+#define OBJ_SHARED_INTEGERS 10000
+#define OBJ_SHARED_BULKHDR_LEN 32
+#define LOG_MAX_LEN 1024 /* Default maximum length of syslog messages. */
+#define AOF_REWRITE_PERC 100
+#define AOF_REWRITE_MIN_SIZE (64*1024*1024)
+#define AOF_REWRITE_ITEMS_PER_CMD 64
+#define AOF_READ_DIFF_INTERVAL_BYTES (1024*10)
+#define CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN 10000
+#define CONFIG_DEFAULT_SLOWLOG_MAX_LEN 128
+#define CONFIG_DEFAULT_MAX_CLIENTS 10000
+#define CONFIG_AUTHPASS_MAX_LEN 512
+#define CONFIG_DEFAULT_SLAVE_PRIORITY 100
+#define CONFIG_DEFAULT_REPL_TIMEOUT 60
+#define CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD 10
+#define CONFIG_RUN_ID_SIZE 40
+#define RDB_EOF_MARK_SIZE 40
+#define CONFIG_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
+#define CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
+#define CONFIG_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
+#define CONFIG_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
+#define CONFIG_DEFAULT_PID_FILE "/var/run/redis.pid"
+#define CONFIG_DEFAULT_SYSLOG_IDENT "redis"
+#define CONFIG_DEFAULT_CLUSTER_CONFIG_FILE "nodes.conf"
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP NULL /* Auto detect. */
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT 0 /* Use server.port */
+#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT 0 /* Use +10000 offset. */
+#define CONFIG_DEFAULT_DAEMONIZE 0
+#define CONFIG_DEFAULT_UNIX_SOCKET_PERM 0
+#define CONFIG_DEFAULT_TCP_KEEPALIVE 300
+#define CONFIG_DEFAULT_PROTECTED_MODE 1
+#define CONFIG_DEFAULT_LOGFILE ""
+#define CONFIG_DEFAULT_SYSLOG_ENABLED 0
+#define CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR 1
+#define CONFIG_DEFAULT_RDB_COMPRESSION 1
+#define CONFIG_DEFAULT_RDB_CHECKSUM 1
+#define CONFIG_DEFAULT_RDB_FILENAME "dump.rdb"
+#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0
+#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5
+#define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1
+#define CONFIG_DEFAULT_SLAVE_READ_ONLY 1
+#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP NULL
+#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT 0
+#define CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY 0
+#define CONFIG_DEFAULT_MAXMEMORY 0
+#define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5
+#define CONFIG_DEFAULT_LFU_LOG_FACTOR 10
+#define CONFIG_DEFAULT_LFU_DECAY_TIME 1
+#define CONFIG_DEFAULT_AOF_FILENAME "appendonly.aof"
+#define CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0
+#define CONFIG_DEFAULT_AOF_LOAD_TRUNCATED 1
+#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 1
+#define CONFIG_DEFAULT_ACTIVE_REHASHING 1
+#define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
+#define CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC 1
+#define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0
+#define CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG 10
+#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
+#define NET_PEER_ID_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
+#define CONFIG_BINDADDR_MAX 16
+#define CONFIG_MIN_RESERVED_FDS 32
+#define CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD 0
+#define CONFIG_DEFAULT_SLAVE_LAZY_FLUSH 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
+#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
+#define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0
+#define CONFIG_DEFAULT_ACTIVE_DEFRAG 0
+#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */
+#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */
+#define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */
+#define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 5 /* 5% CPU min (at lower threshold) */
+#define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 75 /* 75% CPU max (at upper threshold) */
+#define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */
+#define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */
+
+#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */
+#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
+#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
+#define ACTIVE_EXPIRE_CYCLE_SLOW 0
+#define ACTIVE_EXPIRE_CYCLE_FAST 1
+
+/* Instantaneous metrics tracking. */
+#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
+#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
+#define STATS_METRIC_NET_INPUT 1 /* Bytes read from network. */
+#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
+#define STATS_METRIC_COUNT 3
+
+/* Protocol and I/O related defines */
+#define PROTO_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
+#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
+#define PROTO_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
+#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
+#define PROTO_MBULK_BIG_ARG (1024*32)
+#define LONG_STR_SIZE 21 /* Bytes needed for long -> str + '\0' */
+#define REDIS_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
+
+#define LIMIT_PENDING_QUERYBUF (4*1024*1024) /* 4mb */
+
+/* When configuring the server event loop, we set it up so that the total
+ * number of file descriptors we can handle is server.maxclients +
+ * RESERVED_FDS + a few more to stay safe. Since RESERVED_FDS defaults to
+ * 32, we add 96 in order to make sure we never over-provision by more
+ * than 128 fds. */
+#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96)
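
In other words, the reserved headroom totals 32 + 96 = 128 descriptors. A sketch of the assumed call site (the actual initServer() line is outside this diff):

    /* Event loop sized for every client plus the reserved headroom. */
    server.el = aeCreateEventLoop(server.maxclients + CONFIG_FDSET_INCR);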
+
+/* Hash table parameters */
+#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
+
+/* Command flags. Please check the command table defined in the redis.c file
+ * for more information about the meaning of every flag. */
+#define CMD_WRITE (1<<0) /* "w" flag */
+#define CMD_READONLY (1<<1) /* "r" flag */
+#define CMD_DENYOOM (1<<2) /* "m" flag */
+#define CMD_MODULE (1<<3) /* Command exported by module. */
+#define CMD_ADMIN (1<<4) /* "a" flag */
+#define CMD_PUBSUB (1<<5) /* "p" flag */
+#define CMD_NOSCRIPT (1<<6) /* "s" flag */
+#define CMD_RANDOM (1<<7) /* "R" flag */
+#define CMD_SORT_FOR_SCRIPT (1<<8) /* "S" flag */
+#define CMD_LOADING (1<<9) /* "l" flag */
+#define CMD_STALE (1<<10) /* "t" flag */
+#define CMD_SKIP_MONITOR (1<<11) /* "M" flag */
+#define CMD_ASKING (1<<12) /* "k" flag */
+#define CMD_FAST (1<<13) /* "F" flag */
+#define CMD_MODULE_GETKEYS (1<<14) /* Use the modules getkeys interface. */
+#define CMD_MODULE_NO_CLUSTER (1<<15) /* Deny on Redis Cluster. */
+
+/* AOF states */
+#define AOF_OFF 0 /* AOF is off */
+#define AOF_ON 1 /* AOF is on */
+#define AOF_WAIT_REWRITE 2 /* AOF waits for rewrite to finish, then starts appending */
+
+/* Client flags */
+#define CLIENT_SLAVE (1<<0) /* This client is a slave server */
+#define CLIENT_MASTER (1<<1) /* This client is a master server */
+#define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
+#define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */
+#define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
+#define CLIENT_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
+#define CLIENT_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
+#define CLIENT_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
+ server.unblocked_clients */
+#define CLIENT_LUA (1<<8) /* This is a non-connected client used by Lua */
+#define CLIENT_ASKING (1<<9) /* Client issued the ASKING command */
+#define CLIENT_CLOSE_ASAP (1<<10)/* Close this client ASAP */
+#define CLIENT_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
+#define CLIENT_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
+#define CLIENT_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if it is a master */
+#define CLIENT_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
+#define CLIENT_FORCE_REPL (1<<15) /* Force replication of current cmd. */
+#define CLIENT_PRE_PSYNC (1<<16) /* Instance doesn't understand PSYNC. */
+#define CLIENT_READONLY (1<<17) /* Cluster client is in read-only state. */
+#define CLIENT_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
+#define CLIENT_PREVENT_AOF_PROP (1<<19) /* Don't propagate to AOF. */
+#define CLIENT_PREVENT_REPL_PROP (1<<20) /* Don't propagate to slaves. */
+#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP|CLIENT_PREVENT_REPL_PROP)
+#define CLIENT_PENDING_WRITE (1<<21) /* Client has output to send but a write
+                                        handler is not yet installed. */
+#define CLIENT_REPLY_OFF (1<<22) /* Don't send replies to client. */
+#define CLIENT_REPLY_SKIP_NEXT (1<<23) /* Set CLIENT_REPLY_SKIP for next cmd */
+#define CLIENT_REPLY_SKIP (1<<24) /* Don't send just this reply. */
+#define CLIENT_LUA_DEBUG (1<<25) /* Run EVAL in debug mode. */
+#define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */
+#define CLIENT_MODULE (1<<27) /* Non-connected client used by some module. */
+
+/* Client block type (btype field in client structure)
+ * if CLIENT_BLOCKED flag is set. */
+#define BLOCKED_NONE 0 /* Not blocked, no CLIENT_BLOCKED flag set. */
+#define BLOCKED_LIST 1 /* BLPOP & co. */
+#define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */
+#define BLOCKED_MODULE 3 /* Blocked by a loadable module. */
+#define BLOCKED_STREAM 4 /* XREAD. */
+#define BLOCKED_ZSET 5 /* BZPOP et al. */
+#define BLOCKED_NUM 6 /* Number of blocked states. */
+
+/* Client request types */
+#define PROTO_REQ_INLINE 1
+#define PROTO_REQ_MULTIBULK 2
+
+/* Client classes for client limits, currently used only for
+ * the max-client-output-buffer limit implementation. */
+#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
+#define CLIENT_TYPE_SLAVE 1 /* Slaves. */
+#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
+#define CLIENT_TYPE_MASTER 3 /* Master. */
+#define CLIENT_TYPE_OBUF_COUNT 3 /* Number of clients to expose to output
+ buffer configuration. Just the first
+ three: normal, slave, pubsub. */
+
+/* Slave replication state. Used in server.repl_state for slaves to remember
+ * what to do next. */
+#define REPL_STATE_NONE 0 /* No active replication */
+#define REPL_STATE_CONNECT 1 /* Must connect to master */
+#define REPL_STATE_CONNECTING 2 /* Connecting to master */
+/* --- Handshake states, must be ordered --- */
+#define REPL_STATE_RECEIVE_PONG 3 /* Wait for PING reply */
+#define REPL_STATE_SEND_AUTH 4 /* Send AUTH to master */
+#define REPL_STATE_RECEIVE_AUTH 5 /* Wait for AUTH reply */
+#define REPL_STATE_SEND_PORT 6 /* Send REPLCONF listening-port */
+#define REPL_STATE_RECEIVE_PORT 7 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_IP 8 /* Send REPLCONF ip-address */
+#define REPL_STATE_RECEIVE_IP 9 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_CAPA 10 /* Send REPLCONF capa */
+#define REPL_STATE_RECEIVE_CAPA 11 /* Wait for REPLCONF reply */
+#define REPL_STATE_SEND_PSYNC 12 /* Send PSYNC */
+#define REPL_STATE_RECEIVE_PSYNC 13 /* Wait for PSYNC reply */
+/* --- End of handshake states --- */
+#define REPL_STATE_TRANSFER 14 /* Receiving .rdb from master */
+#define REPL_STATE_CONNECTED 15 /* Connected to master */
+
+/* State of slaves from the POV of the master. Used in client->replstate.
+ * In the SEND_BULK and ONLINE states the slave receives new updates in its
+ * output queue. In the WAIT_BGSAVE states, instead, the server is waiting
+ * to start the next background save in order to send updates to it. */
+#define SLAVE_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
+#define SLAVE_STATE_WAIT_BGSAVE_END 7 /* Waiting for RDB file creation to finish. */
+#define SLAVE_STATE_SEND_BULK 8 /* Sending RDB file to slave. */
+#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
+
+/* Slave capabilities. */
+#define SLAVE_CAPA_NONE 0
+#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
+#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
+
+/* Synchronous read timeout - slave side */
+#define CONFIG_REPL_SYNCIO_TIMEOUT 5
+
+/* List related stuff */
+#define LIST_HEAD 0
+#define LIST_TAIL 1
+#define ZSET_MIN 0
+#define ZSET_MAX 1
+
+/* Sort operations */
+#define SORT_OP_GET 0
+
+/* Log levels */
+#define LL_DEBUG 0
+#define LL_VERBOSE 1
+#define LL_NOTICE 2
+#define LL_WARNING 3
+#define LL_RAW (1<<10) /* Modifier to log without timestamp */
+#define CONFIG_DEFAULT_VERBOSITY LL_NOTICE
+
+/* Supervision options */
+#define SUPERVISED_NONE 0
+#define SUPERVISED_AUTODETECT 1
+#define SUPERVISED_SYSTEMD 2
+#define SUPERVISED_UPSTART 3
+
+/* Anti-warning macro... */
+#define UNUSED(V) ((void) V)
+
+#define ZSKIPLIST_MAXLEVEL 64 /* Should be enough for 2^64 elements */
+#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
+
+/* Append only defines */
+#define AOF_FSYNC_NO 0
+#define AOF_FSYNC_ALWAYS 1
+#define AOF_FSYNC_EVERYSEC 2
+#define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
+
+/* Zipped structures related defaults */
+#define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512
+#define OBJ_HASH_MAX_ZIPLIST_VALUE 64
+#define OBJ_SET_MAX_INTSET_ENTRIES 512
+#define OBJ_ZSET_MAX_ZIPLIST_ENTRIES 128
+#define OBJ_ZSET_MAX_ZIPLIST_VALUE 64
+#define OBJ_STREAM_NODE_MAX_BYTES 4096
+#define OBJ_STREAM_NODE_MAX_ENTRIES 100
+
+/* List defaults */
+#define OBJ_LIST_MAX_ZIPLIST_SIZE -2
+#define OBJ_LIST_COMPRESS_DEPTH 0
+
+/* HyperLogLog defines */
+#define CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
+
+/* Sets operations codes */
+#define SET_OP_UNION 0
+#define SET_OP_DIFF 1
+#define SET_OP_INTER 2
+
+/* Redis maxmemory strategies. Instead of using just an incremental number
+ * for these defines, we use a set of flags so that testing for certain
+ * properties common to multiple policies is faster. */
+#define MAXMEMORY_FLAG_LRU (1<<0)
+#define MAXMEMORY_FLAG_LFU (1<<1)
+#define MAXMEMORY_FLAG_ALLKEYS (1<<2)
+#define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \
+ (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)
+
+#define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU)
+#define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU)
+#define MAXMEMORY_VOLATILE_TTL (2<<8)
+#define MAXMEMORY_VOLATILE_RANDOM (3<<8)
+#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_ALLKEYS_RANDOM ((6<<8)|MAXMEMORY_FLAG_ALLKEYS)
+#define MAXMEMORY_NO_EVICTION (7<<8)
+
+#define CONFIG_DEFAULT_MAXMEMORY_POLICY MAXMEMORY_NO_EVICTION
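
The payoff of the flag layout, sketched below: testing a whole family of policies is one bitwise AND rather than an equality check per policy constant.

    /* Both volatile-lru and allkeys-lru satisfy this single test. */
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
        /* ... update the object's LRU clock ... */
    }
    /* Policies that may evict any key, regardless of TTL: */
    if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
        /* ... sample from the main dict instead of the expires dict ... */
    }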
+
+/* Scripting */
+#define LUA_SCRIPT_TIME_LIMIT 5000 /* milliseconds */
+
+/* Units */
+#define UNIT_SECONDS 0
+#define UNIT_MILLISECONDS 1
+
+/* SHUTDOWN flags */
+#define SHUTDOWN_NOFLAGS 0 /* No flags. */
+#define SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
+ points are configured. */
+#define SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
+
+/* Command call flags, see call() function */
+#define CMD_CALL_NONE 0
+#define CMD_CALL_SLOWLOG (1<<0)
+#define CMD_CALL_STATS (1<<1)
+#define CMD_CALL_PROPAGATE_AOF (1<<2)
+#define CMD_CALL_PROPAGATE_REPL (1<<3)
+#define CMD_CALL_PROPAGATE (CMD_CALL_PROPAGATE_AOF|CMD_CALL_PROPAGATE_REPL)
+#define CMD_CALL_FULL (CMD_CALL_SLOWLOG | CMD_CALL_STATS | CMD_CALL_PROPAGATE)
+
+/* Command propagation flags, see propagate() function */
+#define PROPAGATE_NONE 0
+#define PROPAGATE_AOF 1
+#define PROPAGATE_REPL 2
+
+/* RDB active child save type. */
+#define RDB_CHILD_TYPE_NONE 0
+#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
+#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
+
+/* Keyspace changes notification classes. Every class is associated with a
+ * character for configuration purposes. */
+#define NOTIFY_KEYSPACE (1<<0) /* K */
+#define NOTIFY_KEYEVENT (1<<1) /* E */
+#define NOTIFY_GENERIC (1<<2) /* g */
+#define NOTIFY_STRING (1<<3) /* $ */
+#define NOTIFY_LIST (1<<4) /* l */
+#define NOTIFY_SET (1<<5) /* s */
+#define NOTIFY_HASH (1<<6) /* h */
+#define NOTIFY_ZSET (1<<7) /* z */
+#define NOTIFY_EXPIRED (1<<8) /* x */
+#define NOTIFY_EVICTED (1<<9) /* e */
+#define NOTIFY_STREAM (1<<10) /* t */
+#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM) /* A flag */
+
+/* Get the first bind addr or NULL */
+#define NET_FIRST_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL)
+
+/* Using the following macro you can run code inside serverCron() at the
+ * given period, expressed in milliseconds.
+ * The actual resolution depends on server.hz. */
+#define run_with_period(_ms_) if ((_ms_ <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
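
A usage sketch: the macro expands to an if statement, so it takes a block; the period and body below are illustrative.

    /* Inside serverCron(): run roughly every 100 ms, subject to the
     * resolution imposed by server.hz. */
    run_with_period(100) {
        /* ... sample instantaneous metrics ... */
    }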
+
+/* We can print the stacktrace, so our assert is defined this way: */
+#define serverAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_serverAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
+#define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))
+#define serverPanic(...) _serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1)
+
+/*-----------------------------------------------------------------------------
+ * Data types
+ *----------------------------------------------------------------------------*/
+
+/* A Redis object, that is, a type able to hold a string / list / set. */
+
+/* The actual Redis Object */
+#define OBJ_STRING 0 /* String object. */
+#define OBJ_LIST 1 /* List object. */
+#define OBJ_SET 2 /* Set object. */
+#define OBJ_ZSET 3 /* Sorted set object. */
+#define OBJ_HASH 4 /* Hash object. */
+
+/* The "module" object type is a special one that signals that the object
+ * is one directly managed by a Redis module. In this case the value points
+ * to a moduleValue struct, which contains the object value (which is only
+ * handled by the module itself) and the RedisModuleType struct which lists
+ * function pointers in order to serialize, deserialize, AOF-rewrite and
+ * free the object.
+ *
+ * Inside the RDB file, module types are encoded as OBJ_MODULE followed
+ * by a 64-bit module type ID, which has a 54-bit module-specific signature
+ * in order to dispatch the loading to the right module, plus a 10-bit
+ * encoding version. */
+#define OBJ_MODULE 5 /* Module object. */
+#define OBJ_STREAM 6 /* Stream object. */
+
+/* Extract encver / signature from a module type ID. */
+#define REDISMODULE_TYPE_ENCVER_BITS 10
+#define REDISMODULE_TYPE_ENCVER_MASK ((1<<REDISMODULE_TYPE_ENCVER_BITS)-1)
+#define REDISMODULE_TYPE_ENCVER(id) (id & REDISMODULE_TYPE_ENCVER_MASK)
+#define REDISMODULE_TYPE_SIGN(id) ((id & ~((uint64_t)REDISMODULE_TYPE_ENCVER_MASK)) >>REDISMODULE_TYPE_ENCVER_BITS)
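
A worked example of the split, using an illustrative ID: the high 54 bits carry the module signature, the low 10 bits the encoding version.

    uint64_t id = ((uint64_t)0x123456 << REDISMODULE_TYPE_ENCVER_BITS) | 2;
    int encver = REDISMODULE_TYPE_ENCVER(id);   /* -> 2 */
    uint64_t sign = REDISMODULE_TYPE_SIGN(id);  /* -> 0x123456 */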
+
+struct RedisModule;
+struct RedisModuleIO;
+struct RedisModuleDigest;
+struct RedisModuleCtx;
+struct redisObject;
+
+/* Each module type implementation should export a set of methods in order
+ * to serialize and deserialize the value in the RDB file, rewrite the AOF
+ * log, create the digest for "DEBUG DIGEST", and free the value when a key
+ * is deleted. */
+typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver);
+typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value);
+typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value);
+typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value);
+typedef size_t (*moduleTypeMemUsageFunc)(const void *value);
+typedef void (*moduleTypeFreeFunc)(void *value);
+
+/* The module type, which is referenced in each value of a given type, defines
+ * the methods and links to the module exporting the type. */
+typedef struct RedisModuleType {
+ uint64_t id; /* Higher 54 bits of type ID + 10 lower bits of encoding ver. */
+ struct RedisModule *module;
+ moduleTypeLoadFunc rdb_load;
+ moduleTypeSaveFunc rdb_save;
+ moduleTypeRewriteFunc aof_rewrite;
+ moduleTypeMemUsageFunc mem_usage;
+ moduleTypeDigestFunc digest;
+ moduleTypeFreeFunc free;
+ char name[10]; /* 9 bytes name + null term. Charset: A-Z a-z 0-9 _- */
+} moduleType;
+
+/* In Redis objects 'robj' structures of type OBJ_MODULE, the value pointer
+ * is set to the following structure, referencing the moduleType structure
+ * in order to work with the value, and at the same time providing a raw
+ * pointer to the value, as created by the module commands operating with
+ * the module type.
+ *
+ * So for example in order to free such a value, it is possible to use
+ * the following code:
+ *
+ * if (robj->type == OBJ_MODULE) {
+ * moduleValue *mt = robj->ptr;
+ * mt->type->free(mt->value);
+ * zfree(mt); // We need to release this in-the-middle struct as well.
+ * }
+ */
+typedef struct moduleValue {
+ moduleType *type;
+ void *value;
+} moduleValue;
+
+/* This is a wrapper for the 'rio' streams used inside rdb.c in Redis, so that
+ * the user does not have to keep track of the total number of bytes
+ * written, nor to care about error conditions. */
+typedef struct RedisModuleIO {
+ size_t bytes; /* Bytes read / written so far. */
+ rio *rio; /* Rio stream. */
+ moduleType *type; /* Module type doing the operation. */
+ int error; /* True if error condition happened. */
+ int ver; /* Module serialization version: 1 (old),
+ * 2 (current version with opcodes annotation). */
+ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/
+} RedisModuleIO;
+
+/* Macro to initialize an IO context. Note that the 'ver' field is populated
+ * inside rdb.c according to the version of the value to load. */
+#define moduleInitIOContext(iovar,mtype,rioptr) do { \
+ iovar.rio = rioptr; \
+ iovar.type = mtype; \
+ iovar.bytes = 0; \
+ iovar.error = 0; \
+ iovar.ver = 0; \
+ iovar.ctx = NULL; \
+} while(0)
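+
+/* Typical usage sketch (rdb.c does something along these lines when saving
+ * a module value; here 'rdb', 'mt' and 'mv' are assumed to be a valid rio
+ * stream, a moduleType and a moduleValue):
+ *
+ *  RedisModuleIO io;
+ *  moduleInitIOContext(io,mt,rdb);
+ *  mt->rdb_save(&io,mv->value);
+ *  if (io.error) ... handle the failed save ...
+ */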
+
+/* This is a structure used to export DEBUG DIGEST capabilities to Redis
+ * modules. We want to capture both the ordered and unordered elements of
+ * a data structure, so that a digest can be created in a way that correctly
+ * reflects the values. See the DEBUG DIGEST command implementation for more
+ * background. */
+typedef struct RedisModuleDigest {
+ unsigned char o[20]; /* Ordered elements. */
+ unsigned char x[20]; /* Xored elements. */
+} RedisModuleDigest;
+
+/* Just start with a digest composed of all zero bytes. */
+#define moduleInitDigestContext(mdvar) do { \
+ memset(mdvar.o,0,sizeof(mdvar.o)); \
+ memset(mdvar.x,0,sizeof(mdvar.x)); \
+} while(0)
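+
+/* Minimal usage sketch (the real update and mixing logic lives in
+ * module.c; 'elemdigest' is a hypothetical 20-byte digest of one element):
+ *
+ *  RedisModuleDigest md;
+ *  moduleInitDigestContext(md);
+ *  for (int j = 0; j < 20; j++) md.x[j] ^= elemdigest[j];
+ */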
+
+/* Objects encoding. Some kinds of objects, like Strings and Hashes, can be
+ * internally represented in multiple ways. The 'encoding' field of the object
+ * is set to one of these constants for the given object. */
+#define OBJ_ENCODING_RAW 0 /* Raw representation */
+#define OBJ_ENCODING_INT 1 /* Encoded as integer */
+#define OBJ_ENCODING_HT 2 /* Encoded as hash table */
+#define OBJ_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
+#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
+#define OBJ_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
+#define OBJ_ENCODING_INTSET 6 /* Encoded as intset */
+#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
+#define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
+#define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
+#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
+
+#define LRU_BITS 24
+#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1) /* Max value of obj->lru */
+#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
+
+#define OBJ_SHARED_REFCOUNT INT_MAX
+typedef struct redisObject {
+ unsigned type:4;
+ unsigned encoding:4;
+ unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
+ * LFU data (least significant 8 bits frequency
+ * and most significant 16 bits access time). */
+ int refcount;
+ void *ptr;
+} robj;
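+
+/* When the server uses an LFU eviction policy, the 24-bit 'lru' field is
+ * split as described above; a sketch of how the two halves are extracted
+ * (this mirrors what evict.c does, with 'o' a robj pointer):
+ *
+ *  unsigned long ldt = o->lru >> 8;      // 16-bit last access time
+ *  unsigned long counter = o->lru & 255; // 8-bit logarithmic frequency
+ */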
+
+/* Macro used to initialize a Redis object allocated on the stack.
+ * Note that this macro is kept near the structure definition to make sure
+ * we'll update it when the structure is changed, to avoid bugs like
+ * bug #85 introduced exactly in this way. */
+#define initStaticStringObject(_var,_ptr) do { \
+ _var.refcount = 1; \
+ _var.type = OBJ_STRING; \
+ _var.encoding = OBJ_ENCODING_RAW; \
+ _var.ptr = _ptr; \
+} while(0)
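+
+/* Usage sketch: wrap an existing sds into a stack allocated string object
+ * without heap-allocating a robj ('keyname' is assumed to be an sds owned
+ * by the caller, which remains responsible for freeing it):
+ *
+ *  robj keyobj;
+ *  initStaticStringObject(keyobj,keyname);
+ *  // keyobj can now be passed to functions taking a robj pointer.
+ */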
+
+struct evictionPoolEntry; /* Defined in evict.c */
+
+/* This structure is used in order to represent the output buffer of a client,
+ * which is actually a linked list of blocks like this one, that is: client->reply. */
+typedef struct clientReplyBlock {
+ size_t size, used;
+ char buf[];
+} clientReplyBlock;
+
+/* Redis database representation. There are multiple databases identified
+ * by integers from 0 (the default database) up to the max configured
+ * database. The database number is the 'id' field in the structure. */
+typedef struct redisDb {
+ dict *dict; /* The keyspace for this DB */
+ dict *expires; /* Timeout of keys with a timeout set */
+ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
+ dict *ready_keys; /* Blocked keys that received a PUSH */
+ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
+ int id; /* Database ID */
+ long long avg_ttl; /* Average TTL, just for stats */
+ list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
+} redisDb;
+
+/* Client MULTI/EXEC state */
+typedef struct multiCmd {
+ robj **argv;
+ int argc;
+ struct redisCommand *cmd;
+} multiCmd;
+
+typedef struct multiState {
+ multiCmd *commands; /* Array of MULTI commands */
+ int count; /* Total number of MULTI commands */
+ int minreplicas; /* MINREPLICAS for synchronous replication */
+ time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
+} multiState;
+
+/* This structure holds the blocking operation state for a client.
+ * The fields used depend on client->btype. */
+typedef struct blockingState {
+ /* Generic fields. */
+ mstime_t timeout; /* Blocking operation timeout. If UNIX current time
+ * is > timeout then the operation timed out. */
+
+ /* BLOCKED_LIST, BLOCKED_ZSET and BLOCKED_STREAM */
+    dict *keys;             /* The keys we are waiting for in order to
+                             * terminate a blocking operation such as
+                             * BLPOP or XREAD. Or NULL. */
+ robj *target; /* The key that should receive the element,
+ * for BRPOPLPUSH. */
+
+    /* BLOCKED_STREAM */
+ size_t xread_count; /* XREAD COUNT option. */
+ robj *xread_group; /* XREADGROUP group name. */
+ robj *xread_consumer; /* XREADGROUP consumer name. */
+ mstime_t xread_retry_time, xread_retry_ttl;
+ int xread_group_noack;
+
+ /* BLOCKED_WAIT */
+ int numreplicas; /* Number of replicas we are waiting for ACK. */
+ long long reploffset; /* Replication offset to reach. */
+
+ /* BLOCKED_MODULE */
+    void *module_blocked_handle; /* RedisModuleBlockedClient structure,
+                                    which is opaque to the Redis core and
+                                    only handled in module.c. */
+} blockingState;
+
+/* The following structure represents a node in the server.ready_keys list,
+ * where we accumulate all the keys that had clients blocked with a blocking
+ * operation such as B[LR]POP, but received new data in the context of the
+ * last executed command.
+ *
+ * After the execution of every command or script, we scan this list to
+ * check whether, as a result, we should serve data to blocked clients,
+ * unblocking them. Note that server.ready_keys will not have duplicates,
+ * since there is a dictionary, also called ready_keys, in every structure
+ * representing a Redis database, where we make sure to remember if a given
+ * key was already added to the server.ready_keys list. */
+typedef struct readyList {
+ redisDb *db;
+ robj *key;
+} readyList;
+
+/* With multiplexing we need to keep per-client state.
+ * Clients are stored in a linked list. */
+typedef struct client {
+ uint64_t id; /* Client incremental unique ID. */
+ int fd; /* Client socket. */
+ redisDb *db; /* Pointer to currently SELECTed DB. */
+ robj *name; /* As set by CLIENT SETNAME. */
+ sds querybuf; /* Buffer we use to accumulate client queries. */
+ sds pending_querybuf; /* If this client is flagged as master, this buffer
+ represents the yet not applied portion of the
+ replication stream that we are receiving from
+ the master. */
+ size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
+ int argc; /* Num of arguments of current command. */
+ robj **argv; /* Arguments of current command. */
+ struct redisCommand *cmd, *lastcmd; /* Last command executed. */
+ int reqtype; /* Request protocol type: PROTO_REQ_* */
+ int multibulklen; /* Number of multi bulk arguments left to read. */
+ long bulklen; /* Length of bulk argument in multi bulk request. */
+ list *reply; /* List of reply objects to send to the client. */
+ unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
+ size_t sentlen; /* Amount of bytes already sent in the current
+ buffer or object being sent. */
+ time_t ctime; /* Client creation time. */
+ time_t lastinteraction; /* Time of the last interaction, used for timeout */
+ time_t obuf_soft_limit_reached_time;
+ int flags; /* Client flags: CLIENT_* macros. */
+ int authenticated; /* When requirepass is non-NULL. */
+ int replstate; /* Replication state if this is a slave. */
+ int repl_put_online_on_ack; /* Install slave write handler on ACK. */
+ int repldbfd; /* Replication DB file descriptor. */
+ off_t repldboff; /* Replication DB file offset. */
+ off_t repldbsize; /* Replication DB file size. */
+ sds replpreamble; /* Replication DB preamble. */
+ long long read_reploff; /* Read replication offset if this is a master. */
+ long long reploff; /* Applied replication offset if this is a master. */
+ long long repl_ack_off; /* Replication ack offset, if this is a slave. */
+ long long repl_ack_time;/* Replication ack time, if this is a slave. */
+ long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
+ copying this slave output buffer
+ should use. */
+ char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */
+ int slave_listening_port; /* As configured with: SLAVECONF listening-port */
+ char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */
+ int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
+ multiState mstate; /* MULTI/EXEC state */
+ int btype; /* Type of blocking op if CLIENT_BLOCKED. */
+ blockingState bpop; /* blocking state */
+ long long woff; /* Last write global replication offset. */
+ list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
+ dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
+ list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
+ sds peerid; /* Cached peer ID. */
+ listNode *client_list_node; /* list node in client list */
+
+ /* Response buffer */
+ int bufpos;
+ char buf[PROTO_REPLY_CHUNK_BYTES];
+} client;
+
+struct saveparam {
+ time_t seconds;
+ int changes;
+};
+
+struct moduleLoadQueueEntry {
+ sds path;
+ int argc;
+ robj **argv;
+};
+
+struct sharedObjectsStruct {
+ robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *cnegone, *pong, *space,
+ *colon, *nullbulk, *nullmultibulk, *queued,
+ *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
+ *outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
+ *masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
+ *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
+ *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
+ *rpop, *lpop, *lpush, *zpopmin, *zpopmax, *emptyscan,
+ *select[PROTO_SHARED_SELECT_CMDS],
+ *integers[OBJ_SHARED_INTEGERS],
+ *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
+ *bulkhdr[OBJ_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
+ sds minstring, maxstring;
+};
+
+/* ZSETs use a specialized version of Skiplists */
+typedef struct zskiplistNode {
+ sds ele;
+ double score;
+ struct zskiplistNode *backward;
+ struct zskiplistLevel {
+ struct zskiplistNode *forward;
+ unsigned long span;
+ } level[];
+} zskiplistNode;
+
+typedef struct zskiplist {
+ struct zskiplistNode *header, *tail;
+ unsigned long length;
+ int level;
+} zskiplist;
+
+typedef struct zset {
+ dict *dict;
+ zskiplist *zsl;
+} zset;
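+
+/* How a skiplist lookup works, in sketch form: starting from the highest
+ * level of the header node, move forward while the next node sorts before
+ * the target, dropping one level on each overshoot; summing the spans of
+ * the traversed links yields the rank (this mirrors the logic of
+ * zslGetRank() in t_zset.c; 'sortsBeforeTarget' is a hypothetical
+ * comparison on the (score,ele) pair):
+ *
+ *  unsigned long rank = 0;
+ *  zskiplistNode *x = zsl->header;
+ *  for (int i = zsl->level-1; i >= 0; i--) {
+ *      while (x->level[i].forward && sortsBeforeTarget(x->level[i].forward)) {
+ *          rank += x->level[i].span;
+ *          x = x->level[i].forward;
+ *      }
+ *  }
+ */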
+
+typedef struct clientBufferLimitsConfig {
+ unsigned long long hard_limit_bytes;
+ unsigned long long soft_limit_bytes;
+ time_t soft_limit_seconds;
+} clientBufferLimitsConfig;
+
+extern clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT];
+
+/* The redisOp structure defines a Redis Operation, that is, an instance of
+ * a command with an argument vector, database ID, propagation target
+ * (PROPAGATE_*), and command pointer.
+ *
+ * Currently it is only used to propagate additional commands to the AOF
+ * and to replicas after the propagation of the executed command. */
+typedef struct redisOp {
+ robj **argv;
+ int argc, dbid, target;
+ struct redisCommand *cmd;
+} redisOp;
+
+/* Defines an array of Redis operations. There is an API to add to this
+ * structure in an easy way.
+ *
+ * redisOpArrayInit();
+ * redisOpArrayAppend();
+ * redisOpArrayFree();
+ */
+typedef struct redisOpArray {
+ redisOp *ops;
+ int numops;
+} redisOpArray;
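+
+/* Usage sketch (the definitions live in server.c; the argument order of
+ * redisOpArrayAppend() is assumed here):
+ *
+ *  redisOpArray oa;
+ *  redisOpArrayInit(&oa);
+ *  redisOpArrayAppend(&oa,cmd,dbid,argv,argc,PROPAGATE_AOF|PROPAGATE_REPL);
+ *  ... later, propagate oa.ops[0..oa.numops-1] and ...
+ *  redisOpArrayFree(&oa);
+ */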
+
+/* This structure is returned by the getMemoryOverheadData() function in
+ * order to return memory overhead information. */
+struct redisMemOverhead {
+ size_t peak_allocated;
+ size_t total_allocated;
+ size_t startup_allocated;
+ size_t repl_backlog;
+ size_t clients_slaves;
+ size_t clients_normal;
+ size_t aof_buffer;
+ size_t lua_caches;
+ size_t overhead_total;
+ size_t dataset;
+ size_t total_keys;
+ size_t bytes_per_key;
+ float dataset_perc;
+ float peak_perc;
+ float total_frag;
+ size_t total_frag_bytes;
+ float allocator_frag;
+ size_t allocator_frag_bytes;
+ float allocator_rss;
+ size_t allocator_rss_bytes;
+ float rss_extra;
+ size_t rss_extra_bytes;
+ size_t num_dbs;
+ struct {
+ size_t dbid;
+ size_t overhead_ht_main;
+ size_t overhead_ht_expires;
+ } *db;
+};
+
+/* This structure can be optionally passed to RDB save/load functions in
+ * order to implement additional functionality, by storing and loading
+ * metadata in the RDB file.
+ *
+ * Currently the only use is to select a DB at load time, useful in
+ * replication in order to make sure that chained slaves (slaves of slaves)
+ * select the correct DB and are able to accept the stream coming from the
+ * top-level master. */
+typedef struct rdbSaveInfo {
+    /* Used both when saving and loading. */
+ int repl_stream_db; /* DB to select in server.master client. */
+
+    /* Used only when loading. */
+ int repl_id_is_set; /* True if repl_id field is set. */
+ char repl_id[CONFIG_RUN_ID_SIZE+1]; /* Replication ID. */
+ long long repl_offset; /* Replication offset. */
+} rdbSaveInfo;
+
+#define RDB_SAVE_INFO_INIT {-1,0,"0000000000000000000000000000000000000000",-1}
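+
+/* Usage sketch: a caller that wants replication metadata stored in the
+ * RDB file initializes the structure and passes it down to the save
+ * functions (rdbSaveRio() is declared below; the flags come from rdb.h):
+ *
+ *  rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+ *  rsi.repl_stream_db = server.slaveseldb;
+ *  rdbSaveRio(rdb,&error,RDB_SAVE_NONE,&rsi);
+ */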
+
+struct malloc_stats {
+ size_t zmalloc_used;
+ size_t process_rss;
+ size_t allocator_allocated;
+ size_t allocator_active;
+ size_t allocator_resident;
+};
+
+/*-----------------------------------------------------------------------------
+ * Global server state
+ *----------------------------------------------------------------------------*/
+
+struct clusterState;
+
+/* AIX defines hz to __hz; we don't use this define and, in order to allow
+ * Redis to build on AIX, we need to undef it. */
+#ifdef _AIX
+#undef hz
+#endif
+
+#define CHILD_INFO_MAGIC 0xC17DDA7A12345678LL
+#define CHILD_INFO_TYPE_RDB 0
+#define CHILD_INFO_TYPE_AOF 1
+
+struct redisServer {
+ /* General */
+ pid_t pid; /* Main process pid. */
+ char *configfile; /* Absolute config file path, or NULL */
+ char *executable; /* Absolute executable file path. */
+ char **exec_argv; /* Executable argv vector (copy). */
+ int dynamic_hz; /* Change hz value depending on # of clients. */
+ int config_hz; /* Configured HZ value. May be different than
+ the actual 'hz' field value if dynamic-hz
+ is enabled. */
+ int hz; /* serverCron() calls frequency in hertz */
+ redisDb *db;
+ dict *commands; /* Command table */
+ dict *orig_commands; /* Command table before command renaming. */
+ aeEventLoop *el;
+ unsigned int lruclock; /* Clock for LRU eviction */
+ int shutdown_asap; /* SHUTDOWN needed ASAP */
+ int activerehashing; /* Incremental rehash in serverCron() */
+ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */
+ char *requirepass; /* Pass for AUTH command, or NULL */
+ char *pidfile; /* PID file path */
+ int arch_bits; /* 32 or 64 depending on sizeof(long) */
+    int cronloops;              /* Number of times the cron function has run */
+ char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */
+ int sentinel_mode; /* True if this instance is a Sentinel. */
+ size_t initial_memory_usage; /* Bytes used after initialization. */
+ int always_show_logo; /* Show logo even for non-stdout logging. */
+ /* Modules */
+ dict *moduleapi; /* Exported APIs dictionary for modules. */
+ list *loadmodule_queue; /* List of modules to load at startup. */
+ int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a
+ client blocked on a module command needs
+ to be processed. */
+ /* Networking */
+ int port; /* TCP listening port */
+ int tcp_backlog; /* TCP listen() backlog */
+ char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
+ int bindaddr_count; /* Number of addresses in server.bindaddr[] */
+ char *unixsocket; /* UNIX socket path */
+ mode_t unixsocketperm; /* UNIX socket permission */
+ int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
+ int ipfd_count; /* Used slots in ipfd[] */
+ int sofd; /* Unix socket file descriptor */
+ int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
+ int cfd_count; /* Used slots in cfd[] */
+ list *clients; /* List of active clients */
+ list *clients_to_close; /* Clients to close asynchronously */
+    list *clients_pending_write; /* Clients with output to write or a write
+                                    handler to install. */
+ list *slaves, *monitors; /* List of slaves and MONITORs */
+ client *current_client; /* Current client, only used on crash report */
+ rax *clients_index; /* Active clients dictionary by client ID. */
+ int clients_paused; /* True if clients are currently paused */
+ mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
+ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
+ dict *migrate_cached_sockets;/* MIGRATE cached sockets */
+ uint64_t next_client_id; /* Next client unique ID. Incremental. */
+ int protected_mode; /* Don't accept external connections. */
+ /* RDB / AOF loading information */
+ int loading; /* We are loading data from disk if true */
+ off_t loading_total_bytes;
+ off_t loading_loaded_bytes;
+ time_t loading_start_time;
+ off_t loading_process_events_interval_bytes;
+    /* Fast pointers to often looked up commands */
+ struct redisCommand *delCommand, *multiCommand, *lpushCommand,
+ *lpopCommand, *rpopCommand, *zpopminCommand,
+ *zpopmaxCommand, *sremCommand, *execCommand,
+ *expireCommand, *pexpireCommand, *xclaimCommand;
+ /* Fields used only for stats */
+ time_t stat_starttime; /* Server start time */
+ long long stat_numcommands; /* Number of processed commands */
+ long long stat_numconnections; /* Number of connections received */
+ long long stat_expiredkeys; /* Number of expired keys */
+ double stat_expired_stale_perc; /* Percentage of keys probably expired */
+    long long stat_expired_time_cap_reached_count; /* Early expire cycle stops.*/
+ long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
+ long long stat_keyspace_hits; /* Number of successful lookups of keys */
+ long long stat_keyspace_misses; /* Number of failed lookups of keys */
+ long long stat_active_defrag_hits; /* number of allocations moved */
+ long long stat_active_defrag_misses; /* number of allocations scanned but not moved */
+ long long stat_active_defrag_key_hits; /* number of keys with moved allocations */
+ long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */
+ long long stat_active_defrag_scanned; /* number of dictEntries scanned */
+ size_t stat_peak_memory; /* Max used memory record */
+ long long stat_fork_time; /* Time needed to perform latest fork() */
+ double stat_fork_rate; /* Fork rate in GB/sec. */
+ long long stat_rejected_conn; /* Clients rejected because of maxclients */
+ long long stat_sync_full; /* Number of full resyncs with slaves. */
+ long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
+ long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
+ list *slowlog; /* SLOWLOG list of commands */
+ long long slowlog_entry_id; /* SLOWLOG current entry ID */
+ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
+ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
+ struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
+ long long stat_net_input_bytes; /* Bytes read from network. */
+ long long stat_net_output_bytes; /* Bytes written to network. */
+ size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
+ size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */
+ /* The following two are used to track instantaneous metrics, like
+ * number of operations per second, network traffic. */
+ struct {
+ long long last_sample_time; /* Timestamp of last sample in ms */
+ long long last_sample_count;/* Count in last sample */
+ long long samples[STATS_METRIC_SAMPLES];
+ int idx;
+ } inst_metric[STATS_METRIC_COUNT];
+ /* Configuration */
+ int verbosity; /* Loglevel in redis.conf */
+ int maxidletime; /* Client timeout in seconds */
+ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
+ int active_expire_enabled; /* Can be disabled for testing purposes. */
+ int active_defrag_enabled;
+ size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
+ int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
+ int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
+ int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */
+ int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */
+ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */
+ size_t client_max_querybuf_len; /* Limit for client query buffer length */
+ int dbnum; /* Total number of configured DBs */
+ int supervised; /* 1 if supervised, 0 otherwise. */
+ int supervised_mode; /* See SUPERVISED_* */
+ int daemonize; /* True if running as a daemon */
+ clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
+ /* AOF persistence */
+ int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */
+ int aof_fsync; /* Kind of fsync() policy */
+ char *aof_filename; /* Name of the AOF file */
+ int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
+ int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
+ off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
+ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
+ off_t aof_current_size; /* AOF current size. */
+ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
+    pid_t aof_child_pid;            /* PID of rewriting process */
+ list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
+ sds aof_buf; /* AOF buffer, written before entering the event loop */
+ int aof_fd; /* File descriptor of currently selected AOF file */
+ int aof_selected_db; /* Currently selected DB in AOF */
+ time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
+ time_t aof_last_fsync; /* UNIX time of last fsync() */
+ time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
+ time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
+ int aof_lastbgrewrite_status; /* C_OK or C_ERR */
+ unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
+ int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
+ int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
+ int aof_last_write_status; /* C_OK or C_ERR */
+ int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
+ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
+ int aof_use_rdb_preamble; /* Use RDB preamble on AOF rewrites. */
+ /* AOF pipes used to communicate between parent and child during rewrite. */
+ int aof_pipe_write_data_to_child;
+ int aof_pipe_read_data_from_parent;
+ int aof_pipe_write_ack_to_parent;
+ int aof_pipe_read_ack_from_child;
+ int aof_pipe_write_ack_to_child;
+ int aof_pipe_read_ack_from_parent;
+ int aof_stop_sending_diff; /* If true stop sending accumulated diffs
+ to child process. */
+ sds aof_child_diff; /* AOF diff accumulator child side. */
+ /* RDB persistence */
+ long long dirty; /* Changes to DB from the last save */
+ long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
+ pid_t rdb_child_pid; /* PID of RDB saving child */
+ struct saveparam *saveparams; /* Save points array for RDB */
+ int saveparamslen; /* Number of saving points */
+ char *rdb_filename; /* Name of RDB file */
+ int rdb_compression; /* Use compression in RDB? */
+ int rdb_checksum; /* Use RDB checksum? */
+ time_t lastsave; /* Unix time of last successful save */
+ time_t lastbgsave_try; /* Unix time of last attempted bgsave */
+ time_t rdb_save_time_last; /* Time used by last RDB save run. */
+ time_t rdb_save_time_start; /* Current RDB save start time. */
+ int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
+ int rdb_child_type; /* Type of save by active child. */
+ int lastbgsave_status; /* C_OK or C_ERR */
+ int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
+ int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
+ int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
+ /* Pipe and data structures for child -> parent info sharing. */
+ int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
+ struct {
+ int process_type; /* AOF or RDB child? */
+ size_t cow_size; /* Copy on write size. */
+ unsigned long long magic; /* Magic value to make sure data is valid. */
+ } child_info_data;
+ /* Propagation of commands in AOF / replication */
+ redisOpArray also_propagate; /* Additional command to propagate. */
+ /* Logging */
+ char *logfile; /* Path of log file */
+ int syslog_enabled; /* Is syslog enabled? */
+ char *syslog_ident; /* Syslog ident */
+ int syslog_facility; /* Syslog facility */
+ /* Replication (master) */
+ char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */
+ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
+ long long master_repl_offset; /* My current replication offset */
+ long long second_replid_offset; /* Accept offsets up to this for replid2. */
+ int slaveseldb; /* Last SELECTed DB in replication output */
+ int repl_ping_slave_period; /* Master pings the slave every N seconds */
+ char *repl_backlog; /* Replication backlog for partial syncs */
+ long long repl_backlog_size; /* Backlog circular buffer size */
+ long long repl_backlog_histlen; /* Backlog actual data length */
+ long long repl_backlog_idx; /* Backlog circular buffer current offset,
+                                       that is, the next byte we'll write to.*/
+ long long repl_backlog_off; /* Replication "master offset" of first
+ byte in the replication backlog buffer.*/
+ time_t repl_backlog_time_limit; /* Time without slaves after the backlog
+ gets released. */
+ time_t repl_no_slaves_since; /* We have no slaves since that time.
+ Only valid if server.slaves len is 0. */
+ int repl_min_slaves_to_write; /* Min number of slaves to write. */
+ int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
+ int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
+ int repl_diskless_sync; /* Send RDB to slaves sockets directly. */
+ int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
+ /* Replication (slave) */
+ char *masterauth; /* AUTH with this password with master */
+ char *masterhost; /* Hostname of master */
+ int masterport; /* Port of master */
+ int repl_timeout; /* Timeout after N seconds of master idle */
+ client *master; /* Client that is master for this slave */
+ client *cached_master; /* Cached master to be reused for PSYNC. */
+ int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
+ int repl_state; /* Replication status if the instance is a slave */
+ off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
+ off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
+ off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
+ int repl_transfer_s; /* Slave -> Master SYNC socket */
+ int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
+    char *repl_transfer_tmpfile; /* Slave -> master SYNC temp file name */
+ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
+ int repl_serve_stale_data; /* Serve stale data when link is down? */
+ int repl_slave_ro; /* Slave is read only? */
+ time_t repl_down_since; /* Unix time at which link with master went down */
+ int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
+ int slave_priority; /* Reported in INFO and used by Sentinel. */
+ int slave_announce_port; /* Give the master this listening port. */
+ char *slave_announce_ip; /* Give the master this ip address. */
+    /* The following two fields are where we store master PSYNC replid/offset
+ * while the PSYNC is in progress. At the end we'll copy the fields into
+ * the server->master client structure. */
+ char master_replid[CONFIG_RUN_ID_SIZE+1]; /* Master PSYNC runid. */
+ long long master_initial_offset; /* Master PSYNC offset. */
+ int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
+ /* Replication script cache. */
+ dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
+ list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
+ unsigned int repl_scriptcache_size; /* Max number of elements. */
+ /* Synchronous replication. */
+ list *clients_waiting_acks; /* Clients waiting in WAIT command. */
+ int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
+ /* Limits */
+ unsigned int maxclients; /* Max number of simultaneous clients */
+ unsigned long long maxmemory; /* Max number of memory bytes to use */
+ int maxmemory_policy; /* Policy for key eviction */
+    int maxmemory_samples;          /* Precision of random sampling */
+ int lfu_log_factor; /* LFU logarithmic counter factor. */
+ int lfu_decay_time; /* LFU counter decay factor. */
+ long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
+ /* Blocked clients */
+ unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
+ unsigned int blocked_clients_by_type[BLOCKED_NUM];
+ list *unblocked_clients; /* list of clients to unblock before next loop */
+ list *ready_keys; /* List of readyList structures for BLPOP & co */
+ /* Sort parameters - qsort_r() is only available under BSD so we
+     * have to make this state global, in order to pass it to sortCompare() */
+ int sort_desc;
+ int sort_alpha;
+ int sort_bypattern;
+ int sort_store;
+ /* Zip structure config, see redis.conf for more information */
+ size_t hash_max_ziplist_entries;
+ size_t hash_max_ziplist_value;
+ size_t set_max_intset_entries;
+ size_t zset_max_ziplist_entries;
+ size_t zset_max_ziplist_value;
+ size_t hll_sparse_max_bytes;
+ size_t stream_node_max_bytes;
+ int64_t stream_node_max_entries;
+ /* List parameters */
+ int list_max_ziplist_size;
+ int list_compress_depth;
+ /* time cache */
+ time_t unixtime; /* Unix time sampled every cron cycle. */
+ time_t timezone; /* Cached timezone. As set by tzset(). */
+ int daylight_active; /* Currently in daylight saving time. */
+ long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
+ /* Pubsub */
+ dict *pubsub_channels; /* Map channels to list of subscribed clients */
+ list *pubsub_patterns; /* A list of pubsub_patterns */
+    int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is a
+                                   bitwise OR of NOTIFY_... flags. */
+ /* Cluster */
+ int cluster_enabled; /* Is cluster enabled? */
+ mstime_t cluster_node_timeout; /* Cluster node timeout. */
+ char *cluster_configfile; /* Cluster auto-generated config file name. */
+ struct clusterState *cluster; /* State of the cluster */
+ int cluster_migration_barrier; /* Cluster replicas migration barrier. */
+ int cluster_slave_validity_factor; /* Slave max data age for failover. */
+ int cluster_require_full_coverage; /* If true, put the cluster down if
+ there is at least an uncovered slot.*/
+ int cluster_slave_no_failover; /* Prevent slave from starting a failover
+ if the master is in failure state. */
+ char *cluster_announce_ip; /* IP address to announce on cluster bus. */
+ int cluster_announce_port; /* base port to announce on cluster bus. */
+ int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
+ /* Scripting */
+ lua_State *lua; /* The Lua interpreter. We use just one for all clients */
+ client *lua_client; /* The "fake client" to query Redis from Lua */
+ client *lua_caller; /* The client running EVAL right now, or NULL */
+ dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
+    unsigned long long lua_scripts_mem;  /* Cached scripts' memory + overhead */
+ mstime_t lua_time_limit; /* Script timeout in milliseconds */
+ mstime_t lua_time_start; /* Start time of script, milliseconds time */
+ int lua_write_dirty; /* True if a write command was called during the
+ execution of the current script. */
+ int lua_random_dirty; /* True if a random command was called during the
+ execution of the current script. */
+ int lua_replicate_commands; /* True if we are doing single commands repl. */
+    int lua_multi_emitted;/* True if we already propagated MULTI. */
+ int lua_repl; /* Script replication flags for redis.set_repl(). */
+ int lua_timedout; /* True if we reached the time limit for script
+ execution. */
+ int lua_kill; /* Kill the script if true. */
+ int lua_always_replicate_commands; /* Default replication type. */
+ /* Lazy free */
+ int lazyfree_lazy_eviction;
+ int lazyfree_lazy_expire;
+ int lazyfree_lazy_server_del;
+ /* Latency monitor */
+ long long latency_monitor_threshold;
+ dict *latency_events;
+ /* Assert & bug reporting */
+ const char *assert_failed;
+ const char *assert_file;
+ int assert_line;
+ int bug_report_start; /* True if bug report header was already logged. */
+ int watchdog_period; /* Software watchdog period in ms. 0 = off */
+ /* System hardware info */
+ size_t system_memory_size; /* Total memory in system as reported by OS */
+
+ /* Mutexes used to protect atomic variables when atomic builtins are
+ * not available. */
+ pthread_mutex_t lruclock_mutex;
+ pthread_mutex_t next_client_id_mutex;
+ pthread_mutex_t unixtime_mutex;
+};
+
+typedef struct pubsubPattern {
+ client *client;
+ robj *pattern;
+} pubsubPattern;
+
+typedef void redisCommandProc(client *c);
+typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+struct redisCommand {
+ char *name;
+ redisCommandProc *proc;
+ int arity;
+ char *sflags; /* Flags as string representation, one char per flag. */
+ int flags; /* The actual flags, obtained from the 'sflags' field. */
+ /* Use a function to determine keys arguments in a command line.
+ * Used for Redis Cluster redirect. */
+ redisGetKeysProc *getkeys_proc;
+ /* What keys should be loaded in background when calling this command? */
+ int firstkey; /* The first argument that's a key (0 = no keys) */
+ int lastkey; /* The last argument that's a key */
+ int keystep; /* The step between first and last key */
+ long long microseconds, calls;
+};
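+
+/* For example, in the command table GET is declared with firstkey=1,
+ * lastkey=1, keystep=1 (the only key is argv[1]), while MSET uses
+ * firstkey=1, lastkey=-1, keystep=2, meaning keys are argv[1], argv[3],
+ * and so on up to the end of the argument vector (a negative lastkey
+ * counts from the end). A sketch of the generic extraction loop, along
+ * the lines of getKeysUsingCommandTable() in db.c:
+ *
+ *  int last = cmd->lastkey;
+ *  if (last < 0) last = argc+last;
+ *  for (int j = cmd->firstkey; j <= last; j += cmd->keystep)
+ *      ... argv[j] is a key ...
+ */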
+
+struct redisFunctionSym {
+ char *name;
+ unsigned long pointer;
+};
+
+typedef struct _redisSortObject {
+ robj *obj;
+ union {
+ double score;
+ robj *cmpobj;
+ } u;
+} redisSortObject;
+
+typedef struct _redisSortOperation {
+ int type;
+ robj *pattern;
+} redisSortOperation;
+
+/* Structure to hold list iteration abstraction. */
+typedef struct {
+ robj *subject;
+ unsigned char encoding;
+ unsigned char direction; /* Iteration direction */
+ quicklistIter *iter;
+} listTypeIterator;
+
+/* Structure for an entry while iterating over a list. */
+typedef struct {
+ listTypeIterator *li;
+ quicklistEntry entry; /* Entry in quicklist */
+} listTypeEntry;
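+
+/* Typical iteration pattern (the listType* functions are declared below in
+ * the "List data type" section; LIST_TAIL selects head-to-tail direction):
+ *
+ *  listTypeIterator *iter = listTypeInitIterator(o,0,LIST_TAIL);
+ *  listTypeEntry entry;
+ *  while (listTypeNext(iter,&entry)) {
+ *      robj *val = listTypeGet(&entry);
+ *      ... use val ...
+ *      decrRefCount(val);
+ *  }
+ *  listTypeReleaseIterator(iter);
+ */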
+
+/* Structure to hold set iteration abstraction. */
+typedef struct {
+ robj *subject;
+ int encoding;
+ int ii; /* intset iterator */
+ dictIterator *di;
+} setTypeIterator;
+
+/* Structure to hold hash iteration abstraction. Note that iteration over
+ * hashes involves both fields and values. Because it is possible that
+ * not both are required, store pointers in the iterator to avoid
+ * unnecessary memory allocation for fields/values. */
+typedef struct {
+ robj *subject;
+ int encoding;
+
+ unsigned char *fptr, *vptr;
+
+ dictIterator *di;
+ dictEntry *de;
+} hashTypeIterator;
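+
+/* Typical iteration pattern (the hashType* functions and the OBJ_HASH_*
+ * flags are declared below):
+ *
+ *  hashTypeIterator *hi = hashTypeInitIterator(o);
+ *  while (hashTypeNext(hi) != C_ERR) {
+ *      sds field = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ *      sds value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ *      ... use field and value, then sdsfree() both ...
+ *  }
+ *  hashTypeReleaseIterator(hi);
+ */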
+
+#include "stream.h" /* Stream data type header file. */
+
+#define OBJ_HASH_KEY 1
+#define OBJ_HASH_VALUE 2
+
+/*-----------------------------------------------------------------------------
+ * Extern declarations
+ *----------------------------------------------------------------------------*/
+
+extern struct redisServer server;
+extern struct sharedObjectsStruct shared;
+extern dictType objectKeyPointerValueDictType;
+extern dictType objectKeyHeapPointerValueDictType;
+extern dictType setDictType;
+extern dictType zsetDictType;
+extern dictType clusterNodesDictType;
+extern dictType clusterNodesBlackListDictType;
+extern dictType dbDictType;
+extern dictType shaScriptObjectDictType;
+extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
+extern dictType hashDictType;
+extern dictType replScriptCacheDictType;
+extern dictType keyptrDictType;
+extern dictType modulesDictType;
+
+/*-----------------------------------------------------------------------------
+ * Functions prototypes
+ *----------------------------------------------------------------------------*/
+
+/* Modules */
+void moduleInitModulesSystem(void);
+int moduleLoad(const char *path, void **argv, int argc);
+void moduleLoadFromQueue(void);
+int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+moduleType *moduleTypeLookupModuleByID(uint64_t id);
+void moduleTypeNameByID(char *name, uint64_t moduleid);
+void moduleFreeContext(struct RedisModuleCtx *ctx);
+void unblockClientFromModule(client *c);
+void moduleHandleBlockedClients(void);
+void moduleBlockedClientTimedOut(client *c);
+void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask);
+size_t moduleCount(void);
+void moduleAcquireGIL(void);
+void moduleReleaseGIL(void);
+void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid);
+
+/* Utils */
+long long ustime(void);
+long long mstime(void);
+void getRandomHexChars(char *p, size_t len);
+void getRandomBytes(unsigned char *p, size_t len);
+uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
+void exitFromChild(int retcode);
+size_t redisPopcount(void *s, long count);
+void redisSetProcTitle(char *title);
+
+/* networking.c -- Networking and Client related operations */
+client *createClient(int fd);
+void closeTimedoutClients(void);
+void freeClient(client *c);
+void freeClientAsync(client *c);
+void resetClient(client *c);
+void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void *addDeferredMultiBulkLength(client *c);
+void setDeferredMultiBulkLength(client *c, void *node, long length);
+void processInputBuffer(client *c);
+void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void addReplyString(client *c, const char *s, size_t len);
+void addReplyBulk(client *c, robj *obj);
+void addReplyBulkCString(client *c, const char *s);
+void addReplyBulkCBuffer(client *c, const void *p, size_t len);
+void addReplyBulkLongLong(client *c, long long ll);
+void addReply(client *c, robj *obj);
+void addReplySds(client *c, sds s);
+void addReplyBulkSds(client *c, sds s);
+void addReplyError(client *c, const char *err);
+void addReplyStatus(client *c, const char *status);
+void addReplyDouble(client *c, double d);
+void addReplyHumanLongDouble(client *c, long double d);
+void addReplyLongLong(client *c, long long ll);
+void addReplyMultiBulkLen(client *c, long length);
+void addReplyHelp(client *c, const char **help);
+void addReplySubcommandSyntaxError(client *c);
+void copyClientOutputBuffer(client *dst, client *src);
+size_t sdsZmallocSize(sds s);
+size_t getStringObjectSdsUsedMemory(robj *o);
+void freeClientReplyValue(void *o);
+void *dupClientReplyValue(void *o);
+void getClientsMaxBuffers(unsigned long *longest_output_list,
+ unsigned long *biggest_input_buffer);
+char *getClientPeerId(client *client);
+sds catClientInfoString(sds s, client *client);
+sds getAllClientsInfoString(int type);
+void rewriteClientCommandVector(client *c, int argc, ...);
+void rewriteClientCommandArgument(client *c, int i, robj *newval);
+void replaceClientCommandVector(client *c, int argc, robj **argv);
+unsigned long getClientOutputBufferMemoryUsage(client *c);
+void freeClientsInAsyncFreeQueue(void);
+void asyncCloseClientOnOutputBufferLimitReached(client *c);
+int getClientType(client *c);
+int getClientTypeByName(char *name);
+char *getClientTypeName(int class);
+void flushSlavesOutputBuffers(void);
+void disconnectSlaves(void);
+int listenToPort(int port, int *fds, int *count);
+void pauseClients(mstime_t duration);
+int clientsArePaused(void);
+int processEventsWhileBlocked(void);
+int handleClientsWithPendingWrites(void);
+int clientHasPendingReplies(client *c);
+void unlinkClient(client *c);
+int writeToClient(int fd, client *c, int handler_installed);
+void linkClient(client *c);
+
+#ifdef __GNUC__
+void addReplyErrorFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+void addReplyStatusFormat(client *c, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void addReplyErrorFormat(client *c, const char *fmt, ...);
+void addReplyStatusFormat(client *c, const char *fmt, ...);
+#endif
+
+/* List data type */
+void listTypeTryConversion(robj *subject, robj *value);
+void listTypePush(robj *subject, robj *value, int where);
+robj *listTypePop(robj *subject, int where);
+unsigned long listTypeLength(const robj *subject);
+listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
+void listTypeReleaseIterator(listTypeIterator *li);
+int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
+robj *listTypeGet(listTypeEntry *entry);
+void listTypeInsert(listTypeEntry *entry, robj *value, int where);
+int listTypeEqual(listTypeEntry *entry, robj *o);
+void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
+void listTypeConvert(robj *subject, int enc);
+void unblockClientWaitingData(client *c);
+void popGenericCommand(client *c, int where);
+
+/* MULTI/EXEC/WATCH... */
+void unwatchAllKeys(client *c);
+void initClientMultiState(client *c);
+void freeClientMultiState(client *c);
+void queueMultiCommand(client *c);
+void touchWatchedKey(redisDb *db, robj *key);
+void touchWatchedKeysOnFlush(int dbid);
+void discardTransaction(client *c);
+void flagTransaction(client *c);
+void execCommandPropagateMulti(client *c);
+
+/* Redis object implementation */
+void decrRefCount(robj *o);
+void decrRefCountVoid(void *o);
+void incrRefCount(robj *o);
+robj *makeObjectShared(robj *o);
+robj *resetRefCount(robj *obj);
+void freeStringObject(robj *o);
+void freeListObject(robj *o);
+void freeSetObject(robj *o);
+void freeZsetObject(robj *o);
+void freeHashObject(robj *o);
+robj *createObject(int type, void *ptr);
+robj *createStringObject(const char *ptr, size_t len);
+robj *createRawStringObject(const char *ptr, size_t len);
+robj *createEmbeddedStringObject(const char *ptr, size_t len);
+robj *dupStringObject(const robj *o);
+int isSdsRepresentableAsLongLong(sds s, long long *llval);
+int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
+robj *tryObjectEncoding(robj *o);
+robj *getDecodedObject(robj *o);
+size_t stringObjectLen(robj *o);
+robj *createStringObjectFromLongLong(long long value);
+robj *createStringObjectFromLongLongForValue(long long value);
+robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
+robj *createQuicklistObject(void);
+robj *createZiplistObject(void);
+robj *createSetObject(void);
+robj *createIntsetObject(void);
+robj *createHashObject(void);
+robj *createZsetObject(void);
+robj *createZsetZiplistObject(void);
+robj *createStreamObject(void);
+robj *createModuleObject(moduleType *mt, void *value);
+int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg);
+int checkType(client *c, robj *o, int type);
+int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg);
+int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg);
+int getDoubleFromObject(const robj *o, double *target);
+int getLongLongFromObject(robj *o, long long *target);
+int getLongDoubleFromObject(robj *o, long double *target);
+int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg);
+char *strEncoding(int encoding);
+int compareStringObjects(robj *a, robj *b);
+int collateStringObjects(robj *a, robj *b);
+int equalStringObjects(robj *a, robj *b);
+unsigned long long estimateObjectIdleTime(robj *o);
+#define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR)
+
+/* Synchronous I/O with timeout */
+ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
+ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
+
+/* Replication */
+void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
+void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen);
+void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc);
+void updateSlavesWaitingBgsave(int bgsaveerr, int type);
+void replicationCron(void);
+void replicationHandleMasterDisconnection(void);
+void replicationCacheMaster(client *c);
+void resizeReplicationBacklog(long long newsize);
+void replicationSetMaster(char *ip, int port);
+void replicationUnsetMaster(void);
+void refreshGoodSlavesCount(void);
+void replicationScriptCacheInit(void);
+void replicationScriptCacheFlush(void);
+void replicationScriptCacheAdd(sds sha1);
+int replicationScriptCacheExists(sds sha1);
+void processClientsWaitingReplicas(void);
+void unblockClientWaitingReplicas(client *c);
+int replicationCountAcksByOffset(long long offset);
+void replicationSendNewlineToMaster(void);
+long long replicationGetSlaveOffset(void);
+char *replicationGetSlaveName(client *c);
+long long getPsyncInitialOffset(void);
+int replicationSetupSlaveForFullResync(client *slave, long long offset);
+void changeReplicationId(void);
+void clearReplicationId2(void);
+void chopReplicationBacklog(void);
+void replicationCacheMasterUsingMyself(void);
+void feedReplicationBacklog(void *ptr, size_t len);
+
+/* Generic persistence functions */
+void startLoading(FILE *fp);
+void loadingProgress(off_t pos);
+void stopLoading(void);
+
+#define DISK_ERROR_TYPE_AOF 1 /* Don't accept writes: AOF errors. */
+#define DISK_ERROR_TYPE_RDB 2 /* Don't accept writes: RDB errors. */
+#define DISK_ERROR_TYPE_NONE 0 /* No problems, we can accept writes. */
+int writeCommandsDeniedByDiskError(void);
+
+/* RDB persistence */
+#include "rdb.h"
+int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi);
+
+/* AOF persistence */
+void flushAppendOnlyFile(int force);
+void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
+void aofRemoveTempFile(pid_t childpid);
+int rewriteAppendOnlyFileBackground(void);
+int loadAppendOnlyFile(char *filename);
+void stopAppendOnly(void);
+int startAppendOnly(void);
+void backgroundRewriteDoneHandler(int exitcode, int bysignal);
+void aofRewriteBufferReset(void);
+unsigned long aofRewriteBufferSize(void);
+ssize_t aofReadDiffFromParent(void);
+
+/* Child info */
+void openChildInfoPipe(void);
+void closeChildInfoPipe(void);
+void sendChildInfo(int process_type);
+void receiveChildInfo(void);
+
+/* Sorted sets data type */
+
+/* Input flags. */
+#define ZADD_NONE 0
+#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
+#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
+#define ZADD_XX (1<<2) /* Only touch elements already existing. */
+
+/* Output flags. */
+#define ZADD_NOP (1<<3) /* Operation not performed because of conditionals.*/
+#define ZADD_NAN (1<<4)     /* The resulting score is not a number (NaN). */
+#define ZADD_ADDED (1<<5) /* The element was new and was added. */
+#define ZADD_UPDATED (1<<6) /* The element already existed, score updated. */
+
+/* Flags only used by the ZADD command but not by zsetAdd() API: */
+#define ZADD_CH (1<<16) /* Return num of elements added or updated. */
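+
+/* Usage sketch with zsetAdd() (declared below): input flags select the
+ * behavior, output flags report what happened:
+ *
+ *  int flags = ZADD_NX;
+ *  double newscore;
+ *  if (zsetAdd(zobj,score,ele,&flags,&newscore)) {
+ *      if (flags & ZADD_ADDED) ... a new element was added ...
+ *      else if (flags & ZADD_NOP) ... nothing done due to NX/XX ...
+ *  } else if (flags & ZADD_NAN) {
+ *      ... the resulting score was not a number ...
+ *  }
+ */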
+
+/* Struct to hold an inclusive/exclusive range spec by score comparison. */
+typedef struct {
+ double min, max;
+ int minex, maxex; /* are min or max exclusive? */
+} zrangespec;
+
+/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
+typedef struct {
+ sds min, max; /* May be set to shared.(minstring|maxstring) */
+ int minex, maxex; /* are min or max exclusive? */
+} zlexrangespec;
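+
+/* For instance, ZRANGEBYSCORE key (1 5 parses into a zrangespec meaning
+ * 1 < score <= 5:
+ *
+ *  zrangespec range = { .min = 1, .max = 5, .minex = 1, .maxex = 0 };
+ */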
+
+zskiplist *zslCreate(void);
+void zslFree(zskiplist *zsl);
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele);
+unsigned char *zzlInsert(unsigned char *zl, sds ele, double score);
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node);
+zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
+zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
+double zzlGetScore(unsigned char *sptr);
+void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
+unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
+unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range);
+unsigned long zsetLength(const robj *zobj);
+void zsetConvert(robj *zobj, int encoding);
+void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen);
+int zsetScore(robj *zobj, sds member, double *score);
+unsigned long zslGetRank(zskiplist *zsl, double score, sds o);
+int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore);
+long zsetRank(robj *zobj, sds ele, int reverse);
+int zsetDel(robj *zobj, sds ele);
+void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey, robj *countarg);
+sds ziplistGetObject(unsigned char *sptr);
+int zslValueGteMin(double value, zrangespec *spec);
+int zslValueLteMax(double value, zrangespec *spec);
+void zslFreeLexRange(zlexrangespec *spec);
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec);
+unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range);
+unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range);
+zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range);
+zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range);
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec);
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec);
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
+
+/* Core functions */
+int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
+size_t freeMemoryGetNotCountedMemory(void);
+int freeMemoryIfNeeded(void);
+int processCommand(client *c);
+void setupSignalHandlers(void);
+struct redisCommand *lookupCommand(sds name);
+struct redisCommand *lookupCommandByCString(char *s);
+struct redisCommand *lookupCommandOrOriginal(sds name);
+void call(client *c, int flags);
+void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
+void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
+void forceCommandPropagation(client *c, int flags);
+void preventCommandPropagation(client *c);
+void preventCommandAOF(client *c);
+void preventCommandReplication(client *c);
+int prepareForShutdown();
+#ifdef __GNUC__
+void serverLog(int level, const char *fmt, ...)
+ __attribute__((format(printf, 2, 3)));
+#else
+void serverLog(int level, const char *fmt, ...);
+#endif
+void serverLogRaw(int level, const char *msg);
+void serverLogFromHandler(int level, const char *msg);
+void usage(void);
+void updateDictResizePolicy(void);
+int htNeedsResize(dict *dict);
+void populateCommandTable(void);
+void resetCommandTableStats(void);
+void adjustOpenFilesLimit(void);
+void closeListeningSockets(int unlink_unix_socket);
+void updateCachedTime(void);
+void resetServerStats(void);
+void activeDefragCycle(void);
+unsigned int getLRUClock(void);
+unsigned int LRU_CLOCK(void);
+const char *evictPolicyToString(void);
+struct redisMemOverhead *getMemoryOverheadData(void);
+void freeMemoryOverheadData(struct redisMemOverhead *mh);
+
+#define RESTART_SERVER_NONE 0
+#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
+#define RESTART_SERVER_CONFIG_REWRITE (1<<1) /* CONFIG REWRITE before restart.*/
+int restartServer(int flags, mstime_t delay);
+
+/* Set data type */
+robj *setTypeCreate(sds value);
+int setTypeAdd(robj *subject, sds value);
+int setTypeRemove(robj *subject, sds value);
+int setTypeIsMember(robj *subject, sds value);
+setTypeIterator *setTypeInitIterator(robj *subject);
+void setTypeReleaseIterator(setTypeIterator *si);
+int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
+sds setTypeNextObject(setTypeIterator *si);
+int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
+unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
+unsigned long setTypeSize(const robj *subject);
+void setTypeConvert(robj *subject, int enc);
+
+/* Hash data type */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+
+void hashTypeConvert(robj *o, int enc);
+void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
+void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2);
+int hashTypeExists(robj *o, sds key);
+int hashTypeDelete(robj *o, sds key);
+unsigned long hashTypeLength(const robj *o);
+hashTypeIterator *hashTypeInitIterator(robj *subject);
+void hashTypeReleaseIterator(hashTypeIterator *hi);
+int hashTypeNext(hashTypeIterator *hi);
+void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
+ unsigned char **vstr,
+ unsigned int *vlen,
+ long long *vll);
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what);
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll);
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key);
+robj *hashTypeGetValueObject(robj *o, sds field);
+int hashTypeSet(robj *o, sds field, sds value, int flags);
+
+/* Pub / Sub */
+int pubsubUnsubscribeAllChannels(client *c, int notify);
+int pubsubUnsubscribeAllPatterns(client *c, int notify);
+void freePubsubPattern(void *p);
+int listMatchPubsubPattern(void *a, void *b);
+int pubsubPublishMessage(robj *channel, robj *message);
+
+/* Keyspace events notification */
+void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
+int keyspaceEventsStringToFlags(char *classes);
+sds keyspaceEventsFlagsToString(int flags);
+
+/* Configuration */
+void loadServerConfig(char *filename, char *options);
+void appendServerSaveParams(time_t seconds, int changes);
+void resetServerSaveParams(void);
+struct rewriteConfigState; /* Forward declaration to export API. */
+void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
+int rewriteConfig(char *path);
+
+/* db.c -- Keyspace access API */
+int removeExpire(redisDb *db, robj *key);
+void propagateExpire(redisDb *db, robj *key, int lazy);
+int expireIfNeeded(redisDb *db, robj *key);
+long long getExpire(redisDb *db, robj *key);
+void setExpire(client *c, redisDb *db, robj *key, long long when);
+robj *lookupKey(redisDb *db, robj *key, int flags);
+robj *lookupKeyRead(redisDb *db, robj *key);
+robj *lookupKeyWrite(redisDb *db, robj *key);
+robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
+robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
+robj *objectCommandLookup(client *c, robj *key);
+robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
+void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
+ long long lru_clock);
+#define LOOKUP_NONE 0
+#define LOOKUP_NOTOUCH (1<<0)
+void dbAdd(redisDb *db, robj *key, robj *val);
+void dbOverwrite(redisDb *db, robj *key, robj *val);
+void setKey(redisDb *db, robj *key, robj *val);
+int dbExists(redisDb *db, robj *key);
+robj *dbRandomKey(redisDb *db);
+int dbSyncDelete(redisDb *db, robj *key);
+int dbDelete(redisDb *db, robj *key);
+robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
+
+#define EMPTYDB_NO_FLAGS 0 /* No flags. */
+#define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. */
+long long emptyDb(int dbnum, int flags, void(callback)(void*));
+
+int selectDb(client *c, int id);
+void signalModifiedKey(redisDb *db, robj *key);
+void signalFlushedDb(int dbid);
+unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
+unsigned int countKeysInSlot(unsigned int hashslot);
+unsigned int delKeysInSlot(unsigned int hashslot);
+int verifyClusterConfigWithData(void);
+void scanGenericCommand(client *c, robj *o, unsigned long cursor);
+int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor);
+void slotToKeyAdd(robj *key);
+void slotToKeyDel(robj *key);
+void slotToKeyFlush(void);
+int dbAsyncDelete(redisDb *db, robj *key);
+void emptyDbAsync(redisDb *db);
+void slotToKeyFlushAsync(void);
+size_t lazyfreeGetPendingObjectsCount(void);
+void freeObjAsync(robj *o);
+
+/* API to get key arguments from commands */
+int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+void getKeysFreeResult(int *result);
+int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys);
+int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
+
+/* Cluster */
+void clusterInit(void);
+unsigned short crc16(const char *buf, int len);
+unsigned int keyHashSlot(char *key, int keylen);
+void clusterCron(void);
+void clusterPropagatePublish(robj *channel, robj *message);
+void migrateCloseTimedoutSockets(void);
+void clusterBeforeSleep(void);
+int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, unsigned char *payload, uint32_t len);
+
+/* Sentinel */
+void initSentinelConfig(void);
+void initSentinel(void);
+void sentinelTimer(void);
+char *sentinelHandleConfiguration(char **argv, int argc);
+void sentinelIsRunning(void);
+
+/* redis-check-rdb & aof */
+int redis_check_rdb(char *rdbfilename, FILE *fp);
+int redis_check_rdb_main(int argc, char **argv, FILE *fp);
+int redis_check_aof_main(int argc, char **argv);
+
+/* Scripting */
+void scriptingInit(int setup);
+int ldbRemoveChild(pid_t pid);
+void ldbKillForkedSessions(void);
+int ldbPendingChildren(void);
+sds luaCreateFunction(client *c, lua_State *lua, robj *body);
+
+/* Blocked clients */
+void processUnblockedClients(void);
+void blockClient(client *c, int btype);
+void unblockClient(client *c);
+void replyToBlockedClientTimedOut(client *c);
+int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit);
+void disconnectAllBlockedClients(void);
+void handleClientsBlockedOnKeys(void);
+void signalKeyAsReady(redisDb *db, robj *key);
+void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, streamID *ids);
+
+/* expire.c -- Handling of expired keys */
+void activeExpireCycle(int type);
+void expireSlaveKeys(void);
+void rememberSlaveKeyWithExpire(redisDb *db, robj *key);
+void flushSlaveKeysWithExpireList(void);
+size_t getSlaveKeyWithExpireCount(void);
+
+/* evict.c -- maxmemory handling and LRU eviction. */
+void evictionPoolAlloc(void);
+#define LFU_INIT_VAL 5
+unsigned long LFUGetTimeInMinutes(void);
+uint8_t LFULogIncr(uint8_t value);
+unsigned long LFUDecrAndReturn(robj *o);
+
+/* Keys hashing / comparison functions for dict.c hash tables. */
+uint64_t dictSdsHash(const void *key);
+int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
+void dictSdsDestructor(void *privdata, void *val);
+
+/* Git SHA1 */
+char *redisGitSHA1(void);
+char *redisGitDirty(void);
+uint64_t redisBuildId(void);
+
+/* Commands prototypes */
+void authCommand(client *c);
+void pingCommand(client *c);
+void echoCommand(client *c);
+void commandCommand(client *c);
+void setCommand(client *c);
+void setnxCommand(client *c);
+void setexCommand(client *c);
+void psetexCommand(client *c);
+void getCommand(client *c);
+void delCommand(client *c);
+void unlinkCommand(client *c);
+void existsCommand(client *c);
+void setbitCommand(client *c);
+void getbitCommand(client *c);
+void bitfieldCommand(client *c);
+void setrangeCommand(client *c);
+void getrangeCommand(client *c);
+void incrCommand(client *c);
+void decrCommand(client *c);
+void incrbyCommand(client *c);
+void decrbyCommand(client *c);
+void incrbyfloatCommand(client *c);
+void selectCommand(client *c);
+void swapdbCommand(client *c);
+void randomkeyCommand(client *c);
+void keysCommand(client *c);
+void scanCommand(client *c);
+void dbsizeCommand(client *c);
+void lastsaveCommand(client *c);
+void saveCommand(client *c);
+void bgsaveCommand(client *c);
+void bgrewriteaofCommand(client *c);
+void shutdownCommand(client *c);
+void moveCommand(client *c);
+void renameCommand(client *c);
+void renamenxCommand(client *c);
+void lpushCommand(client *c);
+void rpushCommand(client *c);
+void lpushxCommand(client *c);
+void rpushxCommand(client *c);
+void linsertCommand(client *c);
+void lpopCommand(client *c);
+void rpopCommand(client *c);
+void llenCommand(client *c);
+void lindexCommand(client *c);
+void lrangeCommand(client *c);
+void ltrimCommand(client *c);
+void typeCommand(client *c);
+void lsetCommand(client *c);
+void saddCommand(client *c);
+void sremCommand(client *c);
+void smoveCommand(client *c);
+void sismemberCommand(client *c);
+void scardCommand(client *c);
+void spopCommand(client *c);
+void srandmemberCommand(client *c);
+void sinterCommand(client *c);
+void sinterstoreCommand(client *c);
+void sunionCommand(client *c);
+void sunionstoreCommand(client *c);
+void sdiffCommand(client *c);
+void sdiffstoreCommand(client *c);
+void sscanCommand(client *c);
+void syncCommand(client *c);
+void flushdbCommand(client *c);
+void flushallCommand(client *c);
+void sortCommand(client *c);
+void lremCommand(client *c);
+void rpoplpushCommand(client *c);
+void infoCommand(client *c);
+void mgetCommand(client *c);
+void monitorCommand(client *c);
+void expireCommand(client *c);
+void expireatCommand(client *c);
+void pexpireCommand(client *c);
+void pexpireatCommand(client *c);
+void getsetCommand(client *c);
+void ttlCommand(client *c);
+void touchCommand(client *c);
+void pttlCommand(client *c);
+void persistCommand(client *c);
+void slaveofCommand(client *c);
+void roleCommand(client *c);
+void debugCommand(client *c);
+void msetCommand(client *c);
+void msetnxCommand(client *c);
+void zaddCommand(client *c);
+void zincrbyCommand(client *c);
+void zrangeCommand(client *c);
+void zrangebyscoreCommand(client *c);
+void zrevrangebyscoreCommand(client *c);
+void zrangebylexCommand(client *c);
+void zrevrangebylexCommand(client *c);
+void zcountCommand(client *c);
+void zlexcountCommand(client *c);
+void zrevrangeCommand(client *c);
+void zcardCommand(client *c);
+void zremCommand(client *c);
+void zscoreCommand(client *c);
+void zremrangebyscoreCommand(client *c);
+void zremrangebylexCommand(client *c);
+void zpopminCommand(client *c);
+void zpopmaxCommand(client *c);
+void bzpopminCommand(client *c);
+void bzpopmaxCommand(client *c);
+void multiCommand(client *c);
+void execCommand(client *c);
+void discardCommand(client *c);
+void blpopCommand(client *c);
+void brpopCommand(client *c);
+void brpoplpushCommand(client *c);
+void appendCommand(client *c);
+void strlenCommand(client *c);
+void zrankCommand(client *c);
+void zrevrankCommand(client *c);
+void hsetCommand(client *c);
+void hsetnxCommand(client *c);
+void hgetCommand(client *c);
+void hmsetCommand(client *c);
+void hmgetCommand(client *c);
+void hdelCommand(client *c);
+void hlenCommand(client *c);
+void hstrlenCommand(client *c);
+void zremrangebyrankCommand(client *c);
+void zunionstoreCommand(client *c);
+void zinterstoreCommand(client *c);
+void zscanCommand(client *c);
+void hkeysCommand(client *c);
+void hvalsCommand(client *c);
+void hgetallCommand(client *c);
+void hexistsCommand(client *c);
+void hscanCommand(client *c);
+void configCommand(client *c);
+void hincrbyCommand(client *c);
+void hincrbyfloatCommand(client *c);
+void subscribeCommand(client *c);
+void unsubscribeCommand(client *c);
+void psubscribeCommand(client *c);
+void punsubscribeCommand(client *c);
+void publishCommand(client *c);
+void pubsubCommand(client *c);
+void watchCommand(client *c);
+void unwatchCommand(client *c);
+void clusterCommand(client *c);
+void restoreCommand(client *c);
+void migrateCommand(client *c);
+void askingCommand(client *c);
+void readonlyCommand(client *c);
+void readwriteCommand(client *c);
+void dumpCommand(client *c);
+void objectCommand(client *c);
+void memoryCommand(client *c);
+void clientCommand(client *c);
+void evalCommand(client *c);
+void evalShaCommand(client *c);
+void scriptCommand(client *c);
+void timeCommand(client *c);
+void bitopCommand(client *c);
+void bitcountCommand(client *c);
+void bitposCommand(client *c);
+void replconfCommand(client *c);
+void waitCommand(client *c);
+void geoencodeCommand(client *c);
+void geodecodeCommand(client *c);
+void georadiusbymemberCommand(client *c);
+void georadiusbymemberroCommand(client *c);
+void georadiusCommand(client *c);
+void georadiusroCommand(client *c);
+void geoaddCommand(client *c);
+void geohashCommand(client *c);
+void geoposCommand(client *c);
+void geodistCommand(client *c);
+void pfselftestCommand(client *c);
+void pfaddCommand(client *c);
+void pfcountCommand(client *c);
+void pfmergeCommand(client *c);
+void pfdebugCommand(client *c);
+void latencyCommand(client *c);
+void moduleCommand(client *c);
+void securityWarningCommand(client *c);
+void xaddCommand(client *c);
+void xrangeCommand(client *c);
+void xrevrangeCommand(client *c);
+void xlenCommand(client *c);
+void xreadCommand(client *c);
+void xgroupCommand(client *c);
+void xackCommand(client *c);
+void xpendingCommand(client *c);
+void xclaimCommand(client *c);
+void xinfoCommand(client *c);
+void xdelCommand(client *c);
+void xtrimCommand(client *c);
+
+#if defined(__GNUC__)
+void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
+void free(void *ptr) __attribute__ ((deprecated));
+void *malloc(size_t size) __attribute__ ((deprecated));
+void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
+#endif
+
+/* Debugging stuff */
+void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line);
+void _serverAssert(const char *estr, const char *file, int line);
+void _serverPanic(const char *file, int line, const char *msg, ...);
+void bugReportStart(void);
+void serverLogObjectDebugInfo(const robj *o);
+void sigsegvHandler(int sig, siginfo_t *info, void *secret);
+sds genRedisInfoString(char *section);
+void enableWatchdog(int period);
+void disableWatchdog(void);
+void watchdogScheduleSignal(int period);
+void serverLogHexDump(int level, char *descr, void *value, size_t len);
+int memtest_preserving_test(unsigned long *m, size_t bytes, int passes);
+void mixDigest(unsigned char *digest, void *ptr, size_t len);
+void xorDigest(unsigned char *digest, void *ptr, size_t len);
+
+#define redisDebug(fmt, ...) \
+ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
+#define redisDebugMark() \
+ printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
+
+#endif
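
All of the command implementations declared above share the same shape: they receive the invoking client and reply through the addReply*() helpers, with arity already validated by the command table before the handler runs. A minimal sketch of a hypothetical command (not one of the prototypes above) under those assumptions:

    /* Hypothetical example command: echoes back its single argument.
     * argv[0] is the command name itself, so the payload is argv[1]. */
    void firstargCommand(client *c) {
        addReplyBulk(c, c->argv[1]);
    }
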
diff --git a/src/setproctitle.c b/src/setproctitle.c
index f44253e16..6563242de 100644
--- a/src/setproctitle.c
+++ b/src/setproctitle.c
@@ -39,7 +39,11 @@
#include <errno.h> /* errno program_invocation_name program_invocation_short_name */
#if !defined(HAVE_SETPROCTITLE)
-#define HAVE_SETPROCTITLE (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__)
+#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__)
+#define HAVE_SETPROCTITLE 1
+#else
+#define HAVE_SETPROCTITLE 0
+#endif
#endif
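
A note on why this hunk is needed: expanding `defined` from inside a macro body that is later tested with #if is undefined behavior (C11 §6.10.1), so compilers may evaluate the old one-liner differently. The replacement uses the portable explicit 0/1 idiom, shown here in generic form with hypothetical platform macros:

    /* Portable feature-test idiom: the macro is always defined to 0 or 1,
     * so a later `#if HAVE_SOME_FEATURE` is well defined everywhere. */
    #if defined(PLATFORM_A) || defined(PLATFORM_B)
    #define HAVE_SOME_FEATURE 1
    #else
    #define HAVE_SOME_FEATURE 0
    #endif
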
diff --git a/src/sha1.c b/src/sha1.c
index 7f73b40d3..ce487e367 100644
--- a/src/sha1.c
+++ b/src/sha1.c
@@ -23,7 +23,7 @@ A million repetitions of "a"
#include <stdio.h>
#include <string.h>
-#include <sys/types.h> /* for u_int*_t */
+#include <stdint.h>
#include "solarisfixes.h"
#include "sha1.h"
#include "config.h"
@@ -53,12 +53,12 @@ A million repetitions of "a"
/* Hash a single 512-bit block. This is the core of the algorithm. */
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64])
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
- u_int32_t a, b, c, d, e;
+ uint32_t a, b, c, d, e;
typedef union {
unsigned char c[64];
- u_int32_t l[16];
+ uint32_t l[16];
} CHAR64LONG16;
#ifdef SHA1HANDSOFF
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
@@ -128,9 +128,9 @@ void SHA1Init(SHA1_CTX* context)
/* Run your data through this. */
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len)
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len)
{
- u_int32_t i, j;
+ uint32_t i, j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
@@ -168,7 +168,7 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context)
for (i = 0; i < 2; i++)
{
- u_int32_t t = context->count[i];
+ uint32_t t = context->count[i];
int j;
for (j = 0; j < 4; t >>= 8, j++)
diff --git a/src/sha1.h b/src/sha1.h
index 4c76d19da..f41691258 100644
--- a/src/sha1.h
+++ b/src/sha1.h
@@ -8,14 +8,14 @@ By Steve Reid <steve@edmweb.com>
*/
typedef struct {
- u_int32_t state[5];
- u_int32_t count[2];
+ uint32_t state[5];
+ uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
-void SHA1Transform(u_int32_t state[5], const unsigned char buffer[64]);
+void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX* context);
-void SHA1Update(SHA1_CTX* context, const unsigned char* data, u_int32_t len);
+void SHA1Update(SHA1_CTX* context, const unsigned char* data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
#ifdef REDIS_TEST
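
After the u_int32_t to uint32_t conversion the API depends only on the standard <stdint.h>. A small standalone usage sketch (not part of the patch) that should print the well-known SHA1 digest of "abc":

    #include <stdint.h>
    #include <stdio.h>
    #include "sha1.h"

    int main(void) {
        SHA1_CTX ctx;
        unsigned char digest[20];

        SHA1Init(&ctx);
        SHA1Update(&ctx, (const unsigned char *)"abc", 3);
        SHA1Final(digest, &ctx);
        for (int i = 0; i < 20; i++) printf("%02x", digest[i]);
        printf("\n"); /* a9993e364706816aba3e25717850c26c9cd0d89d */
        return 0;
    }
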
diff --git a/src/siphash.c b/src/siphash.c
new file mode 100644
index 000000000..6b9419031
--- /dev/null
+++ b/src/siphash.c
@@ -0,0 +1,360 @@
+/*
+ SipHash reference C implementation
+
+ Copyright (c) 2012-2016 Jean-Philippe Aumasson
+ <jeanphilippe.aumasson@gmail.com>
+ Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
+ Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+ ----------------------------------------------------------------------------
+
+ This version was modified by Salvatore Sanfilippo <antirez@gmail.com>
+ in the following ways:
+
+ 1. We use SipHash 1-2. This is not believed to be as strong as the
+ suggested 2-4 variant, but AFAIK there are no trivial attacks
+ against this reduced-rounds version, and it runs at the same speed
+ as the Murmurhash2 we used previously, while the 2-4 variant slowed
+ down Redis by a 4% figure more or less.
+ 2. Hard-code rounds in the hope the compiler can optimize them more
+ in this raw form. Anyway we always want the standard 2-4 variant.
+ 3. Modify the prototype and implementation so that the function directly
+ returns a uint64_t value, the hash itself, instead of receiving an
+ output buffer. This also means that the output size is fixed to 8 bytes
+ and the 16-byte output handling code was removed.
+ 4. Provide a case insensitive variant to be used when hashing strings that
+ must be considered identical by the hash table regardless of case.
+ If we don't directly have a case insensitive hash function, we need to
+ perform a text transformation in some temporary buffer, which is costly.
+ 5. Remove debugging code.
+ 6. Modified the original test.c file to be a stand-alone function testing
+ the function in the new form (returning a uint64_t) using just the
+ relevant test vector.
+ */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Fast tolower()-like function that does not care about locale
+ * but just maps A-Z to a-z. */
+int siptlw(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c+('a'-'A');
+ } else {
+ return c;
+ }
+}
+
+/* Test if the CPU is little endian and supports unaligned accesses.
+ * Both conditions allow the function to be sped up, and both happen
+ * to be true on most x86 servers. */
+#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__)
+#define UNALIGNED_LE_CPU
+#endif
+
+#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
+
+#define U32TO8_LE(p, v) \
+ (p)[0] = (uint8_t)((v)); \
+ (p)[1] = (uint8_t)((v) >> 8); \
+ (p)[2] = (uint8_t)((v) >> 16); \
+ (p)[3] = (uint8_t)((v) >> 24);
+
+#define U64TO8_LE(p, v) \
+ U32TO8_LE((p), (uint32_t)((v))); \
+ U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
+
+#ifdef UNALIGNED_LE_CPU
+#define U8TO64_LE(p) (*((uint64_t*)(p)))
+#else
+#define U8TO64_LE(p) \
+ (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \
+ ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \
+ ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \
+ ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
+#endif
+
+#define U8TO64_LE_NOCASE(p) \
+ (((uint64_t)(siptlw((p)[0]))) | \
+ ((uint64_t)(siptlw((p)[1])) << 8) | \
+ ((uint64_t)(siptlw((p)[2])) << 16) | \
+ ((uint64_t)(siptlw((p)[3])) << 24) | \
+ ((uint64_t)(siptlw((p)[4])) << 32) | \
+ ((uint64_t)(siptlw((p)[5])) << 40) | \
+ ((uint64_t)(siptlw((p)[6])) << 48) | \
+ ((uint64_t)(siptlw((p)[7])) << 56))
+
+#define SIPROUND \
+ do { \
+ v0 += v1; \
+ v1 = ROTL(v1, 13); \
+ v1 ^= v0; \
+ v0 = ROTL(v0, 32); \
+ v2 += v3; \
+ v3 = ROTL(v3, 16); \
+ v3 ^= v2; \
+ v0 += v3; \
+ v3 = ROTL(v3, 21); \
+ v3 ^= v0; \
+ v2 += v1; \
+ v1 = ROTL(v1, 17); \
+ v1 ^= v2; \
+ v2 = ROTL(v2, 32); \
+ } while (0)
+
+uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)in[6]) << 48; /* fall-thru */
+ case 6: b |= ((uint64_t)in[5]) << 40; /* fall-thru */
+ case 5: b |= ((uint64_t)in[4]) << 32; /* fall-thru */
+ case 4: b |= ((uint64_t)in[3]) << 24; /* fall-thru */
+ case 3: b |= ((uint64_t)in[2]) << 16; /* fall-thru */
+ case 2: b |= ((uint64_t)in[1]) << 8; /* fall-thru */
+ case 1: b |= ((uint64_t)in[0]); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
+{
+#ifndef UNALIGNED_LE_CPU
+ uint64_t hash;
+ uint8_t *out = (uint8_t*) &hash;
+#endif
+ uint64_t v0 = 0x736f6d6570736575ULL;
+ uint64_t v1 = 0x646f72616e646f6dULL;
+ uint64_t v2 = 0x6c7967656e657261ULL;
+ uint64_t v3 = 0x7465646279746573ULL;
+ uint64_t k0 = U8TO64_LE(k);
+ uint64_t k1 = U8TO64_LE(k + 8);
+ uint64_t m;
+ const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
+ const int left = inlen & 7;
+ uint64_t b = ((uint64_t)inlen) << 56;
+ v3 ^= k1;
+ v2 ^= k0;
+ v1 ^= k1;
+ v0 ^= k0;
+
+ for (; in != end; in += 8) {
+ m = U8TO64_LE_NOCASE(in);
+ v3 ^= m;
+
+ SIPROUND;
+
+ v0 ^= m;
+ }
+
+ switch (left) {
+ case 7: b |= ((uint64_t)siptlw(in[6])) << 48; /* fall-thru */
+ case 6: b |= ((uint64_t)siptlw(in[5])) << 40; /* fall-thru */
+ case 5: b |= ((uint64_t)siptlw(in[4])) << 32; /* fall-thru */
+ case 4: b |= ((uint64_t)siptlw(in[3])) << 24; /* fall-thru */
+ case 3: b |= ((uint64_t)siptlw(in[2])) << 16; /* fall-thru */
+ case 2: b |= ((uint64_t)siptlw(in[1])) << 8; /* fall-thru */
+ case 1: b |= ((uint64_t)siptlw(in[0])); break;
+ case 0: break;
+ }
+
+ v3 ^= b;
+
+ SIPROUND;
+
+ v0 ^= b;
+ v2 ^= 0xff;
+
+ SIPROUND;
+ SIPROUND;
+
+ b = v0 ^ v1 ^ v2 ^ v3;
+#ifndef UNALIGNED_LE_CPU
+ U64TO8_LE(out, b);
+ return hash;
+#else
+ return b;
+#endif
+}
+
+
+/* --------------------------------- TEST ------------------------------------ */
+
+#ifdef SIPHASH_TEST
+
+const uint8_t vectors_sip64[64][8] = {
+ { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
+ { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
+ { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
+ { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
+ { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
+ { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
+ { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
+ { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
+ { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
+ { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
+ { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
+ { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
+ { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
+ { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
+ { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
+ { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
+ { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
+ { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
+ { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
+ { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
+ { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
+ { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
+ { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
+ { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
+ { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
+ { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
+ { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
+ { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
+ { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
+ { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
+ { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
+ { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
+ { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
+ { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
+ { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
+ { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
+ { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
+ { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
+ { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
+ { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
+ { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
+ { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
+ { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
+ { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
+ { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
+ { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
+ { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
+ { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
+ { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
+ { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
+ { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
+ { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
+ { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
+ { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
+ { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
+ { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
+ { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
+ { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
+ { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
+ { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
+ { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
+ { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
+ { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
+ { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },
+};
+
+
+/* Test siphash using a test vector. Returns 0 if the function passed
+ * all the tests, otherwise 1 is returned.
+ *
+ * IMPORTANT: The test vector is for SipHash 2-4. Before running
+ * the test, revert the siphash() function back to 2-4 rounds, since
+ * it now uses 1-2 rounds. */
+int siphash_test(void) {
+ uint8_t in[64], k[16];
+ int i;
+ int fails = 0;
+
+ for (i = 0; i < 16; ++i)
+ k[i] = i;
+
+ for (i = 0; i < 64; ++i) {
+ in[i] = i;
+ uint64_t hash = siphash(in, i, k);
+ const uint8_t *v = NULL;
+ v = (uint8_t *)vectors_sip64;
+ if (memcmp(&hash, v + (i * 8), 8)) {
+ /* printf("fail for %d bytes\n", i); */
+ fails++;
+ }
+ }
+
+ /* Run a few basic tests with the case insensitive version. */
+ uint64_t h1, h2;
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 != h2) fails++;
+
+ h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
+ if (h1 == h2) fails++;
+
+ if (!fails) return 0;
+ return 1;
+}
+
+int main(void) {
+ if (siphash_test() == 0) {
+ printf("SipHash test: OK\n");
+ return 0;
+ } else {
+ printf("SipHash test: FAILED\n");
+ return 1;
+ }
+}
+
+#endif
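
A usage sketch for the two entry points above, assuming the caller keeps a 16-byte seed (Redis seeds its hash tables with random bytes at startup; the fixed seed here is just a placeholder):

    #include <stdint.h>
    #include <string.h>

    uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
    uint64_t siphash_nocase(const uint8_t *in, const size_t inlen,
                            const uint8_t *k);

    static const uint8_t seed[16] = "0123456789abcdef"; /* placeholder */

    uint64_t hashKey(const char *key) {
        return siphash((const uint8_t *)key, strlen(key), seed);
    }

    uint64_t hashKeyNoCase(const char *key) {
        /* "FOO" and "foo" produce the same hash with this variant. */
        return siphash_nocase((const uint8_t *)key, strlen(key), seed);
    }
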
diff --git a/src/slowlog.c b/src/slowlog.c
index ff6ccf472..8e183fca4 100644
--- a/src/slowlog.c
+++ b/src/slowlog.c
@@ -39,13 +39,13 @@
*/
-#include "redis.h"
+#include "server.h"
#include "slowlog.h"
/* Create a new slowlog entry.
* Incrementing the ref count of all the objects retained is up to
* this function. */
-slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
+slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long duration) {
slowlogEntry *se = zmalloc(sizeof(*se));
int j, slargc = argc;
@@ -57,12 +57,12 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
* at SLOWLOG_ENTRY_MAX_ARGC, but use the last argument to specify
* how many remaining arguments there were in the original command. */
if (slargc != argc && j == slargc-1) {
- se->argv[j] = createObject(REDIS_STRING,
+ se->argv[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"... (%d more arguments)",
argc-slargc+1));
} else {
/* Trim too long strings as well... */
- if (argv[j]->type == REDIS_STRING &&
+ if (argv[j]->type == OBJ_STRING &&
sdsEncodedObject(argv[j]) &&
sdslen(argv[j]->ptr) > SLOWLOG_ENTRY_MAX_STRING)
{
@@ -71,16 +71,25 @@ slowlogEntry *slowlogCreateEntry(robj **argv, int argc, long long duration) {
s = sdscatprintf(s,"... (%lu more bytes)",
(unsigned long)
sdslen(argv[j]->ptr) - SLOWLOG_ENTRY_MAX_STRING);
- se->argv[j] = createObject(REDIS_STRING,s);
- } else {
+ se->argv[j] = createObject(OBJ_STRING,s);
+ } else if (argv[j]->refcount == OBJ_SHARED_REFCOUNT) {
se->argv[j] = argv[j];
- incrRefCount(argv[j]);
+ } else {
+ /* Here we need to duplicate the string objects composing the
+ * argument vector of the command, because those may otherwise
+ * end up shared with string objects stored into keys. Having
+ * shared objects between any part of Redis, and the data
+ * structure holding the data, is a problem: FLUSHALL ASYNC
+ * may release the shared string object and create a race. */
+ se->argv[j] = dupStringObject(argv[j]);
}
}
}
se->time = time(NULL);
se->duration = duration;
se->id = server.slowlog_entry_id++;
+ se->peerid = sdsnew(getClientPeerId(c));
+ se->cname = c->name ? sdsnew(c->name->ptr) : sdsempty();
return se;
}
@@ -95,6 +104,8 @@ void slowlogFreeEntry(void *septr) {
for (j = 0; j < se->argc; j++)
decrRefCount(se->argv[j]);
zfree(se->argv);
+ sdsfree(se->peerid);
+ sdsfree(se->cname);
zfree(se);
}
@@ -109,10 +120,11 @@ void slowlogInit(void) {
/* Push a new entry into the slow log.
* This function will make sure to trim the slow log accordingly to the
* configured max length. */
-void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration) {
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration) {
if (server.slowlog_log_slower_than < 0) return; /* Slowlog disabled */
if (duration >= server.slowlog_log_slower_than)
- listAddNodeHead(server.slowlog,slowlogCreateEntry(argv,argc,duration));
+ listAddNodeHead(server.slowlog,
+ slowlogCreateEntry(c,argv,argc,duration));
/* Remove old entries if needed. */
while (listLength(server.slowlog) > server.slowlog_max_len)
@@ -127,8 +139,18 @@ void slowlogReset(void) {
/* The SLOWLOG command. Implements all the subcommands needed to handle the
* Redis slow log. */
-void slowlogCommand(redisClient *c) {
- if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) {
+void slowlogCommand(client *c) {
+ if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
+ const char *help[] = {
+"GET [count] -- Return top entries from the slowlog (default: 10)."
+" Entries are made of:",
+" id, timestamp, time in microseconds, arguments array, client IP and port, client name",
+"LEN -- Return the length of the slowlog.",
+"RESET -- Reset the slowlog.",
+NULL
+ };
+ addReplyHelp(c, help);
+ } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"reset")) {
slowlogReset();
addReply(c,shared.ok);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"len")) {
@@ -143,7 +165,7 @@ void slowlogCommand(redisClient *c) {
slowlogEntry *se;
if (c->argc == 3 &&
- getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != REDIS_OK)
+ getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK)
return;
listRewind(server.slowlog,&li);
@@ -152,18 +174,19 @@ void slowlogCommand(redisClient *c) {
int j;
se = ln->value;
- addReplyMultiBulkLen(c,4);
+ addReplyMultiBulkLen(c,6);
addReplyLongLong(c,se->id);
addReplyLongLong(c,se->time);
addReplyLongLong(c,se->duration);
addReplyMultiBulkLen(c,se->argc);
for (j = 0; j < se->argc; j++)
addReplyBulk(c,se->argv[j]);
+ addReplyBulkCBuffer(c,se->peerid,sdslen(se->peerid));
+ addReplyBulkCBuffer(c,se->cname,sdslen(se->cname));
sent++;
}
setDeferredMultiBulkLength(c,totentries,sent);
} else {
- addReplyError(c,
- "Unknown SLOWLOG subcommand or wrong # of args. Try GET, RESET, LEN.");
+ addReplySubcommandSyntaxError(c);
}
}
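
Since slowlogPushEntryIfNeeded() and slowlogCreateEntry() now take the client, the caller has to pass it along; the call site (a sketch of the server.c side, which this diff does not show, assuming the usual ustime() microsecond clock helper) looks roughly like:

    long long start = ustime(); /* microseconds */
    c->cmd->proc(c);
    long long duration = ustime() - start;
    slowlogPushEntryIfNeeded(c, c->argv, c->argc, duration);
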
diff --git a/src/slowlog.h b/src/slowlog.h
index e3067de91..655fb25f4 100644
--- a/src/slowlog.h
+++ b/src/slowlog.h
@@ -35,13 +35,15 @@ typedef struct slowlogEntry {
robj **argv;
int argc;
long long id; /* Unique entry identifier. */
- long long duration; /* Time spent by the query, in nanoseconds. */
+ long long duration; /* Time spent by the query, in microseconds. */
time_t time; /* Unix time at which the query was executed. */
+ sds cname; /* Client name. */
+ sds peerid; /* Client network address. */
} slowlogEntry;
/* Exported API */
void slowlogInit(void);
-void slowlogPushEntryIfNeeded(robj **argv, int argc, long long duration);
+void slowlogPushEntryIfNeeded(client *c, robj **argv, int argc, long long duration);
/* Exported commands */
-void slowlogCommand(redisClient *c);
+void slowlogCommand(client *c);
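
With the two new fields, every entry returned by SLOWLOG GET is now a 6-element array, in the order the reply code above emits them:

    1) id        (unique entry id)
    2) time      (unix timestamp of execution)
    3) duration  (microseconds)
    4) argv      (array with the command and its arguments)
    5) peerid    (client address, e.g. an ip:port pair)
    6) cname     (client name, empty if never set)
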
diff --git a/src/sort.c b/src/sort.c
index 74b27cb67..4b300d868 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -29,7 +29,7 @@
*/
-#include "redis.h"
+#include "server.h"
#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
#include <math.h> /* isnan() */
@@ -110,13 +110,13 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
if (o == NULL) goto noobj;
if (fieldobj) {
- if (o->type != REDIS_HASH) goto noobj;
+ if (o->type != OBJ_HASH) goto noobj;
- /* Retrieve value from hash by the field name. This operation
- * already increases the refcount of the returned object. */
- o = hashTypeGetObject(o, fieldobj);
+ /* Retrieve value from hash by the field name. The returned object
+ * is a new object with refcount already incremented. */
+ o = hashTypeGetValueObject(o, fieldobj->ptr);
} else {
- if (o->type != REDIS_STRING) goto noobj;
+ if (o->type != OBJ_STRING) goto noobj;
/* Every object that this function returns needs to have its refcount
* increased. sortCommand decreases it again. */
@@ -186,30 +186,30 @@ int sortCompare(const void *s1, const void *s2) {
/* The SORT command is the most complex command in Redis. Warning: this code
* is optimized for speed and a bit less for readability */
-void sortCommand(redisClient *c) {
+void sortCommand(client *c) {
list *operations;
unsigned int outputlen = 0;
int desc = 0, alpha = 0;
long limit_start = 0, limit_count = -1, start, end;
int j, dontsort = 0, vectorlen;
int getop = 0; /* GET operation counter */
- int int_convertion_error = 0;
+ int int_conversion_error = 0;
int syntax_error = 0;
robj *sortval, *sortby = NULL, *storekey = NULL;
redisSortObject *vector; /* Resulting vector to sort */
/* Lookup the key to sort. It must be of the right types */
sortval = lookupKeyRead(c->db,c->argv[1]);
- if (sortval && sortval->type != REDIS_SET &&
- sortval->type != REDIS_LIST &&
- sortval->type != REDIS_ZSET)
+ if (sortval && sortval->type != OBJ_SET &&
+ sortval->type != OBJ_LIST &&
+ sortval->type != OBJ_ZSET)
{
addReply(c,shared.wrongtypeerr);
return;
}
/* Create a list of operations to perform for every sorted element.
- * Operations can be GET/DEL/INCR/DECR */
+ * Operations can be GET */
operations = listCreate();
listSetFreeMethod(operations,zfree);
j = 2; /* options start at argv[2] */
@@ -233,9 +233,9 @@ void sortCommand(redisClient *c) {
alpha = 1;
} else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
if ((getLongFromObjectOrReply(c, c->argv[j+1], &limit_start, NULL)
- != REDIS_OK) ||
+ != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[j+2], &limit_count, NULL)
- != REDIS_OK))
+ != C_OK))
{
syntax_error++;
break;
@@ -267,7 +267,7 @@ void sortCommand(redisClient *c) {
break;
}
listAddNodeTail(operations,createSortOperation(
- REDIS_SORT_GET,c->argv[j+1]));
+ SORT_OP_GET,c->argv[j+1]));
getop++;
j++;
} else {
@@ -292,8 +292,8 @@ void sortCommand(redisClient *c) {
* even if no sort order is requested, so they remain stable across
* scripting and replication. */
if (dontsort &&
- sortval->type == REDIS_SET &&
- (storekey || c->flags & REDIS_LUA_CLIENT))
+ sortval->type == OBJ_SET &&
+ (storekey || c->flags & CLIENT_LUA))
{
/* Force ALPHA sorting */
dontsort = 0;
@@ -302,15 +302,15 @@ void sortCommand(redisClient *c) {
}
/* Destructively convert encoded sorted sets for SORT. */
- if (sortval->type == REDIS_ZSET)
- zsetConvert(sortval, REDIS_ENCODING_SKIPLIST);
+ if (sortval->type == OBJ_ZSET)
+ zsetConvert(sortval, OBJ_ENCODING_SKIPLIST);
/* Objtain the length of the object to sort. */
switch(sortval->type) {
- case REDIS_LIST: vectorlen = listTypeLength(sortval); break;
- case REDIS_SET: vectorlen = setTypeSize(sortval); break;
- case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
- default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
+ case OBJ_LIST: vectorlen = listTypeLength(sortval); break;
+ case OBJ_SET: vectorlen = setTypeSize(sortval); break;
+ case OBJ_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
+ default: vectorlen = 0; serverPanic("Bad SORT type"); /* Avoid GCC warning */
}
/* Perform LIMIT start,count sanity checking. */
@@ -322,17 +322,17 @@ void sortCommand(redisClient *c) {
}
if (end >= vectorlen) end = vectorlen-1;
- /* Optimization:
+ /* Whenever possible, we load elements into the output array in a more
+ * direct way. This is possible if:
*
- * 1) if the object to sort is a sorted set.
+ * 1) The object to sort is a sorted set or a list (internally sorted).
* 2) There is nothing to sort as dontsort is true (BY <constant string>).
- * 3) We have a LIMIT option that actually reduces the number of elements
- * to fetch.
*
- * In this case to load all the objects in the vector is a huge waste of
- * resources. We just allocate a vector that is big enough for the selected
- * range length, and make sure to load just this part in the vector. */
- if (sortval->type == REDIS_ZSET &&
+ * In this special case, if we have a LIMIT option that actually reduces
+ * the number of elements to fetch, we also optimize to just load the
+ * range we are interested in, allocating a vector that is just big enough
+ * for the selected range length. */
+ if ((sortval->type == OBJ_ZSET || sortval->type == OBJ_LIST) &&
dontsort &&
(start != 0 || end != vectorlen-1))
{
@@ -343,8 +343,33 @@ void sortCommand(redisClient *c) {
vector = zmalloc(sizeof(redisSortObject)*vectorlen);
j = 0;
- if (sortval->type == REDIS_LIST) {
- listTypeIterator *li = listTypeInitIterator(sortval,0,REDIS_TAIL);
+ if (sortval->type == OBJ_LIST && dontsort) {
+ /* Special handling for a list, if 'dontsort' is true.
+ * This makes sure we return elements in the list's original
+ * ordering, according to the DESC / ASC options.
+ *
+ * Note that in this case we also handle LIMIT here in a direct
+ * way, just getting the required range, as an optimization. */
+ if (end >= start) {
+ listTypeIterator *li;
+ listTypeEntry entry;
+ li = listTypeInitIterator(sortval,
+ desc ? (long)(listTypeLength(sortval) - start - 1) : start,
+ desc ? LIST_HEAD : LIST_TAIL);
+
+ while(j < vectorlen && listTypeNext(li,&entry)) {
+ vector[j].obj = listTypeGet(&entry);
+ vector[j].u.score = 0;
+ vector[j].u.cmpobj = NULL;
+ j++;
+ }
+ listTypeReleaseIterator(li);
+ /* Fix start/end: output code is not aware of this optimization. */
+ end -= start;
+ start = 0;
+ }
+ } else if (sortval->type == OBJ_LIST) {
+ listTypeIterator *li = listTypeInitIterator(sortval,0,LIST_TAIL);
listTypeEntry entry;
while(listTypeNext(li,&entry)) {
vector[j].obj = listTypeGet(&entry);
@@ -353,17 +378,17 @@ void sortCommand(redisClient *c) {
j++;
}
listTypeReleaseIterator(li);
- } else if (sortval->type == REDIS_SET) {
+ } else if (sortval->type == OBJ_SET) {
setTypeIterator *si = setTypeInitIterator(sortval);
- robj *ele;
- while((ele = setTypeNextObject(si)) != NULL) {
- vector[j].obj = ele;
+ sds sdsele;
+ while((sdsele = setTypeNextObject(si)) != NULL) {
+ vector[j].obj = createObject(OBJ_STRING,sdsele);
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
}
setTypeReleaseIterator(si);
- } else if (sortval->type == REDIS_ZSET && dontsort) {
+ } else if (sortval->type == OBJ_ZSET && dontsort) {
/* Special handling for a sorted set, if 'dontsort' is true.
* This makes sure we return elements in the sorted set original
* ordering, accordingly to DESC / ASC options.
@@ -374,7 +399,7 @@ void sortCommand(redisClient *c) {
zset *zs = sortval->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
- robj *ele;
+ sds sdsele;
int rangelen = vectorlen;
/* Check if starting point is trivial, before doing log(N) lookup. */
@@ -391,40 +416,38 @@ void sortCommand(redisClient *c) {
}
while(rangelen--) {
- redisAssertWithInfo(c,sortval,ln != NULL);
- ele = ln->obj;
- vector[j].obj = ele;
+ serverAssertWithInfo(c,sortval,ln != NULL);
+ sdsele = ln->ele;
+ vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
ln = desc ? ln->backward : ln->level[0].forward;
}
- /* The code producing the output does not know that in the case of
- * sorted set, 'dontsort', and LIMIT, we are able to get just the
- * range, already sorted, so we need to adjust "start" and "end"
- * to make sure start is set to 0. */
+ /* Fix start/end: output code is not aware of this optimization. */
end -= start;
start = 0;
- } else if (sortval->type == REDIS_ZSET) {
+ } else if (sortval->type == OBJ_ZSET) {
dict *set = ((zset*)sortval->ptr)->dict;
dictIterator *di;
dictEntry *setele;
+ sds sdsele;
di = dictGetIterator(set);
while((setele = dictNext(di)) != NULL) {
- vector[j].obj = dictGetKey(setele);
+ sdsele = dictGetKey(setele);
+ vector[j].obj = createStringObject(sdsele,sdslen(sdsele));
vector[j].u.score = 0;
vector[j].u.cmpobj = NULL;
j++;
}
dictReleaseIterator(di);
} else {
- redisPanic("Unknown type");
+ serverPanic("Unknown type");
}
- printf("j: %d; vectorlen: %d\n", j, vectorlen);
- redisAssertWithInfo(c,sortval,j == vectorlen);
+ serverAssertWithInfo(c,sortval,j == vectorlen);
/* Now it's time to load the right scores in the sorting vector */
- if (dontsort == 0) {
+ if (!dontsort) {
for (j = 0; j < vectorlen; j++) {
robj *byval;
if (sortby) {
@@ -446,15 +469,15 @@ void sortCommand(redisClient *c) {
if (eptr[0] != '\0' || errno == ERANGE ||
isnan(vector[j].u.score))
{
- int_convertion_error = 1;
+ int_conversion_error = 1;
}
- } else if (byval->encoding == REDIS_ENCODING_INT) {
+ } else if (byval->encoding == OBJ_ENCODING_INT) {
/* Don't need to decode the object if it's
* integer-encoded (the only encoding supported) so
* far. We can just cast it */
vector[j].u.score = (long)byval->ptr;
} else {
- redisAssertWithInfo(c,sortval,1 != 1);
+ serverAssertWithInfo(c,sortval,1 != 1);
}
}
@@ -464,9 +487,7 @@ void sortCommand(redisClient *c) {
decrRefCount(byval);
}
}
- }
- if (dontsort == 0) {
server.sort_desc = desc;
server.sort_alpha = alpha;
server.sort_bypattern = sortby ? 1 : 0;
@@ -480,7 +501,7 @@ void sortCommand(redisClient *c) {
/* Send command output to the output buffer, performing the specified
* GET/DEL/INCR/DECR operations if any. */
outputlen = getop ? getop*(end-start+1) : end-start+1;
- if (int_convertion_error) {
+ if (int_conversion_error) {
addReplyError(c,"One or more scores can't be converted into double");
} else if (storekey == NULL) {
/* STORE option not specified, sent the sorting result to client */
@@ -496,7 +517,7 @@ void sortCommand(redisClient *c) {
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
- if (sop->type == REDIS_SORT_GET) {
+ if (sop->type == SORT_OP_GET) {
if (!val) {
addReply(c,shared.nullbulk);
} else {
@@ -505,7 +526,7 @@ void sortCommand(redisClient *c) {
}
} else {
/* Always fails */
- redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET);
+ serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
}
}
}
@@ -518,7 +539,7 @@ void sortCommand(redisClient *c) {
listIter li;
if (!getop) {
- listTypePush(sobj,vector[j].obj,REDIS_TAIL);
+ listTypePush(sobj,vector[j].obj,LIST_TAIL);
} else {
listRewind(operations,&li);
while((ln = listNext(&li))) {
@@ -526,29 +547,29 @@ void sortCommand(redisClient *c) {
robj *val = lookupKeyByPattern(c->db,sop->pattern,
vector[j].obj);
- if (sop->type == REDIS_SORT_GET) {
+ if (sop->type == SORT_OP_GET) {
if (!val) val = createStringObject("",0);
/* listTypePush does an incrRefCount, so we should take care
* care of the incremented refcount caused by either
* lookupKeyByPattern or createStringObject("",0) */
- listTypePush(sobj,val,REDIS_TAIL);
+ listTypePush(sobj,val,LIST_TAIL);
decrRefCount(val);
} else {
/* Always fails */
- redisAssertWithInfo(c,sortval,sop->type == REDIS_SORT_GET);
+ serverAssertWithInfo(c,sortval,sop->type == SORT_OP_GET);
}
}
}
}
if (outputlen) {
setKey(c->db,storekey,sobj);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"sortstore",storekey,
+ notifyKeyspaceEvent(NOTIFY_LIST,"sortstore",storekey,
c->db->id);
server.dirty += outputlen;
} else if (dbDelete(c->db,storekey)) {
signalModifiedKey(c->db,storekey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",storekey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id);
server.dirty++;
}
decrRefCount(sobj);
@@ -556,9 +577,9 @@ void sortCommand(redisClient *c) {
}
/* Cleanup */
- if (sortval->type == REDIS_LIST || sortval->type == REDIS_SET)
- for (j = 0; j < vectorlen; j++)
- decrRefCount(vector[j].obj);
+ for (j = 0; j < vectorlen; j++)
+ decrRefCount(vector[j].obj);
+
decrRefCount(sortval);
listRelease(operations);
for (j = 0; j < vectorlen; j++) {
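
The subtlest part of the new list fast path is the start index used for DESC iteration; a tiny standalone check of that arithmetic (plain C sketch):

    #include <stdio.h>

    int main(void) {
        /* SORT mylist LIMIT 2 3 DESC on a 10-element list: the reversed
         * order visits indexes 9,8,7,...; skipping start=2 of them makes
         * len - start - 1 = 7 the first element to emit, and the iterator
         * then walks toward the head. */
        long len = 10, start = 2;
        printf("first emitted index: %ld\n", len - start - 1); /* 7 */
        return 0;
    }
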
diff --git a/src/sparkline.c b/src/sparkline.c
index 8e2764aee..0a986883d 100644
--- a/src/sparkline.c
+++ b/src/sparkline.c
@@ -30,7 +30,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
diff --git a/src/stream.h b/src/stream.h
new file mode 100644
index 000000000..ef08753b5
--- /dev/null
+++ b/src/stream.h
@@ -0,0 +1,113 @@
+#ifndef STREAM_H
+#define STREAM_H
+
+#include "rax.h"
+#include "listpack.h"
+
+/* Stream item ID: a 128 bit number composed of a milliseconds time and
+ * a sequence counter. IDs generated in the same millisecond (or in a past
+ * millisecond if the clock jumped backward) will use the millisecond time
+ * of the latest generated ID and an incremented sequence. */
+typedef struct streamID {
+ uint64_t ms; /* Unix time in milliseconds. */
+ uint64_t seq; /* Sequence number. */
+} streamID;
+
+typedef struct stream {
+ rax *rax; /* The radix tree holding the stream. */
+ uint64_t length; /* Number of elements inside this stream. */
+ streamID last_id; /* Zero if there are yet no items. */
+ rax *cgroups; /* Consumer groups dictionary: name -> streamCG */
+} stream;
+
+/* We define an iterator to iterate stream items in an abstract way, without
+ * caring about the radix tree + listpack representation. Technically speaking
+ * the iterator is only used inside streamReplyWithRange(), so it could just
+ * be implemented inside the function, but practically there is the AOF
+ * rewriting code that also needs to iterate the stream to emit the XADD
+ * commands. */
+typedef struct streamIterator {
+ stream *stream; /* The stream we are iterating. */
+ streamID master_id; /* ID of the master entry at listpack head. */
+ uint64_t master_fields_count; /* Master entries # of fields. */
+ unsigned char *master_fields_start; /* Master entries start in listpack. */
+ unsigned char *master_fields_ptr; /* Master field to emit next. */
+ int entry_flags; /* Flags of entry we are emitting. */
+ int rev; /* True if iterating end to start (reverse). */
+ uint64_t start_key[2]; /* Start key as 128 bit big endian. */
+ uint64_t end_key[2]; /* End key as 128 bit big endian. */
+ raxIterator ri; /* Rax iterator. */
+ unsigned char *lp; /* Current listpack. */
+ unsigned char *lp_ele; /* Current listpack cursor. */
+ unsigned char *lp_flags; /* Current entry flags pointer. */
+ /* Buffers used to hold the string of lpGet() when the element is
+ * integer encoded, so that there is no string representation of the
+ * element inside the listpack itself. */
+ unsigned char field_buf[LP_INTBUF_SIZE];
+ unsigned char value_buf[LP_INTBUF_SIZE];
+} streamIterator;
+
+/* Consumer group. */
+typedef struct streamCG {
+ streamID last_id; /* Last delivered (not acknowledged) ID for this
+ group. Consumers that will just ask for more
+ messages will be served with IDs greater than this. */
+ rax *pel; /* Pending entries list. This is a radix tree that
+ has every message delivered to consumers (without
+ the NOACK option) that has not yet been acknowledged
+ as processed. The key of the radix tree is the
+ ID as a 64 bit big endian number, while the
+ associated value is a streamNACK structure.*/
+ rax *consumers; /* A radix tree representing the consumers by name
+ and their associated representation in the form
+ of streamConsumer structures. */
+} streamCG;
+
+/* A specific consumer in a consumer group. */
+typedef struct streamConsumer {
+ mstime_t seen_time; /* Last time this consumer was active. */
+ sds name; /* Consumer name. This is how the consumer
+ will be identified in the consumer group
+ protocol. Case sensitive. */
+ rax *pel; /* Consumer specific pending entries list: all
+ the pending messages delivered to this
+ consumer not yet acknowledged. Keys are
+ big endian message IDs, while values are
+ the same streamNACK structure referenced
+ in the "pel" of the conumser group structure
+ itself, so the value is shared. */
+} streamConsumer;
+
+/* Pending (yet not acknowledged) message in a consumer group. */
+typedef struct streamNACK {
+ mstime_t delivery_time; /* Last time this message was delivered. */
+ uint64_t delivery_count; /* Number of times this message was delivered.*/
+ streamConsumer *consumer; /* The consumer this message was delivered to
+ in the last delivery. */
+} streamNACK;
+
+/* Stream propagation information, passed to functions in order to propagate
+ * XCLAIM commands to AOF and slaves. */
+typedef struct streamPropInfo {
+ robj *keyname;
+ robj *groupname;
+} streamPropInfo;
+
+/* Prototypes of exported APIs. */
+struct client;
+
+stream *streamNew(void);
+void freeStream(stream *s);
+size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev, streamCG *group, streamConsumer *consumer, int flags, streamPropInfo *spi);
+void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev);
+int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields);
+void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen);
+void streamIteratorStop(streamIterator *si);
+streamCG *streamLookupCG(stream *s, sds groupname);
+streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int create);
+streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id);
+streamNACK *streamCreateNACK(streamConsumer *consumer);
+void streamDecodeID(void *buf, streamID *id);
+int streamCompareID(streamID *a, streamID *b);
+
+#endif
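
A usage sketch of the iterator API above, written against these prototypes (it assumes the semantics of the t_stream.c implementation, where NULL start/end select the full range and streamIteratorGetID() returns non-zero while entries remain):

    #include <stdio.h>

    void dumpStream(stream *s) {
        streamIterator si;
        streamID id;
        int64_t numfields;

        streamIteratorStart(&si, s, NULL, NULL, 0); /* full range, forward */
        while (streamIteratorGetID(&si, &id, &numfields)) {
            printf("%llu-%llu\n", (unsigned long long)id.ms,
                                  (unsigned long long)id.seq);
            while (numfields--) {
                unsigned char *field, *value;
                int64_t flen, vlen;
                streamIteratorGetField(&si, &field, &value, &flen, &vlen);
                printf("  %.*s => %.*s\n", (int)flen, field,
                                           (int)vlen, value);
            }
        }
        streamIteratorStop(&si);
    }
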
diff --git a/src/syncio.c b/src/syncio.c
index ac2a4a373..b2843d5fb 100644
--- a/src/syncio.c
+++ b/src/syncio.c
@@ -28,7 +28,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/* ----------------- Blocking sockets I/O with timeouts --------------------- */
@@ -40,7 +40,7 @@
*
* All the functions take the timeout in milliseconds. */
-#define REDIS_SYNCIO_RESOLUTION 10 /* Resolution in milliseconds */
+#define SYNCIO__RESOLUTION 10 /* Resolution in milliseconds */
/* Write the specified payload to 'fd'. If writing the whole payload will be
* done within 'timeout' milliseconds the operation succeeds and 'size' is
@@ -52,8 +52,8 @@ ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
long long remaining = timeout;
while(1) {
- long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ?
- remaining : REDIS_SYNCIO_RESOLUTION;
+ long long wait = (remaining > SYNCIO__RESOLUTION) ?
+ remaining : SYNCIO__RESOLUTION;
long long elapsed;
/* Optimistically try to write before checking if the file descriptor
@@ -89,8 +89,8 @@ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout) {
if (size == 0) return 0;
while(1) {
- long long wait = (remaining > REDIS_SYNCIO_RESOLUTION) ?
- remaining : REDIS_SYNCIO_RESOLUTION;
+ long long wait = (remaining > SYNCIO__RESOLUTION) ?
+ remaining : SYNCIO__RESOLUTION;
long long elapsed;
/* Optimistically try to read before checking if the file descriptor
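
Per the contract described above (the full 'size' on success, -1 otherwise), callers never have to deal with partial writes themselves; a minimal sketch:

    /* Send a fixed payload with a 500 ms budget; returns 0 on success. */
    int sendPing(int fd) {
        char buf[] = "PING\r\n";
        ssize_t len = (ssize_t)sizeof(buf) - 1;
        return syncWrite(fd, buf, len, 500) == len ? 0 : -1;
    }
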
diff --git a/src/t_hash.c b/src/t_hash.c
index 7f33bba0c..fa3a893a6 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h>
/*-----------------------------------------------------------------------------
@@ -40,29 +40,21 @@
void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
int i;
- if (o->encoding != REDIS_ENCODING_ZIPLIST) return;
+ if (o->encoding != OBJ_ENCODING_ZIPLIST) return;
for (i = start; i <= end; i++) {
if (sdsEncodedObject(argv[i]) &&
sdslen(argv[i]->ptr) > server.hash_max_ziplist_value)
{
- hashTypeConvert(o, REDIS_ENCODING_HT);
+ hashTypeConvert(o, OBJ_ENCODING_HT);
break;
}
}
}
-/* Encode given objects in-place when the hash uses a dict. */
-void hashTypeTryObjectEncoding(robj *subject, robj **o1, robj **o2) {
- if (subject->encoding == REDIS_ENCODING_HT) {
- if (o1) *o1 = tryObjectEncoding(*o1);
- if (o2) *o2 = tryObjectEncoding(*o2);
- }
-}
-
/* Get the value from a ziplist encoded hash, identified by field.
* Returns -1 when the field cannot be found. */
-int hashTypeGetFromZiplist(robj *o, robj *field,
+int hashTypeGetFromZiplist(robj *o, sds field,
unsigned char **vstr,
unsigned int *vlen,
long long *vll)
@@ -70,26 +62,22 @@ int hashTypeGetFromZiplist(robj *o, robj *field,
unsigned char *zl, *fptr = NULL, *vptr = NULL;
int ret;
- redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST);
-
- field = getDecodedObject(field);
+ serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST);
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
}
}
- decrRefCount(field);
-
if (vptr != NULL) {
ret = ziplistGet(vptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
return 0;
}
@@ -97,154 +85,216 @@ int hashTypeGetFromZiplist(robj *o, robj *field,
}
/* Get the value from a hash table encoded hash, identified by field.
- * Returns -1 when the field cannot be found. */
-int hashTypeGetFromHashTable(robj *o, robj *field, robj **value) {
+ * Returns NULL when the field cannot be found, otherwise the SDS value
+ * is returned. */
+sds hashTypeGetFromHashTable(robj *o, sds field) {
dictEntry *de;
- redisAssert(o->encoding == REDIS_ENCODING_HT);
+ serverAssert(o->encoding == OBJ_ENCODING_HT);
de = dictFind(o->ptr, field);
- if (de == NULL) return -1;
- *value = dictGetVal(de);
- return 0;
+ if (de == NULL) return NULL;
+ return dictGetVal(de);
}
-/* Higher level function of hashTypeGet*() that always returns a Redis
- * object (either new or with refcount incremented), so that the caller
- * can retain a reference or call decrRefCount after the usage.
+/* Higher level function of hashTypeGet*() that returns the hash value
+ * associated with the specified field. If the field is found C_OK
+ * is returned, otherwise C_ERR. The value is returned by reference
+ * via *vstr and *vlen if it's a string, or stored in *vll if it's
+ * a number.
*
- * The lower level function can prevent copy on write so it is
- * the preferred way of doing read operations. */
-robj *hashTypeGetObject(robj *o, robj *field) {
- robj *value = NULL;
+ * If *vll is populated, *vstr is set to NULL, so after checking that
+ * the return value is C_OK, the caller can tell which representation
+ * was used by testing whether *vstr is NULL. */
+int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
+ *vstr = NULL;
+ if (hashTypeGetFromZiplist(o, field, vstr, vlen, vll) == 0)
+ return C_OK;
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds value;
+ if ((value = hashTypeGetFromHashTable(o, field)) != NULL) {
+ *vstr = (unsigned char*) value;
+ *vlen = sdslen(value);
+ return C_OK;
+ }
+ } else {
+ serverPanic("Unknown hash encoding");
+ }
+ return C_ERR;
+}
+
+/* Like hashTypeGetValue() but returns a Redis object, which is useful for
+ * interaction with the hash type outside t_hash.c.
+ * The function returns NULL if the field is not found in the hash. Otherwise
+ * a newly allocated string object with the value is returned. */
+robj *hashTypeGetValueObject(robj *o, sds field) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vll;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hashTypeGetValue(o,field,&vstr,&vlen,&vll) == C_ERR) return NULL;
+ if (vstr) return createStringObject((char*)vstr,vlen);
+ else return createStringObjectFromLongLong(vll);
+}
+
+/* Higher level function using hashTypeGet*() to return the length of the
+ * object associated with the requested field, or 0 if the field does not
+ * exist. */
+size_t hashTypeGetValueLength(robj *o, sds field) {
+ size_t len = 0;
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
- if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) {
- if (vstr) {
- value = createStringObject((char*)vstr, vlen);
- } else {
- value = createStringObjectFromLongLong(vll);
- }
- }
-
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *aux;
+ if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0)
+ len = vstr ? vlen : sdigits10(vll);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds aux;
- if (hashTypeGetFromHashTable(o, field, &aux) == 0) {
- incrRefCount(aux);
- value = aux;
- }
+ if ((aux = hashTypeGetFromHashTable(o, field)) != NULL)
+ len = sdslen(aux);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return value;
+ return len;
}
/* Test if the specified field exists in the given hash. Returns 1 if the field
* exists, and 0 when it doesn't. */
-int hashTypeExists(robj *o, robj *field) {
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+int hashTypeExists(robj *o, sds field) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
if (hashTypeGetFromZiplist(o, field, &vstr, &vlen, &vll) == 0) return 1;
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *aux;
-
- if (hashTypeGetFromHashTable(o, field, &aux) == 0) return 1;
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ if (hashTypeGetFromHashTable(o, field) != NULL) return 1;
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
return 0;
}
-/* Add an element, discard the old if the key already exists.
+/* Add a new field, overwriting the old value if the field already exists.
* Return 0 on insert and 1 on update.
- * This function will take care of incrementing the reference count of the
- * retained fields and value objects. */
-int hashTypeSet(robj *o, robj *field, robj *value) {
+ *
+ * By default, the field and value SDS strings are copied if needed, so the
+ * caller retains ownership of the strings passed. However, this behavior
+ * can be changed by passing the appropriate flags (possibly bitwise OR-ed):
+ *
+ * HASH_SET_TAKE_FIELD -- The SDS field ownership passes to the function.
+ * HASH_SET_TAKE_VALUE -- The SDS value ownership passes to the function.
+ *
+ * When the flags are used, the caller does not need to release the passed
+ * SDS string(s): the function either stores the string in the new entry
+ * or frees it before returning to the caller.
+ *
+ * HASH_SET_COPY corresponds to no flags passed, and means the default
+ * semantics of copying the values if needed.
+ *
+ */
+#define HASH_SET_TAKE_FIELD (1<<0)
+#define HASH_SET_TAKE_VALUE (1<<1)
+#define HASH_SET_COPY 0
+int hashTypeSet(robj *o, sds field, sds value, int flags) {
int update = 0;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl, *fptr, *vptr;
- field = getDecodedObject(field);
- value = getDecodedObject(value);
-
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
update = 1;
/* Delete value */
zl = ziplistDelete(zl, &vptr);
/* Insert new value */
- zl = ziplistInsert(zl, vptr, value->ptr, sdslen(value->ptr));
+ zl = ziplistInsert(zl, vptr, (unsigned char*)value,
+ sdslen(value));
}
}
if (!update) {
/* Push new field/value pair onto the tail of the ziplist */
- zl = ziplistPush(zl, field->ptr, sdslen(field->ptr), ZIPLIST_TAIL);
- zl = ziplistPush(zl, value->ptr, sdslen(value->ptr), ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)field, sdslen(field),
+ ZIPLIST_TAIL);
+ zl = ziplistPush(zl, (unsigned char*)value, sdslen(value),
+ ZIPLIST_TAIL);
}
o->ptr = zl;
- decrRefCount(field);
- decrRefCount(value);
/* Check if the ziplist needs to be converted to a hash table */
if (hashTypeLength(o) > server.hash_max_ziplist_entries)
- hashTypeConvert(o, REDIS_ENCODING_HT);
- } else if (o->encoding == REDIS_ENCODING_HT) {
- if (dictReplace(o->ptr, field, value)) { /* Insert */
- incrRefCount(field);
- } else { /* Update */
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ dictEntry *de = dictFind(o->ptr,field);
+ if (de) {
+ sdsfree(dictGetVal(de));
+ if (flags & HASH_SET_TAKE_VALUE) {
+ dictGetVal(de) = value;
+ value = NULL;
+ } else {
+ dictGetVal(de) = sdsdup(value);
+ }
update = 1;
+ } else {
+ sds f,v;
+ if (flags & HASH_SET_TAKE_FIELD) {
+ f = field;
+ field = NULL;
+ } else {
+ f = sdsdup(field);
+ }
+ if (flags & HASH_SET_TAKE_VALUE) {
+ v = value;
+ value = NULL;
+ } else {
+ v = sdsdup(value);
+ }
+ dictAdd(o->ptr,f,v);
}
- incrRefCount(value);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
+
+ /* Free the SDS strings we did not store elsewhere when the flags
+ * make this function responsible for them. */
+ if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field);
+ if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value);
return update;
}
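The ownership rules are the subtle part of the new hashTypeSet(): with a TAKE flag the function either stores the passed SDS string or frees it, so the caller must not use it afterwards. A minimal sketch of both calling styles (hypothetical caller, not part of this patch):

#include "server.h"

void exampleHashSetStyles(robj *hash) {
    /* HASH_SET_COPY: the function duplicates what it needs, so the
     * caller still owns, and must free, both strings. */
    sds f = sdsnew("field"), v = sdsnew("value");
    hashTypeSet(hash, f, v, HASH_SET_COPY);
    sdsfree(f);
    sdsfree(v);

    /* TAKE flags: ownership passes to hashTypeSet(), which stores or
     * frees the strings; touching them after the call is a bug. */
    hashTypeSet(hash, sdsnew("counter"), sdsfromlonglong(42),
                HASH_SET_TAKE_FIELD|HASH_SET_TAKE_VALUE);
}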
/* Delete an element from a hash.
* Return 1 on deleted and 0 on not found. */
-int hashTypeDelete(robj *o, robj *field) {
+int hashTypeDelete(robj *o, sds field) {
int deleted = 0;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl, *fptr;
- field = getDecodedObject(field);
-
zl = o->ptr;
fptr = ziplistIndex(zl, ZIPLIST_HEAD);
if (fptr != NULL) {
- fptr = ziplistFind(fptr, field->ptr, sdslen(field->ptr), 1);
+ fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
if (fptr != NULL) {
- zl = ziplistDelete(zl,&fptr);
- zl = ziplistDelete(zl,&fptr);
+ zl = ziplistDelete(zl,&fptr); /* Delete the key. */
+ zl = ziplistDelete(zl,&fptr); /* Delete the value. */
o->ptr = zl;
deleted = 1;
}
}
-
- decrRefCount(field);
-
- } else if (o->encoding == REDIS_ENCODING_HT) {
- if (dictDelete((dict*)o->ptr, field) == REDIS_OK) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ if (dictDelete((dict*)o->ptr, field) == C_OK) {
deleted = 1;
/* Always check if the dictionary needs a resize after a delete. */
@@ -252,24 +302,22 @@ int hashTypeDelete(robj *o, robj *field) {
}
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return deleted;
}
/* Return the number of elements in a hash. */
-unsigned long hashTypeLength(robj *o) {
+unsigned long hashTypeLength(const robj *o) {
unsigned long length = ULONG_MAX;
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
length = ziplistLen(o->ptr) / 2;
- } else if (o->encoding == REDIS_ENCODING_HT) {
- length = dictSize((dict*)o->ptr);
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ length = dictSize((const dict*)o->ptr);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return length;
}
@@ -278,30 +326,27 @@ hashTypeIterator *hashTypeInitIterator(robj *subject) {
hi->subject = subject;
hi->encoding = subject->encoding;
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
hi->fptr = NULL;
hi->vptr = NULL;
- } else if (hi->encoding == REDIS_ENCODING_HT) {
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
hi->di = dictGetIterator(subject->ptr);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
-
return hi;
}
void hashTypeReleaseIterator(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_HT) {
+ if (hi->encoding == OBJ_ENCODING_HT)
dictReleaseIterator(hi->di);
- }
-
zfree(hi);
}
-/* Move to the next entry in the hash. Return REDIS_OK when the next entry
- * could be found and REDIS_ERR when the iterator reaches the end. */
+/* Move to the next entry in the hash. Return C_OK when the next entry
+ * could be found and C_ERR when the iterator reaches the end. */
int hashTypeNext(hashTypeIterator *hi) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl;
unsigned char *fptr, *vptr;
@@ -311,28 +356,28 @@ int hashTypeNext(hashTypeIterator *hi) {
if (fptr == NULL) {
/* Initialize cursor */
- redisAssert(vptr == NULL);
+ serverAssert(vptr == NULL);
fptr = ziplistIndex(zl, 0);
} else {
/* Advance cursor */
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
fptr = ziplistNext(zl, vptr);
}
- if (fptr == NULL) return REDIS_ERR;
+ if (fptr == NULL) return C_ERR;
/* Grab pointer to the value (fptr points to the field) */
vptr = ziplistNext(zl, fptr);
- redisAssert(vptr != NULL);
+ serverAssert(vptr != NULL);
/* fptr, vptr now point to the first or next pair */
hi->fptr = fptr;
hi->vptr = vptr;
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR;
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ if ((hi->de = dictNext(hi->di)) == NULL) return C_ERR;
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return REDIS_OK;
+ return C_OK;
}
/* Get the field or value at iterator cursor, for an iterator on a hash value
@@ -344,62 +389,72 @@ void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
{
int ret;
- redisAssert(hi->encoding == REDIS_ENCODING_ZIPLIST);
+ serverAssert(hi->encoding == OBJ_ENCODING_ZIPLIST);
- if (what & REDIS_HASH_KEY) {
+ if (what & OBJ_HASH_KEY) {
ret = ziplistGet(hi->fptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
} else {
ret = ziplistGet(hi->vptr, vstr, vlen, vll);
- redisAssert(ret);
+ serverAssert(ret);
}
}
/* Get the field or value at iterator cursor, for an iterator on a hash value
- * encoded as a ziplist. Prototype is similar to `hashTypeGetFromHashTable`. */
-void hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what, robj **dst) {
- redisAssert(hi->encoding == REDIS_ENCODING_HT);
+ * encoded as a hash table. Prototype is similar to
+ * `hashTypeGetFromHashTable`. */
+sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what) {
+ serverAssert(hi->encoding == OBJ_ENCODING_HT);
- if (what & REDIS_HASH_KEY) {
- *dst = dictGetKey(hi->de);
+ if (what & OBJ_HASH_KEY) {
+ return dictGetKey(hi->de);
} else {
- *dst = dictGetVal(hi->de);
+ return dictGetVal(hi->de);
}
}
-/* A non copy-on-write friendly but higher level version of hashTypeCurrent*()
- * that returns an object with incremented refcount (or a new object). It is up
- * to the caller to decrRefCount() the object if no reference is retained. */
-robj *hashTypeCurrentObject(hashTypeIterator *hi, int what) {
- robj *dst;
-
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *vstr = NULL;
- unsigned int vlen = UINT_MAX;
- long long vll = LLONG_MAX;
-
- hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
- dst = createStringObject((char*)vstr, vlen);
- } else {
- dst = createStringObjectFromLongLong(vll);
- }
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- hashTypeCurrentFromHashTable(hi, what, &dst);
- incrRefCount(dst);
+/* Higher level function of hashTypeCurrent*() that returns the hash value
+ * at current iterator position.
+ *
+ * The element is returned by reference in *vstr and *vlen if it is
+ * a string, or stored in *vll if it is a number.
+ *
+ * If the element is a number *vstr is set to NULL, so the caller
+ * can tell the two cases apart simply by checking whether
+ * vstr == NULL. */
+void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
+ *vstr = NULL;
+ hashTypeCurrentFromZiplist(hi, what, vstr, vlen, vll);
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds ele = hashTypeCurrentFromHashTable(hi, what);
+ *vstr = (unsigned char*) ele;
+ *vlen = sdslen(ele);
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
- return dst;
}
-robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
+/* Return the key or value at the current iterator position as a new
+ * SDS string. */
+sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what) {
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vll;
+
+ hashTypeCurrentObject(hi,what,&vstr,&vlen,&vll);
+ if (vstr) return sdsnewlen(vstr,vlen);
+ return sdsfromlonglong(vll);
+}
+
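Putting the iterator API together, a full scan over a hash of either encoding looks roughly like this (illustrative function, not part of this patch; it mirrors what hashTypeConvertZiplist() below does):

#include "server.h"

void exampleScanHash(robj *o) {
    hashTypeIterator *hi = hashTypeInitIterator(o);
    while (hashTypeNext(hi) != C_ERR) {
        /* Copy the current field and value into fresh SDS strings,
         * regardless of the underlying encoding. */
        sds key = hashTypeCurrentObjectNewSds(hi, OBJ_HASH_KEY);
        sds value = hashTypeCurrentObjectNewSds(hi, OBJ_HASH_VALUE);
        /* ... use key and value ... */
        sdsfree(key);
        sdsfree(value);
    }
    hashTypeReleaseIterator(hi);
}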
+robj *hashTypeLookupWriteOrCreate(client *c, robj *key) {
robj *o = lookupKeyWrite(c->db,key);
if (o == NULL) {
o = createHashObject();
dbAdd(c->db,key,o);
} else {
- if (o->type != REDIS_HASH) {
+ if (o->type != OBJ_HASH) {
addReply(c,shared.wrongtypeerr);
return NULL;
}
@@ -408,12 +463,12 @@ robj *hashTypeLookupWriteOrCreate(redisClient *c, robj *key) {
}
void hashTypeConvertZiplist(robj *o, int enc) {
- redisAssert(o->encoding == REDIS_ENCODING_ZIPLIST);
+ serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST);
- if (enc == REDIS_ENCODING_ZIPLIST) {
+ if (enc == OBJ_ENCODING_ZIPLIST) {
/* Nothing to do... */
- } else if (enc == REDIS_ENCODING_HT) {
+ } else if (enc == OBJ_ENCODING_HT) {
hashTypeIterator *hi;
dict *dict;
int ret;
@@ -421,39 +476,34 @@ void hashTypeConvertZiplist(robj *o, int enc) {
hi = hashTypeInitIterator(o);
dict = dictCreate(&hashDictType, NULL);
- while (hashTypeNext(hi) != REDIS_ERR) {
- robj *field, *value;
+ while (hashTypeNext(hi) != C_ERR) {
+ sds key, value;
- field = hashTypeCurrentObject(hi, REDIS_HASH_KEY);
- field = tryObjectEncoding(field);
- value = hashTypeCurrentObject(hi, REDIS_HASH_VALUE);
- value = tryObjectEncoding(value);
- ret = dictAdd(dict, field, value);
+ key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
+ value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
+ ret = dictAdd(dict, key, value);
if (ret != DICT_OK) {
- redisLogHexDump(REDIS_WARNING,"ziplist with dup elements dump",
+ serverLogHexDump(LL_WARNING,"ziplist with dup elements dump",
o->ptr,ziplistBlobLen(o->ptr));
- redisAssert(ret == DICT_OK);
+ serverPanic("Ziplist corruption detected");
}
}
-
hashTypeReleaseIterator(hi);
zfree(o->ptr);
-
- o->encoding = REDIS_ENCODING_HT;
+ o->encoding = OBJ_ENCODING_HT;
o->ptr = dict;
-
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
void hashTypeConvert(robj *o, int enc) {
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
hashTypeConvertZiplist(o, enc);
- } else if (o->encoding == REDIS_ENCODING_HT) {
- redisPanic("Not implemented");
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ serverPanic("Not implemented");
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
@@ -461,39 +511,24 @@ void hashTypeConvert(robj *o, int enc) {
* Hash type commands
*----------------------------------------------------------------------------*/
-void hsetCommand(redisClient *c) {
- int update;
- robj *o;
-
- if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- hashTypeTryConversion(o,c->argv,2,3);
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- update = hashTypeSet(o,c->argv[2],c->argv[3]);
- addReply(c, update ? shared.czero : shared.cone);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
- server.dirty++;
-}
-
-void hsetnxCommand(redisClient *c) {
+void hsetnxCommand(client *c) {
robj *o;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
hashTypeTryConversion(o,c->argv,2,3);
- if (hashTypeExists(o, c->argv[2])) {
+ if (hashTypeExists(o, c->argv[2]->ptr)) {
addReply(c, shared.czero);
} else {
- hashTypeTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
- hashTypeSet(o,c->argv[2],c->argv[3]);
+ hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY);
addReply(c, shared.cone);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty++;
}
}
-void hmsetCommand(redisClient *c) {
- int i;
+void hsetCommand(client *c) {
+ int i, created = 0;
robj *o;
if ((c->argc % 2) == 1) {
@@ -503,29 +538,40 @@ void hmsetCommand(redisClient *c) {
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
hashTypeTryConversion(o,c->argv,2,c->argc-1);
- for (i = 2; i < c->argc; i += 2) {
- hashTypeTryObjectEncoding(o,&c->argv[i], &c->argv[i+1]);
- hashTypeSet(o,c->argv[i],c->argv[i+1]);
+
+ for (i = 2; i < c->argc; i += 2)
+ created += !hashTypeSet(o,c->argv[i]->ptr,c->argv[i+1]->ptr,HASH_SET_COPY);
+
+ /* HMSET (deprecated) and HSET return different values. */
+ char *cmdname = c->argv[0]->ptr;
+ if (cmdname[1] == 's' || cmdname[1] == 'S') {
+ /* HSET */
+ addReplyLongLong(c, created);
+ } else {
+ /* HMSET */
+ addReply(c, shared.ok);
}
- addReply(c, shared.ok);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty++;
}
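For reference, the client-visible difference between the two reply paths above (illustrative session, not from this patch):

/* Example replies after the unification:
 *
 *   HSET  myhash f1 v1 f2 v2   -> (integer) 2    two fields created
 *   HSET  myhash f1 other      -> (integer) 0    update only, none created
 *   HMSET myhash f1 v1         -> +OK            legacy reply, kept for
 *                                                backward compatibility */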
-void hincrbyCommand(redisClient *c) {
+void hincrbyCommand(client *c) {
long long value, incr, oldvalue;
- robj *o, *current, *new;
+ robj *o;
+ sds new;
+ unsigned char *vstr;
+ unsigned int vlen;
- if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- if ((current = hashTypeGetObject(o,c->argv[2])) != NULL) {
- if (getLongLongFromObjectOrReply(c,current,&value,
- "hash value is not an integer") != REDIS_OK) {
- decrRefCount(current);
- return;
- }
- decrRefCount(current);
+ if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&value) == C_OK) {
+ if (vstr) {
+ if (string2ll((char*)vstr,vlen,&value) == 0) {
+ addReplyError(c,"hash value is not an integer");
+ return;
+ }
+ } /* Else hashTypeGetValue() already stored it into &value */
} else {
value = 0;
}
@@ -537,53 +583,61 @@ void hincrbyCommand(redisClient *c) {
return;
}
value += incr;
- new = createStringObjectFromLongLong(value);
- hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
- hashTypeSet(o,c->argv[2],new);
- decrRefCount(new);
+ new = sdsfromlonglong(value);
+ hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
addReplyLongLong(c,value);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hincrby",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id);
server.dirty++;
}
-void hincrbyfloatCommand(redisClient *c) {
- double long value, incr;
- robj *o, *current, *new, *aux;
+void hincrbyfloatCommand(client *c) {
+ long double value, incr;
+ long long ll;
+ robj *o;
+ sds new;
+ unsigned char *vstr;
+ unsigned int vlen;
- if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
+ if (getLongDoubleFromObjectOrReply(c,c->argv[3],&incr,NULL) != C_OK) return;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
- if ((current = hashTypeGetObject(o,c->argv[2])) != NULL) {
- if (getLongDoubleFromObjectOrReply(c,current,&value,
- "hash value is not a valid float") != REDIS_OK) {
- decrRefCount(current);
- return;
+ if (hashTypeGetValue(o,c->argv[2]->ptr,&vstr,&vlen,&ll) == C_OK) {
+ if (vstr) {
+ if (string2ld((char*)vstr,vlen,&value) == 0) {
+ addReplyError(c,"hash value is not a float");
+ return;
+ }
+ } else {
+ value = (long double)ll;
}
- decrRefCount(current);
} else {
value = 0;
}
value += incr;
- new = createStringObjectFromLongDouble(value,1);
- hashTypeTryObjectEncoding(o,&c->argv[2],NULL);
- hashTypeSet(o,c->argv[2],new);
- addReplyBulk(c,new);
+
+ char buf[MAX_LONG_DOUBLE_CHARS];
+ int len = ld2string(buf,sizeof(buf),value,1);
+ new = sdsnewlen(buf,len);
+ hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE);
+ addReplyBulkCBuffer(c,buf,len);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id);
server.dirty++;
/* Always replicate HINCRBYFLOAT as an HSET command with the final value
* in order to make sure that differences in float precision or formatting
* will not create differences in replicas or after an AOF restart. */
+ robj *aux, *newobj;
aux = createStringObject("HSET",4);
+ newobj = createRawStringObject(buf,len);
rewriteClientCommandArgument(c,0,aux);
decrRefCount(aux);
- rewriteClientCommandArgument(c,3,new);
- decrRefCount(new);
+ rewriteClientCommandArgument(c,3,newobj);
+ decrRefCount(newobj);
}
-static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
+static void addHashFieldToReply(client *c, robj *o, sds field) {
int ret;
if (o == NULL) {
@@ -591,7 +645,7 @@ static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
return;
}
- if (o->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (o->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
@@ -607,57 +661,53 @@ static void addHashFieldToReply(redisClient *c, robj *o, robj *field) {
}
}
- } else if (o->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- ret = hashTypeGetFromHashTable(o, field, &value);
- if (ret < 0) {
+ } else if (o->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeGetFromHashTable(o, field);
+ if (value == NULL)
addReply(c, shared.nullbulk);
- } else {
- addReplyBulk(c, value);
- }
-
+ else
+ addReplyBulkCBuffer(c, value, sdslen(value));
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
-void hgetCommand(redisClient *c) {
+void hgetCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
- addHashFieldToReply(c, o, c->argv[2]);
+ addHashFieldToReply(c, o, c->argv[2]->ptr);
}
-void hmgetCommand(redisClient *c) {
+void hmgetCommand(client *c) {
robj *o;
int i;
/* Don't abort when the key cannot be found. Non-existing keys are empty
* hashes, where HMGET should respond with a series of null bulks. */
o = lookupKeyRead(c->db, c->argv[1]);
- if (o != NULL && o->type != REDIS_HASH) {
+ if (o != NULL && o->type != OBJ_HASH) {
addReply(c, shared.wrongtypeerr);
return;
}
addReplyMultiBulkLen(c, c->argc-2);
for (i = 2; i < c->argc; i++) {
- addHashFieldToReply(c, o, c->argv[i]);
+ addHashFieldToReply(c, o, c->argv[i]->ptr);
}
}
-void hdelCommand(redisClient *c) {
+void hdelCommand(client *c) {
robj *o;
int j, deleted = 0, keyremoved = 0;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
for (j = 2; j < c->argc; j++) {
- if (hashTypeDelete(o,c->argv[j])) {
+ if (hashTypeDelete(o,c->argv[j]->ptr)) {
deleted++;
if (hashTypeLength(o) == 0) {
dbDelete(c->db,c->argv[1]);
@@ -668,104 +718,108 @@ void hdelCommand(redisClient *c) {
}
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
-void hlenCommand(redisClient *c) {
+void hlenCommand(client *c) {
robj *o;
+
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
addReplyLongLong(c,hashTypeLength(o));
}
-static void addHashIteratorCursorToReply(redisClient *c, hashTypeIterator *hi, int what) {
- if (hi->encoding == REDIS_ENCODING_ZIPLIST) {
+void hstrlenCommand(client *c) {
+ robj *o;
+
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,o,OBJ_HASH)) return;
+ addReplyLongLong(c,hashTypeGetValueLength(o,c->argv[2]->ptr));
+}
+
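HSTRLEN is new here; for string values hashTypeGetValueLength() reports sdslen(), while for integer-encoded ziplist entries it reports the decimal digit count via sdigits10(), which matches the length of the value's string form. Illustrative replies (not from this patch):

/* Example HSTRLEN behavior:
 *
 *   HSET myhash name "redis"  then  HSTRLEN myhash name   -> 5
 *   HSET myhash port 6379     then  HSTRLEN myhash port   -> 4
 *   HSTRLEN myhash missing                                -> 0 */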
+static void addHashIteratorCursorToReply(client *c, hashTypeIterator *hi, int what) {
+ if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *vstr = NULL;
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
hashTypeCurrentFromZiplist(hi, what, &vstr, &vlen, &vll);
- if (vstr) {
+ if (vstr)
addReplyBulkCBuffer(c, vstr, vlen);
- } else {
+ else
addReplyBulkLongLong(c, vll);
- }
-
- } else if (hi->encoding == REDIS_ENCODING_HT) {
- robj *value;
-
- hashTypeCurrentFromHashTable(hi, what, &value);
- addReplyBulk(c, value);
-
+ } else if (hi->encoding == OBJ_ENCODING_HT) {
+ sds value = hashTypeCurrentFromHashTable(hi, what);
+ addReplyBulkCBuffer(c, value, sdslen(value));
} else {
- redisPanic("Unknown hash encoding");
+ serverPanic("Unknown hash encoding");
}
}
-void genericHgetallCommand(redisClient *c, int flags) {
+void genericHgetallCommand(client *c, int flags) {
robj *o;
hashTypeIterator *hi;
int multiplier = 0;
int length, count = 0;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_HASH)) return;
+ || checkType(c,o,OBJ_HASH)) return;
- if (flags & REDIS_HASH_KEY) multiplier++;
- if (flags & REDIS_HASH_VALUE) multiplier++;
+ if (flags & OBJ_HASH_KEY) multiplier++;
+ if (flags & OBJ_HASH_VALUE) multiplier++;
length = hashTypeLength(o) * multiplier;
addReplyMultiBulkLen(c, length);
hi = hashTypeInitIterator(o);
- while (hashTypeNext(hi) != REDIS_ERR) {
- if (flags & REDIS_HASH_KEY) {
- addHashIteratorCursorToReply(c, hi, REDIS_HASH_KEY);
+ while (hashTypeNext(hi) != C_ERR) {
+ if (flags & OBJ_HASH_KEY) {
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
count++;
}
- if (flags & REDIS_HASH_VALUE) {
- addHashIteratorCursorToReply(c, hi, REDIS_HASH_VALUE);
+ if (flags & OBJ_HASH_VALUE) {
+ addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
count++;
}
}
hashTypeReleaseIterator(hi);
- redisAssert(count == length);
+ serverAssert(count == length);
}
-void hkeysCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY);
+void hkeysCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_KEY);
}
-void hvalsCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_VALUE);
+void hvalsCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_VALUE);
}
-void hgetallCommand(redisClient *c) {
- genericHgetallCommand(c,REDIS_HASH_KEY|REDIS_HASH_VALUE);
+void hgetallCommand(client *c) {
+ genericHgetallCommand(c,OBJ_HASH_KEY|OBJ_HASH_VALUE);
}
-void hexistsCommand(redisClient *c) {
+void hexistsCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
- addReply(c, hashTypeExists(o,c->argv[2]) ? shared.cone : shared.czero);
+ addReply(c, hashTypeExists(o,c->argv[2]->ptr) ? shared.cone : shared.czero);
}
-void hscanCommand(redisClient *c) {
+void hscanCommand(client *c) {
robj *o;
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,o,REDIS_HASH)) return;
+ checkType(c,o,OBJ_HASH)) return;
scanGenericCommand(c,o,cursor);
}
diff --git a/src/t_list.c b/src/t_list.c
index 232cb5c52..1414ff31a 100644
--- a/src/t_list.c
+++ b/src/t_list.c
@@ -27,7 +27,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* List API
@@ -39,14 +39,14 @@
* There is no need for the caller to increment the refcount of 'value' as
* the function takes care of it if needed. */
void listTypePush(robj *subject, robj *value, int where) {
- if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
- int pos = (where == REDIS_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
+ int pos = (where == LIST_HEAD) ? QUICKLIST_HEAD : QUICKLIST_TAIL;
value = getDecodedObject(value);
size_t len = sdslen(value->ptr);
quicklistPush(subject->ptr, value->ptr, len, pos);
decrRefCount(value);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
@@ -58,24 +58,24 @@ robj *listTypePop(robj *subject, int where) {
long long vlong;
robj *value = NULL;
- int ql_where = where == REDIS_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
- if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+ int ql_where = where == LIST_HEAD ? QUICKLIST_HEAD : QUICKLIST_TAIL;
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
if (quicklistPopCustom(subject->ptr, ql_where, (unsigned char **)&value,
NULL, &vlong, listPopSaver)) {
if (!value)
value = createStringObjectFromLongLong(vlong);
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return value;
}
-unsigned long listTypeLength(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_QUICKLIST) {
+unsigned long listTypeLength(const robj *subject) {
+ if (subject->encoding == OBJ_ENCODING_QUICKLIST) {
return quicklistCount(subject->ptr);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
@@ -87,15 +87,15 @@ listTypeIterator *listTypeInitIterator(robj *subject, long index,
li->encoding = subject->encoding;
li->direction = direction;
li->iter = NULL;
- /* REDIS_HEAD means start at TAIL and move *towards* head.
- * REDIS_TAIL means start at HEAD and move *towards tail. */
+ /* LIST_HEAD means start at TAIL and move *towards* head.
+ * LIST_TAIL means start at HEAD and move *towards* tail. */
int iter_direction =
- direction == REDIS_HEAD ? AL_START_TAIL : AL_START_HEAD;
- if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ direction == LIST_HEAD ? AL_START_TAIL : AL_START_HEAD;
+ if (li->encoding == OBJ_ENCODING_QUICKLIST) {
li->iter = quicklistGetIteratorAtIdx(li->subject->ptr,
iter_direction, index);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return li;
}
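A sketch of the inverted direction semantics in practice: to walk a list back to front, start at index -1 and iterate with LIST_HEAD, exactly as lremCommand() below does for negative counts (illustrative function, not part of this patch):

#include "server.h"

void exampleReverseScan(robj *list) {
    listTypeIterator *li = listTypeInitIterator(list, -1, LIST_HEAD);
    listTypeEntry entry;
    while (listTypeNext(li, &entry)) {
        robj *value = listTypeGet(&entry); /* new object, refcount 1 */
        /* ... use value ... */
        decrRefCount(value);
    }
    listTypeReleaseIterator(li);
}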
@@ -111,13 +111,13 @@ void listTypeReleaseIterator(listTypeIterator *li) {
* entry is in fact an entry, 0 otherwise. */
int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Protect from converting when iterating */
- redisAssert(li->subject->encoding == li->encoding);
+ serverAssert(li->subject->encoding == li->encoding);
entry->li = li;
- if (li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (li->encoding == OBJ_ENCODING_QUICKLIST) {
return quicklistNext(li->iter, &entry->entry);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return 0;
}
@@ -125,7 +125,7 @@ int listTypeNext(listTypeIterator *li, listTypeEntry *entry) {
/* Return entry or NULL at the current position of the iterator. */
robj *listTypeGet(listTypeEntry *entry) {
robj *value = NULL;
- if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
if (entry->entry.value) {
value = createStringObject((char *)entry->entry.value,
entry->entry.sz);
@@ -133,60 +133,60 @@ robj *listTypeGet(listTypeEntry *entry) {
value = createStringObjectFromLongLong(entry->entry.longval);
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
return value;
}
void listTypeInsert(listTypeEntry *entry, robj *value, int where) {
- if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
value = getDecodedObject(value);
sds str = value->ptr;
size_t len = sdslen(str);
- if (where == REDIS_TAIL) {
+ if (where == LIST_TAIL) {
quicklistInsertAfter((quicklist *)entry->entry.quicklist,
&entry->entry, str, len);
- } else if (where == REDIS_HEAD) {
+ } else if (where == LIST_HEAD) {
quicklistInsertBefore((quicklist *)entry->entry.quicklist,
&entry->entry, str, len);
}
decrRefCount(value);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Compare the given object with the entry at the current position. */
int listTypeEqual(listTypeEntry *entry, robj *o) {
- if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
- redisAssertWithInfo(NULL,o,sdsEncodedObject(o));
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
+ serverAssertWithInfo(NULL,o,sdsEncodedObject(o));
return quicklistCompare(entry->entry.zi,o->ptr,sdslen(o->ptr));
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Delete the element pointed to. */
void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry) {
- if (entry->li->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (entry->li->encoding == OBJ_ENCODING_QUICKLIST) {
quicklistDelEntry(iter->iter, &entry->entry);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
/* Create a quicklist from a single ziplist */
void listTypeConvert(robj *subject, int enc) {
- redisAssertWithInfo(NULL,subject,subject->type==REDIS_LIST);
- redisAssertWithInfo(NULL,subject,subject->encoding==REDIS_ENCODING_ZIPLIST);
+ serverAssertWithInfo(NULL,subject,subject->type==OBJ_LIST);
+ serverAssertWithInfo(NULL,subject,subject->encoding==OBJ_ENCODING_ZIPLIST);
- if (enc == REDIS_ENCODING_QUICKLIST) {
+ if (enc == OBJ_ENCODING_QUICKLIST) {
size_t zlen = server.list_max_ziplist_size;
int depth = server.list_compress_depth;
subject->ptr = quicklistCreateFromZiplist(zlen, depth, subject->ptr);
- subject->encoding = REDIS_ENCODING_QUICKLIST;
+ subject->encoding = OBJ_ENCODING_QUICKLIST;
} else {
- redisPanic("Unsupported list conversion");
+ serverPanic("Unsupported list conversion");
}
}
@@ -194,17 +194,16 @@ void listTypeConvert(robj *subject, int enc) {
* List Commands
*----------------------------------------------------------------------------*/
-void pushGenericCommand(redisClient *c, int where) {
- int j, waiting = 0, pushed = 0;
+void pushGenericCommand(client *c, int where) {
+ int j, pushed = 0;
robj *lobj = lookupKeyWrite(c->db,c->argv[1]);
- if (lobj && lobj->type != REDIS_LIST) {
+ if (lobj && lobj->type != OBJ_LIST) {
addReply(c,shared.wrongtypeerr);
return;
}
for (j = 2; j < c->argc; j++) {
- c->argv[j] = tryObjectEncoding(c->argv[j]);
if (!lobj) {
lobj = createQuicklistObject();
quicklistSetOptions(lobj->ptr, server.list_max_ziplist_size,
@@ -214,104 +213,114 @@ void pushGenericCommand(redisClient *c, int where) {
listTypePush(lobj,c->argv[j],where);
pushed++;
}
- addReplyLongLong(c, waiting + (lobj ? listTypeLength(lobj) : 0));
+ addReplyLongLong(c, (lobj ? listTypeLength(lobj) : 0));
if (pushed) {
- char *event = (where == REDIS_HEAD) ? "lpush" : "rpush";
+ char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
}
server.dirty += pushed;
}
-void lpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_HEAD);
+void lpushCommand(client *c) {
+ pushGenericCommand(c,LIST_HEAD);
}
-void rpushCommand(redisClient *c) {
- pushGenericCommand(c,REDIS_TAIL);
+void rpushCommand(client *c) {
+ pushGenericCommand(c,LIST_TAIL);
}
-void pushxGenericCommand(redisClient *c, robj *refval, robj *val, int where) {
+void pushxGenericCommand(client *c, int where) {
+ int j, pushed = 0;
robj *subject;
- listTypeIterator *iter;
- listTypeEntry entry;
- int inserted = 0;
if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,subject,REDIS_LIST)) return;
-
- if (refval != NULL) {
- /* Seek refval from head to tail */
- iter = listTypeInitIterator(subject,0,REDIS_TAIL);
- while (listTypeNext(iter,&entry)) {
- if (listTypeEqual(&entry,refval)) {
- listTypeInsert(&entry,val,where);
- inserted = 1;
- break;
- }
- }
- listTypeReleaseIterator(iter);
+ checkType(c,subject,OBJ_LIST)) return;
- if (inserted) {
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"linsert",
- c->argv[1],c->db->id);
- server.dirty++;
- } else {
- /* Notify client of a failed insert */
- addReply(c,shared.cnegone);
- return;
- }
- } else {
- char *event = (where == REDIS_HEAD) ? "lpush" : "rpush";
-
- listTypePush(subject,val,where);
- signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
- server.dirty++;
+ for (j = 2; j < c->argc; j++) {
+ listTypePush(subject,c->argv[j],where);
+ pushed++;
}
addReplyLongLong(c,listTypeLength(subject));
+
+ if (pushed) {
+ char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
+ }
+ server.dirty += pushed;
}
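After this rewrite LPUSHX/RPUSHX accept multiple values like LPUSH/RPUSH, but still refuse to create a missing key. Illustrative replies (not from this patch):

/* Example variadic LPUSHX behavior:
 *
 *   LPUSHX nosuchkey a b c   -> (integer) 0    key not created
 *   LPUSH  mylist a          -> (integer) 1
 *   LPUSHX mylist b c        -> (integer) 3    list length after push */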
-void lpushxCommand(redisClient *c) {
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_HEAD);
+void lpushxCommand(client *c) {
+ pushxGenericCommand(c,LIST_HEAD);
}
-void rpushxCommand(redisClient *c) {
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- pushxGenericCommand(c,NULL,c->argv[2],REDIS_TAIL);
+void rpushxCommand(client *c) {
+ pushxGenericCommand(c,LIST_TAIL);
}
-void linsertCommand(redisClient *c) {
- c->argv[4] = tryObjectEncoding(c->argv[4]);
+void linsertCommand(client *c) {
+ int where;
+ robj *subject;
+ listTypeIterator *iter;
+ listTypeEntry entry;
+ int inserted = 0;
+
if (strcasecmp(c->argv[2]->ptr,"after") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_TAIL);
+ where = LIST_TAIL;
} else if (strcasecmp(c->argv[2]->ptr,"before") == 0) {
- pushxGenericCommand(c,c->argv[3],c->argv[4],REDIS_HEAD);
+ where = LIST_HEAD;
} else {
addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ if ((subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
+ checkType(c,subject,OBJ_LIST)) return;
+
+ /* Seek pivot from head to tail */
+ iter = listTypeInitIterator(subject,0,LIST_TAIL);
+ while (listTypeNext(iter,&entry)) {
+ if (listTypeEqual(&entry,c->argv[3])) {
+ listTypeInsert(&entry,c->argv[4],where);
+ inserted = 1;
+ break;
+ }
+ }
+ listTypeReleaseIterator(iter);
+
+ if (inserted) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
+ c->argv[1],c->db->id);
+ server.dirty++;
+ } else {
+ /* Notify client of a failed insert */
+ addReply(c,shared.cnegone);
+ return;
}
+
+ addReplyLongLong(c,listTypeLength(subject));
}
-void llenCommand(redisClient *c) {
+void llenCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.czero);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
addReplyLongLong(c,listTypeLength(o));
}
-void lindexCommand(redisClient *c) {
+void lindexCommand(client *c) {
robj *o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
long index;
robj *value = NULL;
- if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
return;
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklistEntry entry;
if (quicklistIndex(o->ptr, index, &entry)) {
if (entry.value) {
@@ -325,20 +334,20 @@ void lindexCommand(redisClient *c) {
addReply(c,shared.nullbulk);
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
-void lsetCommand(redisClient *c) {
+void lsetCommand(client *c) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
long index;
robj *value = c->argv[3];
- if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &index, NULL) != C_OK))
return;
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklist *ql = o->ptr;
int replaced = quicklistReplaceAtIndex(ql, index,
value->ptr, sdslen(value->ptr));
@@ -347,29 +356,29 @@ void lsetCommand(redisClient *c) {
} else {
addReply(c,shared.ok);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lset",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"lset",c->argv[1],c->db->id);
server.dirty++;
}
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
}
-void popGenericCommand(redisClient *c, int where) {
+void popGenericCommand(client *c, int where) {
robj *o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk);
- if (o == NULL || checkType(c,o,REDIS_LIST)) return;
+ if (o == NULL || checkType(c,o,OBJ_LIST)) return;
robj *value = listTypePop(o,where);
if (value == NULL) {
addReply(c,shared.nullbulk);
} else {
- char *event = (where == REDIS_HEAD) ? "lpop" : "rpop";
+ char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
addReplyBulk(c,value);
decrRefCount(value);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
if (listTypeLength(o) == 0) {
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
c->argv[1],c->db->id);
dbDelete(c->db,c->argv[1]);
}
@@ -378,23 +387,23 @@ void popGenericCommand(redisClient *c, int where) {
}
}
-void lpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_HEAD);
+void lpopCommand(client *c) {
+ popGenericCommand(c,LIST_HEAD);
}
-void rpopCommand(redisClient *c) {
- popGenericCommand(c,REDIS_TAIL);
+void rpopCommand(client *c) {
+ popGenericCommand(c,LIST_TAIL);
}
-void lrangeCommand(redisClient *c) {
+void lrangeCommand(client *c) {
robj *o;
long start, end, llen, rangelen;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
- || checkType(c,o,REDIS_LIST)) return;
+ || checkType(c,o,OBJ_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
@@ -413,8 +422,8 @@ void lrangeCommand(redisClient *c) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c,rangelen);
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
- listTypeIterator *iter = listTypeInitIterator(o, start, REDIS_TAIL);
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
+ listTypeIterator *iter = listTypeInitIterator(o, start, LIST_TAIL);
while(rangelen--) {
listTypeEntry entry;
@@ -428,19 +437,19 @@ void lrangeCommand(redisClient *c) {
}
listTypeReleaseIterator(iter);
} else {
- redisPanic("List encoding is not QUICKLIST!");
+ serverPanic("List encoding is not QUICKLIST!");
}
}
-void ltrimCommand(redisClient *c) {
+void ltrimCommand(client *c) {
robj *o;
long start, end, llen, ltrim, rtrim;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
- checkType(c,o,REDIS_LIST)) return;
+ checkType(c,o,OBJ_LIST)) return;
llen = listTypeLength(o);
/* convert negative indexes */
@@ -461,41 +470,41 @@ void ltrimCommand(redisClient *c) {
}
/* Remove list elements to perform the trim */
- if (o->encoding == REDIS_ENCODING_QUICKLIST) {
+ if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklistDelRange(o->ptr,0,ltrim);
quicklistDelRange(o->ptr,-rtrim,rtrim);
} else {
- redisPanic("Unknown list encoding");
+ serverPanic("Unknown list encoding");
}
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
if (listTypeLength(o) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
signalModifiedKey(c->db,c->argv[1]);
server.dirty++;
addReply(c,shared.ok);
}
-void lremCommand(redisClient *c) {
+void lremCommand(client *c) {
robj *subject, *obj;
obj = c->argv[3];
long toremove;
long removed = 0;
- if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &toremove, NULL) != C_OK))
return;
subject = lookupKeyWriteOrReply(c,c->argv[1],shared.czero);
- if (subject == NULL || checkType(c,subject,REDIS_LIST)) return;
+ if (subject == NULL || checkType(c,subject,OBJ_LIST)) return;
listTypeIterator *li;
if (toremove < 0) {
toremove = -toremove;
- li = listTypeInitIterator(subject,-1,REDIS_HEAD);
+ li = listTypeInitIterator(subject,-1,LIST_HEAD);
} else {
- li = listTypeInitIterator(subject,0,REDIS_TAIL);
+ li = listTypeInitIterator(subject,0,LIST_TAIL);
}
listTypeEntry entry;
@@ -509,12 +518,17 @@ void lremCommand(redisClient *c) {
}
listTypeReleaseIterator(li);
+ if (removed) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"lrem",c->argv[1],c->db->id);
+ }
+
if (listTypeLength(subject) == 0) {
dbDelete(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
addReplyLongLong(c,removed);
- if (removed) signalModifiedKey(c->db,c->argv[1]);
}
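The sign of the count selects the scan direction set up above. Illustrative replies (not from this patch):

/* Example LREM count semantics:
 *
 *   RPUSH mylist a b a c a   -> (integer) 5
 *   LREM  mylist  2 a        -> (integer) 2    two head-most "a" removed
 *   LREM  mylist -1 a        -> (integer) 1    tail-most "a" removed
 *   LREM  mylist  0 c        -> (integer) 1    count 0 removes them all */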
/* These are the semantics of this command:
@@ -533,7 +547,7 @@ void lremCommand(redisClient *c) {
* as well. This command was originally proposed by Ezra Zygmuntowicz.
*/
-void rpoplpushHandlePush(redisClient *c, robj *dstkey, robj *dstobj, robj *value) {
+void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) {
/* Create the list if the key does not exist */
if (!dstobj) {
dstobj = createQuicklistObject();
@@ -542,16 +556,16 @@ void rpoplpushHandlePush(redisClient *c, robj *dstkey, robj *dstobj, robj *value
dbAdd(c->db,dstkey,dstobj);
}
signalModifiedKey(c->db,dstkey);
- listTypePush(dstobj,value,REDIS_HEAD);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"lpush",dstkey,c->db->id);
+ listTypePush(dstobj,value,LIST_HEAD);
+ notifyKeyspaceEvent(NOTIFY_LIST,"lpush",dstkey,c->db->id);
/* Always send the pushed value to the client. */
addReplyBulk(c,value);
}
-void rpoplpushCommand(redisClient *c) {
+void rpoplpushCommand(client *c) {
robj *sobj, *value;
if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,sobj,REDIS_LIST)) return;
+ checkType(c,sobj,OBJ_LIST)) return;
if (listTypeLength(sobj) == 0) {
/* This may only happen after loading very old RDB files. Recent
@@ -561,8 +575,8 @@ void rpoplpushCommand(redisClient *c) {
robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
robj *touchedkey = c->argv[1];
- if (dobj && checkType(c,dobj,REDIS_LIST)) return;
- value = listTypePop(sobj,REDIS_TAIL);
+ if (dobj && checkType(c,dobj,OBJ_LIST)) return;
+ value = listTypePop(sobj,LIST_TAIL);
/* We save the touched key and protect it, since rpoplpushHandlePush
* may change the client command argument vector (it does not
* currently). */
@@ -573,10 +587,10 @@ void rpoplpushCommand(redisClient *c) {
decrRefCount(value);
/* Delete the source list when it is empty */
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,"rpop",touchedkey,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_LIST,"rpop",touchedkey,c->db->id);
if (listTypeLength(sobj) == 0) {
dbDelete(c->db,touchedkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
touchedkey,c->db->id);
}
signalModifiedKey(c->db,touchedkey);
@@ -589,119 +603,6 @@ void rpoplpushCommand(redisClient *c) {
* Blocking POP operations
*----------------------------------------------------------------------------*/
-/* This is how the current blocking POP works, we use BLPOP as example:
- * - If the user calls BLPOP and the key exists and contains a non empty list
- * then LPOP is called instead. So BLPOP is semantically the same as LPOP
- * if blocking is not required.
- * - If instead BLPOP is called and the key does not exists or the list is
- * empty we need to block. In order to do so we remove the notification for
- * new data to read in the client socket (so that we'll not serve new
- * requests if the blocking request is not served). Also we put the client
- * in a dictionary (db->blocking_keys) mapping keys to a list of clients
- * blocking for this keys.
- * - If a PUSH operation against a key with blocked clients waiting is
- * performed, we mark this key as "ready", and after the current command,
- * MULTI/EXEC block, or script, is executed, we serve all the clients waiting
- * for this list, from the one that blocked first, to the last, accordingly
- * to the number of elements we have in the ready list.
- */
-
-/* Set a client in blocking mode for the specified key, with the specified
- * timeout */
-void blockForKeys(redisClient *c, robj **keys, int numkeys, mstime_t timeout, robj *target) {
- dictEntry *de;
- list *l;
- int j;
-
- c->bpop.timeout = timeout;
- c->bpop.target = target;
-
- if (target != NULL) incrRefCount(target);
-
- for (j = 0; j < numkeys; j++) {
- /* If the key already exists in the dict ignore it. */
- if (dictAdd(c->bpop.keys,keys[j],NULL) != DICT_OK) continue;
- incrRefCount(keys[j]);
-
- /* And in the other "side", to map keys -> clients */
- de = dictFind(c->db->blocking_keys,keys[j]);
- if (de == NULL) {
- int retval;
-
- /* For every key we take a list of clients blocked for it */
- l = listCreate();
- retval = dictAdd(c->db->blocking_keys,keys[j],l);
- incrRefCount(keys[j]);
- redisAssertWithInfo(c,keys[j],retval == DICT_OK);
- } else {
- l = dictGetVal(de);
- }
- listAddNodeTail(l,c);
- }
- blockClient(c,REDIS_BLOCKED_LIST);
-}
-
-/* Unblock a client that's waiting in a blocking operation such as BLPOP.
- * You should never call this function directly, but unblockClient() instead. */
-void unblockClientWaitingData(redisClient *c) {
- dictEntry *de;
- dictIterator *di;
- list *l;
-
- redisAssertWithInfo(c,NULL,dictSize(c->bpop.keys) != 0);
- di = dictGetIterator(c->bpop.keys);
- /* The client may wait for multiple keys, so unblock it for every key. */
- while((de = dictNext(di)) != NULL) {
- robj *key = dictGetKey(de);
-
- /* Remove this client from the list of clients waiting for this key. */
- l = dictFetchValue(c->db->blocking_keys,key);
- redisAssertWithInfo(c,key,l != NULL);
- listDelNode(l,listSearchKey(l,c));
- /* If the list is empty we need to remove it to avoid wasting memory */
- if (listLength(l) == 0)
- dictDelete(c->db->blocking_keys,key);
- }
- dictReleaseIterator(di);
-
- /* Cleanup the client structure */
- dictEmpty(c->bpop.keys,NULL);
- if (c->bpop.target) {
- decrRefCount(c->bpop.target);
- c->bpop.target = NULL;
- }
-}
-
-/* If the specified key has clients blocked waiting for list pushes, this
- * function will put the key reference into the server.ready_keys list.
- * Note that db->ready_keys is a hash table that allows us to avoid putting
- * the same key again and again in the list in case of multiple pushes
- * made by a script or in the context of MULTI/EXEC.
- *
- * The list will be finally processed by handleClientsBlockedOnLists() */
-void signalListAsReady(redisDb *db, robj *key) {
- readyList *rl;
-
- /* No clients blocking for this key? No need to queue it. */
- if (dictFind(db->blocking_keys,key) == NULL) return;
-
- /* Key was already signaled? No need to queue it again. */
- if (dictFind(db->ready_keys,key) != NULL) return;
-
- /* Ok, we need to queue this key into server.ready_keys. */
- rl = zmalloc(sizeof(*rl));
- rl->key = key;
- rl->db = db;
- incrRefCount(key);
- listAddNodeTail(server.ready_keys,rl);
-
- /* We also add the key in the db->ready_keys dictionary in order
- * to avoid adding it multiple times into a list with a simple O(1)
- * check. */
- incrRefCount(key);
- redisAssert(dictAdd(db->ready_keys,key,NULL) == DICT_OK);
-}
-
/* This is a helper function for handleClientsBlockedOnLists(). Its job
* is to serve a specific client (receiver) that is blocked on 'key'
* in the context of the specified 'db', doing the following:
@@ -712,46 +613,50 @@ void signalListAsReady(redisDb *db, robj *key) {
* 3) Propagate the resulting BRPOP, BLPOP and additional LPUSH if any into
* the AOF and replication channel.
*
- * The argument 'where' is REDIS_TAIL or REDIS_HEAD, and indicates if the
+ * The argument 'where' is LIST_TAIL or LIST_HEAD, and indicates if the
* 'value' element was popped from the head (BLPOP) or tail (BRPOP) so that
* we can propagate the command properly.
*
- * The function returns REDIS_OK if we are able to serve the client, otherwise
- * REDIS_ERR is returned to signal the caller that the list POP operation
+ * The function returns C_OK if we are able to serve the client, otherwise
+ * C_ERR is returned to signal the caller that the list POP operation
* should be undone as the client was not served: This only happens for
* BRPOPLPUSH that fails to push the value to the destination key as it is
* of the wrong type. */
-int serveClientBlockedOnList(redisClient *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where)
+int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int where)
{
robj *argv[3];
if (dstkey == NULL) {
/* Propagate the [LR]POP operation. */
- argv[0] = (where == REDIS_HEAD) ? shared.lpop :
+ argv[0] = (where == LIST_HEAD) ? shared.lpop :
shared.rpop;
argv[1] = key;
- propagate((where == REDIS_HEAD) ?
+ propagate((where == LIST_HEAD) ?
server.lpopCommand : server.rpopCommand,
- db->id,argv,2,REDIS_PROPAGATE_AOF|REDIS_PROPAGATE_REPL);
+ db->id,argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
/* BRPOP/BLPOP */
addReplyMultiBulkLen(receiver,2);
addReplyBulk(receiver,key);
addReplyBulk(receiver,value);
+
+ /* Notify event. */
+ char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
+ notifyKeyspaceEvent(NOTIFY_LIST,event,key,receiver->db->id);
} else {
/* BRPOPLPUSH */
robj *dstobj =
lookupKeyWrite(receiver->db,dstkey);
if (!(dstobj &&
- checkType(receiver,dstobj,REDIS_LIST)))
+ checkType(receiver,dstobj,OBJ_LIST)))
{
/* Propagate the RPOP operation. */
argv[0] = shared.rpop;
argv[1] = key;
propagate(server.rpopCommand,
db->id,argv,2,
- REDIS_PROPAGATE_AOF|
- REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|
+ PROPAGATE_REPL);
rpoplpushHandlePush(receiver,dstkey,dstobj,
value);
/* Propagate the LPUSH operation. */
@@ -760,139 +665,51 @@ int serveClientBlockedOnList(redisClient *receiver, robj *key, robj *dstkey, red
argv[2] = value;
propagate(server.lpushCommand,
db->id,argv,3,
- REDIS_PROPAGATE_AOF|
- REDIS_PROPAGATE_REPL);
+ PROPAGATE_AOF|
+ PROPAGATE_REPL);
+
+ /* Notify event ("lpush" was notified by rpoplpushHandlePush). */
+ notifyKeyspaceEvent(NOTIFY_LIST,"rpop",key,receiver->db->id);
} else {
/* BRPOPLPUSH failed because of wrong
* destination type. */
- return REDIS_ERR;
- }
- }
- return REDIS_OK;
-}
-
-/* This function should be called by Redis every time a single command,
- * a MULTI/EXEC block, or a Lua script, terminated its execution after
- * being called by a client.
- *
- * All the keys with at least one client blocked that received at least
- * one new element via some PUSH operation are accumulated into
- * the server.ready_keys list. This function will run the list and will
- * serve clients accordingly. Note that the function will iterate again and
- * again as a result of serving BRPOPLPUSH we can have new blocking clients
- * to serve because of the PUSH side of BRPOPLPUSH. */
-void handleClientsBlockedOnLists(void) {
- while(listLength(server.ready_keys) != 0) {
- list *l;
-
- /* Point server.ready_keys to a fresh list and save the current one
- * locally. This way as we run the old list we are free to call
- * signalListAsReady() that may push new elements in server.ready_keys
- * when handling clients blocked into BRPOPLPUSH. */
- l = server.ready_keys;
- server.ready_keys = listCreate();
-
- while(listLength(l) != 0) {
- listNode *ln = listFirst(l);
- readyList *rl = ln->value;
-
- /* First of all remove this key from db->ready_keys so that
- * we can safely call signalListAsReady() against this key. */
- dictDelete(rl->db->ready_keys,rl->key);
-
- /* If the key exists and it's a list, serve blocked clients
- * with data. */
- robj *o = lookupKeyWrite(rl->db,rl->key);
- if (o != NULL && o->type == REDIS_LIST) {
- dictEntry *de;
-
- /* We serve clients in the same order they blocked for
- * this key, from the first blocked to the last. */
- de = dictFind(rl->db->blocking_keys,rl->key);
- if (de) {
- list *clients = dictGetVal(de);
- int numclients = listLength(clients);
-
- while(numclients--) {
- listNode *clientnode = listFirst(clients);
- redisClient *receiver = clientnode->value;
- robj *dstkey = receiver->bpop.target;
- int where = (receiver->lastcmd &&
- receiver->lastcmd->proc == blpopCommand) ?
- REDIS_HEAD : REDIS_TAIL;
- robj *value = listTypePop(o,where);
-
- if (value) {
- /* Protect receiver->bpop.target, that will be
- * freed by the next unblockClient()
- * call. */
- if (dstkey) incrRefCount(dstkey);
- unblockClient(receiver);
-
- if (serveClientBlockedOnList(receiver,
- rl->key,dstkey,rl->db,value,
- where) == REDIS_ERR)
- {
- /* If we failed serving the client we need
- * to also undo the POP operation. */
- listTypePush(o,value,where);
- }
-
- if (dstkey) decrRefCount(dstkey);
- decrRefCount(value);
- } else {
- break;
- }
- }
- }
-
- if (listTypeLength(o) == 0) {
- dbDelete(rl->db,rl->key);
- }
- /* We don't call signalModifiedKey() as it was already called
- * when an element was pushed on the list. */
- }
-
- /* Free this item. */
- decrRefCount(rl->key);
- zfree(rl);
- listDelNode(l,ln);
+ return C_ERR;
}
- listRelease(l); /* We have the new list on place at this point. */
}
+ return C_OK;
}
/* Blocking RPOP/LPOP */
-void blockingPopGenericCommand(redisClient *c, int where) {
+void blockingPopGenericCommand(client *c, int where) {
robj *o;
mstime_t timeout;
int j;
if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout,UNIT_SECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
for (j = 1; j < c->argc-1; j++) {
o = lookupKeyWrite(c->db,c->argv[j]);
if (o != NULL) {
- if (o->type != REDIS_LIST) {
+ if (o->type != OBJ_LIST) {
addReply(c,shared.wrongtypeerr);
return;
} else {
if (listTypeLength(o) != 0) {
/* Non empty list, this is like a normal [LR]POP. */
- char *event = (where == REDIS_HEAD) ? "lpop" : "rpop";
+ char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
robj *value = listTypePop(o,where);
- redisAssert(value != NULL);
+ serverAssert(value != NULL);
addReplyMultiBulkLen(c,2);
addReplyBulk(c,c->argv[j]);
addReplyBulk(c,value);
decrRefCount(value);
- notifyKeyspaceEvent(REDIS_NOTIFY_LIST,event,
+ notifyKeyspaceEvent(NOTIFY_LIST,event,
c->argv[j],c->db->id);
if (listTypeLength(o) == 0) {
dbDelete(c->db,c->argv[j]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
c->argv[j],c->db->id);
}
signalModifiedKey(c->db,c->argv[j]);
@@ -900,7 +717,7 @@ void blockingPopGenericCommand(redisClient *c, int where) {
/* Replicate it as an [LR]POP instead of B[LR]POP. */
rewriteClientCommandVector(c,2,
- (where == REDIS_HEAD) ? shared.lpop : shared.rpop,
+ (where == LIST_HEAD) ? shared.lpop : shared.rpop,
c->argv[j]);
return;
}
@@ -910,47 +727,47 @@ void blockingPopGenericCommand(redisClient *c, int where) {
/* If we are inside a MULTI/EXEC and the list is empty the only thing
* we can do is to treat it as a timeout (even with timeout 0). */
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
addReply(c,shared.nullmultibulk);
return;
}
/* If the list is empty or the key does not exist we must block */
- blockForKeys(c, c->argv + 1, c->argc - 2, timeout, NULL);
+ blockForKeys(c,BLOCKED_LIST,c->argv + 1,c->argc - 2,timeout,NULL,NULL);
}
-void blpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_HEAD);
+void blpopCommand(client *c) {
+ blockingPopGenericCommand(c,LIST_HEAD);
}
-void brpopCommand(redisClient *c) {
- blockingPopGenericCommand(c,REDIS_TAIL);
+void brpopCommand(client *c) {
+ blockingPopGenericCommand(c,LIST_TAIL);
}
-void brpoplpushCommand(redisClient *c) {
+void brpoplpushCommand(client *c) {
mstime_t timeout;
if (getTimeoutFromObjectOrReply(c,c->argv[3],&timeout,UNIT_SECONDS)
- != REDIS_OK) return;
+ != C_OK) return;
robj *key = lookupKeyWrite(c->db, c->argv[1]);
if (key == NULL) {
- if (c->flags & REDIS_MULTI) {
+ if (c->flags & CLIENT_MULTI) {
/* Blocking against an empty list in a multi state
* returns immediately. */
addReply(c, shared.nullbulk);
} else {
/* The list is empty and the client blocks. */
- blockForKeys(c, c->argv + 1, 1, timeout, c->argv[2]);
+ blockForKeys(c,BLOCKED_LIST,c->argv + 1,1,timeout,c->argv[2],NULL);
}
} else {
- if (key->type != REDIS_LIST) {
+ if (key->type != OBJ_LIST) {
addReply(c, shared.wrongtypeerr);
} else {
/* The list exists and has elements, so
* the regular rpoplpushCommand is executed. */
- redisAssertWithInfo(c,key,listTypeLength(key) > 0);
+ serverAssertWithInfo(c,key,listTypeLength(key) > 0);
rpoplpushCommand(c);
}
}
diff --git a/src/t_set.c b/src/t_set.c
index f3f8bbaca..f67073fe6 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -27,88 +27,93 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
/*-----------------------------------------------------------------------------
* Set Commands
*----------------------------------------------------------------------------*/
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj *dstkey, int op);
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */
-robj *setTypeCreate(robj *value) {
- if (isObjectRepresentableAsLongLong(value,NULL) == REDIS_OK)
+robj *setTypeCreate(sds value) {
+ if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
return createIntsetObject();
return createSetObject();
}
-int setTypeAdd(robj *subject, robj *value) {
+/* Add the specified value into a set.
+ *
+ * If the value was already member of the set, nothing is done and 0 is
+ * returned, otherwise the new element is added and 1 is returned. */
+int setTypeAdd(robj *subject, sds value) {
long long llval;
- if (subject->encoding == REDIS_ENCODING_HT) {
- if (dictAdd(subject->ptr,value,NULL) == DICT_OK) {
- incrRefCount(value);
+ if (subject->encoding == OBJ_ENCODING_HT) {
+ dict *ht = subject->ptr;
+ dictEntry *de = dictAddRaw(ht,value,NULL);
+ if (de) {
+ dictSetKey(ht,de,sdsdup(value));
+ dictSetVal(ht,de,NULL);
return 1;
}
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
uint8_t success = 0;
subject->ptr = intsetAdd(subject->ptr,llval,&success);
if (success) {
/* Convert to regular set when the intset contains
* too many entries. */
if (intsetLen(subject->ptr) > server.set_max_intset_entries)
- setTypeConvert(subject,REDIS_ENCODING_HT);
+ setTypeConvert(subject,OBJ_ENCODING_HT);
return 1;
}
} else {
/* Failed to get integer from object, convert to regular set. */
- setTypeConvert(subject,REDIS_ENCODING_HT);
+ setTypeConvert(subject,OBJ_ENCODING_HT);
/* The set *was* an intset and this value is not integer
* encodable, so dictAdd should always work. */
- redisAssertWithInfo(NULL,value,
- dictAdd(subject->ptr,value,NULL) == DICT_OK);
- incrRefCount(value);
+ serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
return 1;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
-int setTypeRemove(robj *setobj, robj *value) {
+int setTypeRemove(robj *setobj, sds value) {
long long llval;
- if (setobj->encoding == REDIS_ENCODING_HT) {
+ if (setobj->encoding == OBJ_ENCODING_HT) {
if (dictDelete(setobj->ptr,value) == DICT_OK) {
if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
return 1;
}
- } else if (setobj->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
int success;
setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
if (success) return 1;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
-int setTypeIsMember(robj *subject, robj *value) {
+int setTypeIsMember(robj *subject, sds value) {
long long llval;
- if (subject->encoding == REDIS_ENCODING_HT) {
+ if (subject->encoding == OBJ_ENCODING_HT) {
return dictFind((dict*)subject->ptr,value) != NULL;
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- if (isObjectRepresentableAsLongLong(value,&llval) == REDIS_OK) {
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
return intsetFind((intset*)subject->ptr,llval);
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return 0;
}
@@ -117,18 +122,18 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
setTypeIterator *si = zmalloc(sizeof(setTypeIterator));
si->subject = subject;
si->encoding = subject->encoding;
- if (si->encoding == REDIS_ENCODING_HT) {
+ if (si->encoding == OBJ_ENCODING_HT) {
si->di = dictGetIterator(subject->ptr);
- } else if (si->encoding == REDIS_ENCODING_INTSET) {
+ } else if (si->encoding == OBJ_ENCODING_INTSET) {
si->ii = 0;
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return si;
}
void setTypeReleaseIterator(setTypeIterator *si) {
- if (si->encoding == REDIS_ENCODING_HT)
+ if (si->encoding == OBJ_ENCODING_HT)
dictReleaseIterator(si->di);
zfree(si);
}
@@ -136,55 +141,60 @@ void setTypeReleaseIterator(setTypeIterator *si) {
/* Move to the next entry in the set. Returns the object at the current
* position.
*
- * Since set elements can be internally be stored as redis objects or
+ * Since set elements can internally be stored as SDS strings or
* simple arrays of integers, setTypeNext returns the encoding of the
* set object you are iterating, and will populate the appropriate pointer
- * (eobj) or (llobj) accordingly.
+ * (sdsele) or (llele) accordingly.
+ *
+ * Note that both the sdsele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the
+ * unused field with values which are easy to trap if misused.
*
- * When there are no longer elements -1 is returned.
- * Returned objects ref count is not incremented, so this function is
- * copy on write friendly. */
-int setTypeNext(setTypeIterator *si, robj **objele, int64_t *llele) {
- if (si->encoding == REDIS_ENCODING_HT) {
+ * When there are no more elements -1 is returned. */
+int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
+ if (si->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictNext(si->di);
if (de == NULL) return -1;
- *objele = dictGetKey(de);
- } else if (si->encoding == REDIS_ENCODING_INTSET) {
+ *sdsele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
+ } else if (si->encoding == OBJ_ENCODING_INTSET) {
if (!intsetGet(si->subject->ptr,si->ii++,llele))
return -1;
+ *sdsele = NULL; /* Not needed. Defensive. */
+ } else {
+ serverPanic("Wrong set encoding in setTypeNext");
}
return si->encoding;
}
/* The not copy-on-write friendly but easy to use version
- * of setTypeNext() is setTypeNextObject(), returning new objects
- * or incrementing the ref count of returned objects. So if you don't
- * retain a pointer to this object you should call decrRefCount() against it.
+ * of setTypeNext() is setTypeNextObject(), returning new SDS
+ * strings. So if you don't retain a pointer to this object you should call
+ * sdsfree() against it.
*
* This function is the way to go for write operations where COW is not
- * an issue as the result will be anyway of incrementing the ref count. */
-robj *setTypeNextObject(setTypeIterator *si) {
+ * an issue. */
+sds setTypeNextObject(setTypeIterator *si) {
int64_t intele;
- robj *objele;
+ sds sdsele;
int encoding;
- encoding = setTypeNext(si,&objele,&intele);
+ encoding = setTypeNext(si,&sdsele,&intele);
switch(encoding) {
case -1: return NULL;
- case REDIS_ENCODING_INTSET:
- return createStringObjectFromLongLong(intele);
- case REDIS_ENCODING_HT:
- incrRefCount(objele);
- return objele;
+ case OBJ_ENCODING_INTSET:
+ return sdsfromlonglong(intele);
+ case OBJ_ENCODING_HT:
+ return sdsdup(sdsele);
default:
- redisPanic("Unsupported encoding");
+ serverPanic("Unsupported encoding");
}
return NULL; /* just to suppress warnings */
}
/* Return a random element from a non empty set.
* The returned element can be a int64_t value if the set is encoded
- * as an "intset" blob of integers, or a redis object if the set
+ * as an "intset" blob of integers, or an SDS string if the set
* is a regular set.
*
* The caller provides both pointers to be populated with the right
@@ -192,128 +202,30 @@ robj *setTypeNextObject(setTypeIterator *si) {
* field of the object and is used by the caller to check if the
* int64_t pointer or the redis object pointer was populated.
*
- * When an object is returned (the set was a real set) the ref count
- * of the object is not incremented so this function can be considered
- * copy on write friendly. */
-int setTypeRandomElement(robj *setobj, robj **objele, int64_t *llele) {
- if (setobj->encoding == REDIS_ENCODING_HT) {
+ * Note that both the sdsele and llele pointers should be passed and cannot
+ * be NULL since the function will try to defensively populate the
+ * unused field with values which are easy to trap if misused. */
+int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
+ if (setobj->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictGetRandomKey(setobj->ptr);
- *objele = dictGetKey(de);
- } else if (setobj->encoding == REDIS_ENCODING_INTSET) {
+ *sdsele = dictGetKey(de);
+ *llele = -123456789; /* Not needed. Defensive. */
+ } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
*llele = intsetRandom(setobj->ptr);
+ *sdsele = NULL; /* Not needed. Defensive. */
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
return setobj->encoding;
}
-/* Return a number of random elements from a non empty set.
- *
- * This is a version of setTypeRandomElement() that is modified in order to
- * return multiple entries, using dictGetRandomKeys() and intsetRandomMembers().
- *
- * The elements are stored into 'aux_set' which should be of a set type.
- *
- * The function returns the number of items stored into 'aux_set', that may
- * be less than 'count' if the hash table has less than 'count' elements
- * inside.
- *
- * Note that this function is not suitable when you need a good distribution
- * of the returned items, but only when you need to "sample" a given number
- * of continuous elements to run some kind of algorithm or to produce
- * statistics. However the function is much faster than setTypeRandomElement()
- * at producing N elements, and the elements are guaranteed to be non
- * repeating.
- */
-unsigned long setTypeRandomElements(robj *set, unsigned long count,
- robj *aux_set) {
- unsigned long set_size;
- unsigned long elements_to_return = count;
- unsigned long elements_copied = 0;
- unsigned long current_element = 0;
-
- /* Like all setType* functions, we assume good behavior on part of the
- * caller, so no extra parameter checks are made. */
-
- /* If the number of elements in the the set is less than the count
- * requested, just return all of them. */
- set_size = setTypeSize(set);
- if (set_size < count) {
- elements_to_return = set_size;
- }
-
- /* TODO: It is definitely faster adding items to the set by directly
- * handling the Dict or intset inside it, avoiding the constant encoding
- * checks inside setTypeAdd(). However, We don't want to touch the set
- * internals in non setType* functions. So, we just call setTypeAdd()
- * multiple times, but this isn't an optimal solution.
- * Another option would be to create a bulk-add function:
- * setTypeAddBulk(). */
- if (set->encoding == REDIS_ENCODING_HT) {
- /* Allocate result array */
- dictEntry **random_elements =
- zmalloc(sizeof(dictEntry*) * elements_to_return);
-
- /* Get the random elements */
- elements_copied =
- dictGetRandomKeys(set->ptr, random_elements, elements_to_return);
- redisAssert(elements_copied == elements_to_return);
-
- /* Put them into the set */
- for (current_element = 0; current_element < elements_copied;
- current_element++) {
-
- /* We get the key and duplicate it, as we know it is a string */
- setTypeAdd(aux_set,
- dictGetKey(random_elements[current_element]));
- }
-
- zfree(random_elements);
-
- } else if (set->encoding == REDIS_ENCODING_INTSET) {
- /* Allocate result array */
- int64_t *random_elements =
- zmalloc(sizeof(int64_t) * elements_to_return);
- robj* element_as_str = NULL;
-
- elements_copied =
- intsetRandomMembers((intset*) set->ptr,
- random_elements,
- elements_to_return);
-
- redisAssert(elements_copied == elements_to_return);
-
- /* Put them into the set */
- for (current_element = 0; current_element < elements_copied;
- current_element++) {
-
- element_as_str = createStringObjectFromLongLong(
- random_elements[current_element]);
-
- /* Put the values in the set */
- setTypeAdd(aux_set,
- element_as_str);
-
- decrRefCount(element_as_str);
- }
-
- zfree(random_elements);
+unsigned long setTypeSize(const robj *subject) {
+ if (subject->encoding == OBJ_ENCODING_HT) {
+ return dictSize((const dict*)subject->ptr);
+ } else if (subject->encoding == OBJ_ENCODING_INTSET) {
+ return intsetLen((const intset*)subject->ptr);
} else {
- redisPanic("Unknown set encoding");
- }
-
- /* We have a set with random elements. Return the actual elements in
- the aux_set. */
- return elements_copied;
-}
-
-unsigned long setTypeSize(robj *subject) {
- if (subject->encoding == REDIS_ENCODING_HT) {
- return dictSize((dict*)subject->ptr);
- } else if (subject->encoding == REDIS_ENCODING_INTSET) {
- return intsetLen((intset*)subject->ptr);
- } else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
}
@@ -322,70 +234,68 @@ unsigned long setTypeSize(robj *subject) {
* set. */
void setTypeConvert(robj *setobj, int enc) {
setTypeIterator *si;
- redisAssertWithInfo(NULL,setobj,setobj->type == REDIS_SET &&
- setobj->encoding == REDIS_ENCODING_INTSET);
+ serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
+ setobj->encoding == OBJ_ENCODING_INTSET);
- if (enc == REDIS_ENCODING_HT) {
+ if (enc == OBJ_ENCODING_HT) {
int64_t intele;
dict *d = dictCreate(&setDictType,NULL);
- robj *element;
+ sds element;
/* Presize the dict to avoid rehashing */
dictExpand(d,intsetLen(setobj->ptr));
/* To add the elements we extract integers and create SDS strings */
si = setTypeInitIterator(setobj);
- while (setTypeNext(si,NULL,&intele) != -1) {
- element = createStringObjectFromLongLong(intele);
- redisAssertWithInfo(NULL,element,
- dictAdd(d,element,NULL) == DICT_OK);
+ while (setTypeNext(si,&element,&intele) != -1) {
+ element = sdsfromlonglong(intele);
+ serverAssert(dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
- setobj->encoding = REDIS_ENCODING_HT;
+ setobj->encoding = OBJ_ENCODING_HT;
zfree(setobj->ptr);
setobj->ptr = d;
} else {
- redisPanic("Unsupported set conversion");
+ serverPanic("Unsupported set conversion");
}
}
-void saddCommand(redisClient *c) {
+void saddCommand(client *c) {
robj *set;
int j, added = 0;
set = lookupKeyWrite(c->db,c->argv[1]);
if (set == NULL) {
- set = setTypeCreate(c->argv[2]);
+ set = setTypeCreate(c->argv[2]->ptr);
dbAdd(c->db,c->argv[1],set);
} else {
- if (set->type != REDIS_SET) {
+ if (set->type != OBJ_SET) {
addReply(c,shared.wrongtypeerr);
return;
}
}
for (j = 2; j < c->argc; j++) {
- c->argv[j] = tryObjectEncoding(c->argv[j]);
- if (setTypeAdd(set,c->argv[j])) added++;
+ if (setTypeAdd(set,c->argv[j]->ptr)) added++;
}
if (added) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sadd",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
}
server.dirty += added;
addReplyLongLong(c,added);
}
-void sremCommand(redisClient *c) {
+void sremCommand(client *c) {
robj *set;
int j, deleted = 0, keyremoved = 0;
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
for (j = 2; j < c->argc; j++) {
- if (setTypeRemove(set,c->argv[j])) {
+ if (setTypeRemove(set,c->argv[j]->ptr)) {
deleted++;
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
@@ -396,20 +306,20 @@ void sremCommand(redisClient *c) {
}
if (deleted) {
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"srem",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],
c->db->id);
server.dirty += deleted;
}
addReplyLongLong(c,deleted);
}
-void smoveCommand(redisClient *c) {
+void smoveCommand(client *c) {
robj *srcset, *dstset, *ele;
srcset = lookupKeyWrite(c->db,c->argv[1]);
dstset = lookupKeyWrite(c->db,c->argv[2]);
- ele = c->argv[3] = tryObjectEncoding(c->argv[3]);
+ ele = c->argv[3];
/* If the source key does not exist return 0 */
if (srcset == NULL) {
@@ -419,81 +329,85 @@ void smoveCommand(redisClient *c) {
/* If the source key has the wrong type, or the destination key
* is set and has the wrong type, return with an error. */
- if (checkType(c,srcset,REDIS_SET) ||
- (dstset && checkType(c,dstset,REDIS_SET))) return;
+ if (checkType(c,srcset,OBJ_SET) ||
+ (dstset && checkType(c,dstset,OBJ_SET))) return;
/* If srcset and dstset are equal, SMOVE is a no-op */
if (srcset == dstset) {
- addReply(c,shared.cone);
+ addReply(c,setTypeIsMember(srcset,ele->ptr) ?
+ shared.cone : shared.czero);
return;
}
/* If the element cannot be removed from the src set, return 0. */
- if (!setTypeRemove(srcset,ele)) {
+ if (!setTypeRemove(srcset,ele->ptr)) {
addReply(c,shared.czero);
return;
}
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"srem",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
/* Remove the src set from the database when empty */
if (setTypeSize(srcset) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
- signalModifiedKey(c->db,c->argv[1]);
- signalModifiedKey(c->db,c->argv[2]);
- server.dirty++;
/* Create the destination set when it doesn't exist */
if (!dstset) {
- dstset = setTypeCreate(ele);
+ dstset = setTypeCreate(ele->ptr);
dbAdd(c->db,c->argv[2],dstset);
}
+ signalModifiedKey(c->db,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[2]);
+ server.dirty++;
+
/* An extra key has changed when ele was successfully added to dstset */
- if (setTypeAdd(dstset,ele)) {
+ if (setTypeAdd(dstset,ele->ptr)) {
server.dirty++;
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sadd",c->argv[2],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id);
}
addReply(c,shared.cone);
}
-void sismemberCommand(redisClient *c) {
+void sismemberCommand(client *c) {
robj *set;
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- if (setTypeIsMember(set,c->argv[2]))
+ if (setTypeIsMember(set,c->argv[2]->ptr))
addReply(c,shared.cone);
else
addReply(c,shared.czero);
}
-void scardCommand(redisClient *c) {
+void scardCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_SET)) return;
+ checkType(c,o,OBJ_SET)) return;
addReplyLongLong(c,setTypeSize(o));
}
-/* handle the "SPOP key <count>" variant. The normal version of the
+/* Handle the "SPOP key <count>" variant. The normal version of the
* command is handled by the spopCommand() function itself. */
-void spopWithCountCommand(redisClient *c) {
+/* How many times bigger should the set be compared to the remaining size
+ * for us to use the "create new set" strategy? Read later in the
+ * implementation for more info. */
+#define SPOP_MOVE_STRATEGY_MUL 5
+
+void spopWithCountCommand(client *c) {
long l;
unsigned long count, size;
- unsigned long elements_returned;
- robj *set, *aux, *aux_set;
- int64_t llele;
+ robj *set;
/* Get the count argument */
- if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
if (l >= 0) {
- count = (unsigned) l;
+ count = (unsigned long) l;
} else {
addReply(c,shared.outofrangeerr);
return;
@@ -502,7 +416,7 @@ void spopWithCountCommand(redisClient *c) {
/* Make sure a key with the given name exists, and that its type is
* indeed a set. Otherwise, return nil */
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
- == NULL || checkType(c,set,REDIS_SET)) return;
+ == NULL || checkType(c,set,OBJ_SET)) return;
/* If count is zero, serve an empty multibulk ASAP to avoid special
* cases later. */
@@ -511,96 +425,134 @@ void spopWithCountCommand(redisClient *c) {
return;
}
- /* Get the size of the set. It is always > 0, as empty sets get
- * deleted. */
size = setTypeSize(set);
/* Generate an SPOP keyspace notification */
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
+ server.dirty += count;
/* CASE 1:
* The number of requested elements is greater than or equal to
* the number of elements inside the set: simply return the whole set. */
if (count >= size) {
-
/* We just return the entire set */
- sunionDiffGenericCommand(c,c->argv+1,1,NULL,REDIS_OP_UNION);
+ sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
/* Delete the set as it is now empty */
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
-
- /* Replicate/AOF this command as an SREM operation */
- aux = createStringObject("DEL",3);
- rewriteClientCommandVector(c,2,aux,c->argv[1]);
- decrRefCount(aux);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ /* Propagate this command as a DEL operation */
+ rewriteClientCommandVector(c,2,shared.del,c->argv[1]);
+ signalModifiedKey(c->db,c->argv[1]);
+ server.dirty++;
return;
}
- /* CASE 2:
- * The number of requested elements is less than the number
- * of elements inside the set. */
-
- /* We need an auxiliary set. Optimistically, we create a set using an
- * Intset internally. */
- aux = createStringObjectFromLongLong(0);
- aux_set = setTypeCreate(aux);
- decrRefCount(aux);
-
- /* Get the count requested of random elements from the set into our
- * auxiliary set. */
- elements_returned = setTypeRandomElements(set, count, aux_set);
- redisAssert(elements_returned == count);
-
- {
- setTypeIterator *si;
- robj *objele;
- int element_encoding;
+ /* Cases 2 and 3 require replicating SPOP as a set of SREM commands.
+ * Prepare our replication argument vector. Also send the array length
+ * which is common to both the code paths. */
+ robj *propargv[3];
+ propargv[0] = createStringObject("SREM",4);
+ propargv[1] = c->argv[1];
+ addReplyMultiBulkLen(c,count);
- addReplyMultiBulkLen(c, elements_returned);
-
- /* Replicate/AOF this command as an SREM operation */
- aux = createStringObject("SREM",4);
-
- si = setTypeInitIterator(aux_set);
- while ((element_encoding = setTypeNext(si, &objele, &llele)) != -1) {
- if (element_encoding == REDIS_ENCODING_HT) {
-
- addReplyBulk(c, objele);
+ /* Common iteration vars. */
+ sds sdsele;
+ robj *objele;
+ int encoding;
+ int64_t llele;
+ unsigned long remaining = size-count; /* Elements left after SPOP. */
- /* Replicate/AOF this command as an SREM commands */
- rewriteClientCommandVector(c, 3, aux, c->argv[1], objele);
- setTypeRemove(set, objele);
- }
- else if (element_encoding == REDIS_ENCODING_INTSET) {
- /* TODO: setTypeRemove() forces us to convert all of the ints
- * to string... isn't there a nicer way to do this? */
+ /* If we are here, the number of requested elements is less than the
+ * number of elements inside the set, so we are sure that count < size.
+ * Use two different strategies.
+ *
+ * CASE 2: The number of elements to return is small compared to the
+ * set size. We can just extract random elements, return them to the
+ * client, and remove them from the set. */
+ if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+ while(count--) {
+ /* Emit and remove. */
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ addReplyBulkLongLong(c,llele);
objele = createStringObjectFromLongLong(llele);
- addReplyBulk(c, objele);
-
- /* Replicate/AOF this command as an SREM commands */
- rewriteClientCommandVector(c, 3, aux, c->argv[1], objele);
- setTypeRemove(set, objele);
+ set->ptr = intsetRemove(set->ptr,llele,NULL);
+ } else {
+ addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
+ objele = createStringObject(sdsele,sdslen(sdsele));
+ setTypeRemove(set,sdsele);
+ }
- /* We created it, we kill it. */
- decrRefCount(objele);
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
+ }
+ } else {
+ /* CASE 3: The number of elements to return is very big, approaching
+ * the size of the set itself. After some time extracting random elements
+ * from such a set becomes computationally expensive, so we use
+ * a different strategy: we extract random elements that we don't
+ * want to return (the elements that will remain part of the set),
+ * creating a new set as we do this (which will then replace the original
+ * set). Then we return the elements left in the original set and
+ * release it. */
+ robj *newset = NULL;
+
+ /* Create a new set with just the remaining elements. */
+ while(remaining--) {
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ sdsele = sdsfromlonglong(llele);
+ } else {
+ sdsele = sdsdup(sdsele);
}
- else {
- redisPanic("Unknown set encoding");
+ if (!newset) newset = setTypeCreate(sdsele);
+ setTypeAdd(newset,sdsele);
+ setTypeRemove(set,sdsele);
+ sdsfree(sdsele);
+ }
+
+ /* Transfer the old set to the client. */
+ setTypeIterator *si;
+ si = setTypeInitIterator(set);
+ while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) {
+ if (encoding == OBJ_ENCODING_INTSET) {
+ addReplyBulkLongLong(c,llele);
+ objele = createStringObjectFromLongLong(llele);
+ } else {
+ addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
+ objele = createStringObject(sdsele,sdslen(sdsele));
}
+
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(server.sremCommand,c->db->id,propargv,3,
+ PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
}
setTypeReleaseIterator(si);
- decrRefCount(aux);
+ /* Assign the new set as the key value. */
+ dbOverwrite(c->db,c->argv[1],newset);
}
- /* Free the auxiliary set - we need it no more. */
- decrRefCount(aux_set);
+ /* Don't propagate the command itself even if we incremented the
+ * dirty counter. We don't want to propagate an SPOP command since
+ * we propagated the command as a set of SREM operations using
+ * the alsoPropagate() API. */
+ decrRefCount(propargv[0]);
+ preventCommandPropagation(c);
+ signalModifiedKey(c->db,c->argv[1]);
+ server.dirty++;
}
-void spopCommand(redisClient *c) {
+void spopCommand(client *c) {
robj *set, *ele, *aux;
+ sds sdsele;
int64_t llele;
int encoding;
@@ -615,35 +567,35 @@ void spopCommand(redisClient *c) {
/* Make sure a key with the given name exists, and that its type is
* indeed a set */
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
/* Get a random element from the set */
- encoding = setTypeRandomElement(set,&ele,&llele);
+ encoding = setTypeRandomElement(set,&sdsele,&llele);
/* Remove the element from the set */
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
ele = createStringObjectFromLongLong(llele);
set->ptr = intsetRemove(set->ptr,llele,NULL);
} else {
- incrRefCount(ele);
- setTypeRemove(set,ele);
+ ele = createStringObject(sdsele,sdslen(sdsele));
+ setTypeRemove(set,ele->ptr);
}
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"spop",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
/* Replicate/AOF this command as an SREM operation */
aux = createStringObject("SREM",4);
rewriteClientCommandVector(c,3,aux,c->argv[1],ele);
- decrRefCount(ele);
decrRefCount(aux);
/* Add the element to the reply */
addReplyBulk(c,ele);
+ decrRefCount(ele);
/* Delete the set if it's empty */
if (setTypeSize(set) == 0) {
dbDelete(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
/* Set has been modified */
@@ -659,19 +611,20 @@ void spopCommand(redisClient *c) {
* implementation for more info. */
#define SRANDMEMBER_SUB_STRATEGY_MUL 3
-void srandmemberWithCountCommand(redisClient *c) {
+void srandmemberWithCountCommand(client *c) {
long l;
unsigned long count, size;
int uniq = 1;
- robj *set, *ele;
+ robj *set;
+ sds ele;
int64_t llele;
int encoding;
dict *d;
- if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+ if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != C_OK) return;
if (l >= 0) {
- count = (unsigned) l;
+ count = (unsigned long) l;
} else {
/* A negative count means: return the same elements multiple times
* (i.e. don't remove the extracted element after every extraction). */
@@ -680,7 +633,7 @@ void srandmemberWithCountCommand(redisClient *c) {
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
- == NULL || checkType(c,set,REDIS_SET)) return;
+ == NULL || checkType(c,set,OBJ_SET)) return;
size = setTypeSize(set);
/* If count is zero, serve it ASAP to avoid special cases later. */
@@ -697,10 +650,10 @@ void srandmemberWithCountCommand(redisClient *c) {
addReplyMultiBulkLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulk(c,ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
}
}
return;
@@ -710,12 +663,12 @@ void srandmemberWithCountCommand(redisClient *c) {
* The number of requested elements is greater than the number of
* elements inside the set: simply return the whole set. */
if (count >= size) {
- sunionDiffGenericCommand(c,c->argv+1,1,NULL,REDIS_OP_UNION);
+ sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
return;
}
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
- d = dictCreate(&setDictType,NULL);
+ d = dictCreate(&objectKeyPointerValueDictType,NULL);
/* CASE 3:
* The number of elements inside the set is not greater than
@@ -734,15 +687,15 @@ void srandmemberWithCountCommand(redisClient *c) {
while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval = DICT_ERR;
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else {
- retval = dictAdd(d,dupStringObject(ele),NULL);
+ retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
}
- redisAssert(retval == DICT_OK);
+ serverAssert(retval == DICT_OK);
}
setTypeReleaseIterator(si);
- redisAssert(dictSize(d) == size);
+ serverAssert(dictSize(d) == size);
/* Remove random elements to reach the right count. */
while(size > count) {
@@ -760,21 +713,22 @@ void srandmemberWithCountCommand(redisClient *c) {
* to reach the specified count. */
else {
unsigned long added = 0;
+ robj *objele;
while(added < count) {
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
- ele = createStringObjectFromLongLong(llele);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ objele = createStringObjectFromLongLong(llele);
} else {
- ele = dupStringObject(ele);
+ objele = createStringObject(ele,sdslen(ele));
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
* in the result dictionary. */
- if (dictAdd(d,ele,NULL) == DICT_OK)
+ if (dictAdd(d,objele,NULL) == DICT_OK)
added++;
else
- decrRefCount(ele);
+ decrRefCount(objele);
}
}
@@ -792,8 +746,9 @@ void srandmemberWithCountCommand(redisClient *c) {
}
}
-void srandmemberCommand(redisClient *c) {
- robj *set, *ele;
+void srandmemberCommand(client *c) {
+ robj *set;
+ sds ele;
int64_t llele;
int encoding;
@@ -806,33 +761,40 @@ void srandmemberCommand(redisClient *c) {
}
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulk(c,ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
}
}
int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
- return setTypeSize(*(robj**)s1)-setTypeSize(*(robj**)s2);
+ if (setTypeSize(*(robj**)s1) > setTypeSize(*(robj**)s2)) return 1;
+ if (setTypeSize(*(robj**)s1) < setTypeSize(*(robj**)s2)) return -1;
+ return 0;
}
/* This is used by SDIFF and in this case we can receive NULL that should
* be handled as empty sets. */
int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
robj *o1 = *(robj**)s1, *o2 = *(robj**)s2;
+ unsigned long first = o1 ? setTypeSize(o1) : 0;
+ unsigned long second = o2 ? setTypeSize(o2) : 0;
- return (o2 ? setTypeSize(o2) : 0) - (o1 ? setTypeSize(o1) : 0);
+ if (first < second) return 1;
+ if (first > second) return -1;
+ return 0;
}
-void sinterGenericCommand(redisClient *c, robj **setkeys,
+void sinterGenericCommand(client *c, robj **setkeys,
unsigned long setnum, robj *dstkey) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
- robj *eleobj, *dstset = NULL;
+ robj *dstset = NULL;
+ sds elesds;
int64_t intobj;
void *replylen = NULL;
unsigned long j, cardinality = 0;
@@ -855,7 +817,7 @@ void sinterGenericCommand(redisClient *c, robj **setkeys,
}
return;
}
- if (checkType(c,setobj,REDIS_SET)) {
+ if (checkType(c,setobj,OBJ_SET)) {
zfree(sets);
return;
}
@@ -882,38 +844,28 @@ void sinterGenericCommand(redisClient *c, robj **setkeys,
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
si = setTypeInitIterator(sets[0]);
- while((encoding = setTypeNext(si,&eleobj,&intobj)) != -1) {
+ while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
for (j = 1; j < setnum; j++) {
if (sets[j] == sets[0]) continue;
- if (encoding == REDIS_ENCODING_INTSET) {
+ if (encoding == OBJ_ENCODING_INTSET) {
/* intset with intset is simple... and fast */
- if (sets[j]->encoding == REDIS_ENCODING_INTSET &&
+ if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
!intsetFind((intset*)sets[j]->ptr,intobj))
{
break;
/* in order to compare an integer with an object we
* have to use the generic function, creating an object
* for this */
- } else if (sets[j]->encoding == REDIS_ENCODING_HT) {
- eleobj = createStringObjectFromLongLong(intobj);
- if (!setTypeIsMember(sets[j],eleobj)) {
- decrRefCount(eleobj);
+ } else if (sets[j]->encoding == OBJ_ENCODING_HT) {
+ elesds = sdsfromlonglong(intobj);
+ if (!setTypeIsMember(sets[j],elesds)) {
+ sdsfree(elesds);
break;
}
- decrRefCount(eleobj);
+ sdsfree(elesds);
}
- } else if (encoding == REDIS_ENCODING_HT) {
- /* Optimization... if the source object is integer
- * encoded AND the target set is an intset, we can get
- * a much faster path. */
- if (eleobj->encoding == REDIS_ENCODING_INT &&
- sets[j]->encoding == REDIS_ENCODING_INTSET &&
- !intsetFind((intset*)sets[j]->ptr,(long)eleobj->ptr))
- {
- break;
- /* else... object to object check is easy as we use the
- * type agnostic API here. */
- } else if (!setTypeIsMember(sets[j],eleobj)) {
+ } else if (encoding == OBJ_ENCODING_HT) {
+ if (!setTypeIsMember(sets[j],elesds)) {
break;
}
}
@@ -922,18 +874,18 @@ void sinterGenericCommand(redisClient *c, robj **setkeys,
/* Only take action when all sets contain the member */
if (j == setnum) {
if (!dstkey) {
- if (encoding == REDIS_ENCODING_HT)
- addReplyBulk(c,eleobj);
+ if (encoding == OBJ_ENCODING_HT)
+ addReplyBulkCBuffer(c,elesds,sdslen(elesds));
else
addReplyBulkLongLong(c,intobj);
cardinality++;
} else {
- if (encoding == REDIS_ENCODING_INTSET) {
- eleobj = createStringObjectFromLongLong(intobj);
- setTypeAdd(dstset,eleobj);
- decrRefCount(eleobj);
+ if (encoding == OBJ_ENCODING_INTSET) {
+ elesds = sdsfromlonglong(intobj);
+ setTypeAdd(dstset,elesds);
+ sdsfree(elesds);
} else {
- setTypeAdd(dstset,eleobj);
+ setTypeAdd(dstset,elesds);
}
}
}
@@ -947,13 +899,13 @@ void sinterGenericCommand(redisClient *c, robj **setkeys,
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,"sinterstore",
+ notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
@@ -964,23 +916,24 @@ void sinterGenericCommand(redisClient *c, robj **setkeys,
zfree(sets);
}
-void sinterCommand(redisClient *c) {
+void sinterCommand(client *c) {
sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
}
-void sinterstoreCommand(redisClient *c) {
+void sinterstoreCommand(client *c) {
sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
}
-#define REDIS_OP_UNION 0
-#define REDIS_OP_DIFF 1
-#define REDIS_OP_INTER 2
+#define SET_OP_UNION 0
+#define SET_OP_DIFF 1
+#define SET_OP_INTER 2
-void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
+void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj *dstkey, int op) {
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
- robj *ele, *dstset = NULL;
+ robj *dstset = NULL;
+ sds ele;
int j, cardinality = 0;
int diff_algo = 1;
@@ -992,7 +945,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
sets[j] = NULL;
continue;
}
- if (checkType(c,setobj,REDIS_SET)) {
+ if (checkType(c,setobj,OBJ_SET)) {
zfree(sets);
return;
}
@@ -1008,7 +961,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
* the sets.
*
* We compute what is the best bet with the current input here. */
- if (op == REDIS_OP_DIFF && sets[0]) {
+ if (op == SET_OP_DIFF && sets[0]) {
long long algo_one_work = 0, algo_two_work = 0;
for (j = 0; j < setnum; j++) {
@@ -1037,7 +990,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
* this set object will be the resulting object to set into the target key*/
dstset = createIntsetObject();
- if (op == REDIS_OP_UNION) {
+ if (op == SET_OP_UNION) {
/* Union is trivial, just add every element of every set to the
* temporary set. */
for (j = 0; j < setnum; j++) {
@@ -1046,11 +999,11 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
si = setTypeInitIterator(sets[j]);
while((ele = setTypeNextObject(si)) != NULL) {
if (setTypeAdd(dstset,ele)) cardinality++;
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
}
- } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 1) {
+ } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
/* DIFF Algorithm 1:
*
* We perform the diff by iterating all the elements of the first set,
@@ -1071,10 +1024,10 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
setTypeAdd(dstset,ele);
cardinality++;
}
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
- } else if (op == REDIS_OP_DIFF && sets[0] && diff_algo == 2) {
+ } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
/* DIFF Algorithm 2:
*
* Add all the elements of the first set to the auxiliary set.
@@ -1092,7 +1045,7 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
} else {
if (setTypeRemove(dstset,ele)) cardinality--;
}
- decrRefCount(ele);
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
@@ -1107,8 +1060,8 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
addReplyMultiBulkLen(c,cardinality);
si = setTypeInitIterator(dstset);
while((ele = setTypeNextObject(si)) != NULL) {
- addReplyBulk(c,ele);
- decrRefCount(ele);
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
+ sdsfree(ele);
}
setTypeReleaseIterator(si);
decrRefCount(dstset);
@@ -1119,14 +1072,14 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
if (setTypeSize(dstset) > 0) {
dbAdd(c->db,dstkey,dstset);
addReplyLongLong(c,setTypeSize(dstset));
- notifyKeyspaceEvent(REDIS_NOTIFY_SET,
- op == REDIS_OP_UNION ? "sunionstore" : "sdiffstore",
+ notifyKeyspaceEvent(NOTIFY_SET,
+ op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
dstkey,c->db->id);
} else {
decrRefCount(dstset);
addReply(c,shared.czero);
if (deleted)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
dstkey,c->db->id);
}
signalModifiedKey(c->db,dstkey);
@@ -1135,28 +1088,28 @@ void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum,
zfree(sets);
}
-void sunionCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
+void sunionCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_UNION);
}
-void sunionstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
+void sunionstoreCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_UNION);
}
-void sdiffCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
+void sdiffCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_DIFF);
}
-void sdiffstoreCommand(redisClient *c) {
- sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
+void sdiffstoreCommand(client *c) {
+ sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
}
-void sscanCommand(redisClient *c) {
+void sscanCommand(client *c) {
robj *set;
unsigned long cursor;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,set,REDIS_SET)) return;
+ checkType(c,set,OBJ_SET)) return;
scanGenericCommand(c,set,cursor);
}
diff --git a/src/t_stream.c b/src/t_stream.c
new file mode 100644
index 000000000..77fbf4645
--- /dev/null
+++ b/src/t_stream.c
@@ -0,0 +1,2369 @@
+/*
+ * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "endianconv.h"
+#include "stream.h"
+
+#define STREAM_BYTES_PER_LISTPACK 2048
+
+/* Every stream item inside the listpack has a flags field that is used to
+ * mark the entry as deleted, or as having the same fields as the "master"
+ * entry at the start of the listpack. */
+#define STREAM_ITEM_FLAG_NONE 0 /* No special flags. */
+#define STREAM_ITEM_FLAG_DELETED (1<<0) /* Entry is deleted. Skip it. */
+#define STREAM_ITEM_FLAG_SAMEFIELDS (1<<1) /* Same fields as master entry. */
+
+void streamFreeCG(streamCG *cg);
+void streamFreeNACK(streamNACK *na);
+size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
+
+/* -----------------------------------------------------------------------
+ * Low level stream encoding: a radix tree of listpacks.
+ * ----------------------------------------------------------------------- */
+
+/* Create a new stream data structure. */
+stream *streamNew(void) {
+ stream *s = zmalloc(sizeof(*s));
+ s->rax = raxNew();
+ s->length = 0;
+ s->last_id.ms = 0;
+ s->last_id.seq = 0;
+ s->cgroups = NULL; /* Created on demand to save memory when not used. */
+ return s;
+}
+
+/* Free a stream, including the listpacks stored inside the radix tree. */
+void freeStream(stream *s) {
+ raxFreeWithCallback(s->rax,(void(*)(void*))lpFree);
+ if (s->cgroups)
+ raxFreeWithCallback(s->cgroups,(void(*)(void*))streamFreeCG);
+ zfree(s);
+}
+
+/* Generate the next stream item ID given the previous one. If the current
+ * milliseconds Unix time is greater than the previous one, just use this
+ * as time part and start with sequence part of zero. Otherwise we use the
+ * previous time (and never go backward) and increment the sequence. */
+void streamNextID(streamID *last_id, streamID *new_id) {
+ uint64_t ms = mstime();
+ if (ms > last_id->ms) {
+ new_id->ms = ms;
+ new_id->seq = 0;
+ } else {
+ new_id->ms = last_id->ms;
+ new_id->seq = last_id->seq+1;
+ }
+}
+
+/* This is just a wrapper for lpAppend() to directly use a 64 bit integer
+ * instead of a string. */
+unsigned char *lpAppendInteger(unsigned char *lp, int64_t value) {
+ char buf[LONG_STR_SIZE];
+ int slen = ll2string(buf,sizeof(buf),value);
+ return lpAppend(lp,(unsigned char*)buf,slen);
+}
+
+/* This is just a wrapper for lpReplace() to directly use a 64 bit integer
+ * instead of a string to replace the current element. The function
+ * returns the new listpack, and also updates the current cursor
+ * by updating '*pos'. */
+unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **pos, int64_t value) {
+ char buf[LONG_STR_SIZE];
+ int slen = ll2string(buf,sizeof(buf),value);
+ return lpInsert(lp, (unsigned char*)buf, slen, *pos, LP_REPLACE, pos);
+}
+
+/* This is a wrapper function for lpGet() to directly get an integer value
+ * from the listpack (that may store numbers as a string), converting
+ * the string if needed. */
+int64_t lpGetInteger(unsigned char *ele) {
+ int64_t v;
+ unsigned char *e = lpGet(ele,&v,NULL);
+ if (e == NULL) return v;
+ /* The following code path should never be used, given how listpacks work:
+ * they should always be able to store an int64_t value in integer
+ * encoded form. However the implementation may change. */
+ long long ll;
+ int retval = string2ll((char*)e,v,&ll);
+ serverAssert(retval != 0);
+ v = ll;
+ return v;
+}
+
+/* Debugging function to log the full content of a listpack. Useful
+ * for development and debugging. */
+void streamLogListpackContent(unsigned char *lp) {
+ unsigned char *p = lpFirst(lp);
+ while(p) {
+ unsigned char buf[LP_INTBUF_SIZE];
+ int64_t v;
+ unsigned char *ele = lpGet(p,&v,buf);
+ serverLog(LL_WARNING,"- [%d] '%.*s'", (int)v, (int)v, ele);
+ p = lpNext(lp,p);
+ }
+}
+
+/* Convert the specified stream entry ID to a 128 bit big endian number, so
+ * that the IDs can be sorted lexicographically. */
+void streamEncodeID(void *buf, streamID *id) {
+ uint64_t e[2];
+ e[0] = htonu64(id->ms);
+ e[1] = htonu64(id->seq);
+ memcpy(buf,e,sizeof(e));
+}
+
+/* This is the reverse of streamEncodeID(): the decoded ID will be stored
+ * in the 'id' structure passed by reference. The buffer 'buf' must point
+ * to a 128 bit big-endian encoded ID. */
+void streamDecodeID(void *buf, streamID *id) {
+ uint64_t e[2];
+ memcpy(e,buf,sizeof(e));
+ id->ms = ntohu64(e[0]);
+ id->seq = ntohu64(e[1]);
+}
+
+/* Compare two stream IDs. Return -1 if a < b, 0 if a == b, 1 if a > b. */
+int streamCompareID(streamID *a, streamID *b) {
+ if (a->ms > b->ms) return 1;
+ else if (a->ms < b->ms) return -1;
+ /* The ms part is the same. Check the sequence part. */
+ else if (a->seq > b->seq) return 1;
+ else if (a->seq < b->seq) return -1;
+ /* Everything is the same: IDs are equal. */
+ return 0;
+}
+
+/* Adds a new item into the stream 's' having the specified number of
+ * field-value pairs as specified in 'numfields' and stored into 'argv'.
+ * Returns the new entry ID populating the 'added_id' structure.
+ *
+ * If 'use_id' is not NULL, the ID is not auto-generated by the function,
+ * but instead the passed ID is used to add the new entry. In this case
+ * adding the entry may fail as specified later in this comment.
+ *
+ * The function returns C_OK if the item was added; this is always true
+ * if the ID was generated by the function. However the function may return
+ * C_ERR if an ID was given via 'use_id', but adding it failed since the
+ * current top ID is greater or equal. */
+int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id) {
+ /* If an ID was given, check that it's greater than the last entry ID
+ * or return an error. */
+ if (use_id && streamCompareID(use_id,&s->last_id) <= 0) return C_ERR;
+
+ /* Add the new entry. */
+ raxIterator ri;
+ raxStart(&ri,s->rax);
+ raxSeek(&ri,"$",NULL,0);
+
+ size_t lp_bytes = 0; /* Total bytes in the tail listpack. */
+ unsigned char *lp = NULL; /* Tail listpack pointer. */
+
+ /* Get a reference to the tail node listpack. */
+ if (raxNext(&ri)) {
+ lp = ri.data;
+ lp_bytes = lpBytes(lp);
+ }
+ raxStop(&ri);
+
+ /* Generate the new entry ID. */
+ streamID id;
+ if (use_id)
+ id = *use_id;
+ else
+ streamNextID(&s->last_id,&id);
+
+ /* We have to add the key into the radix tree in lexicographic order,
+ * to do so we consider the ID as a single 128 bit number written in
+ * big endian, so that the most significant bytes are the first ones. */
+ uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/
+ streamID master_id; /* ID of the master entry in the listpack. */
+
+ /* Create a new listpack and radix tree node if needed. Note that when
+ * a new listpack is created, we populate it with a "master entry". This
+ * is just a set of fields that is taken as references in order to compress
+ * the stream entries that we'll add inside the listpack.
+ *
+ * Note that while we use the first added entry fields to create
+ * the master entry, the first added entry is NOT represented in the master
+ * entry, which is a stand-alone object. But of course, the first entry
+ * will compress well because it's used as reference.
+ *
+ * The master entry is composed like in the following example:
+ *
+ * +-------+---------+------------+---------+--/--+---------+---------+-+
+ * | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0|
+ * +-------+---------+------------+---------+--/--+---------+---------+-+
+ *
+ * count and deleted just represent respectively the total number of
+ * entries inside the listpack that are valid, and marked as deleted
+ * (deleted flag in the entry flags set). So the total number of items
+ * actually inside the listpack (both deleted and not) is count+deleted.
+ *
+ * The real entries will be encoded with an ID that is just the
+ * millisecond and sequence difference compared to the key stored at
+ * the radix tree node containing the listpack (delta encoding), and
+ * if the fields of the entry are the same as the master entry fields, the
+ * entry flags will specify this fact and the entry fields and number
+ * of fields will be omitted (see later in the code of this function).
+ *
+ * The "0" entry at the end is the same as the 'lp-count' entry in the
+ * regular stream entries (see below), and marks the fact that there are
+ * no more entries, when we scan the stream from right to left. */
+
+ /* First of all, check if we can append to the current macro node or
+ * if we need to switch to the next one. 'lp' will be set to NULL if
+ * the current node is full. */
+ if (lp != NULL) {
+ if (server.stream_node_max_bytes &&
+ lp_bytes > server.stream_node_max_bytes)
+ {
+ lp = NULL;
+ } else if (server.stream_node_max_entries) {
+ int64_t count = lpGetInteger(lpFirst(lp));
+ if (count > server.stream_node_max_entries) lp = NULL;
+ }
+ }
+
+ int flags = STREAM_ITEM_FLAG_NONE;
+ if (lp == NULL || lp_bytes > server.stream_node_max_bytes) {
+ master_id = id;
+ streamEncodeID(rax_key,&id);
+ /* Create the listpack having the master entry ID and fields. */
+ lp = lpNew();
+ lp = lpAppendInteger(lp,1); /* One item, the one we are adding. */
+ lp = lpAppendInteger(lp,0); /* Zero deleted so far. */
+ lp = lpAppendInteger(lp,numfields);
+ for (int64_t i = 0; i < numfields; i++) {
+ sds field = argv[i*2]->ptr;
+ lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
+ }
+ lp = lpAppendInteger(lp,0); /* Master entry zero terminator. */
+ raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL);
+ /* The first entry we insert has, by construction, the same fields as
+ * the master entry. */
+ flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
+ } else {
+ serverAssert(ri.key_len == sizeof(rax_key));
+ memcpy(rax_key,ri.key,sizeof(rax_key));
+
+ /* Read the master ID from the radix tree key. */
+ streamDecodeID(rax_key,&master_id);
+ unsigned char *lp_ele = lpFirst(lp);
+
+ /* Update count and skip the deleted fields. */
+ int64_t count = lpGetInteger(lp_ele);
+ lp = lpReplaceInteger(lp,&lp_ele,count+1);
+ lp_ele = lpNext(lp,lp_ele); /* seek deleted. */
+ lp_ele = lpNext(lp,lp_ele); /* seek master entry num fields. */
+
+ /* Check if the entry we are adding has the same fields
+ * as the master entry. */
+ int64_t master_fields_count = lpGetInteger(lp_ele);
+ lp_ele = lpNext(lp,lp_ele);
+ if (numfields == master_fields_count) {
+ int64_t i;
+ for (i = 0; i < master_fields_count; i++) {
+ sds field = argv[i*2]->ptr;
+ int64_t e_len;
+ unsigned char buf[LP_INTBUF_SIZE];
+ unsigned char *e = lpGet(lp_ele,&e_len,buf);
+ /* Stop if there is a mismatch. */
+ if (sdslen(field) != (size_t)e_len ||
+ memcmp(e,field,e_len) != 0) break;
+ lp_ele = lpNext(lp,lp_ele);
+ }
+ /* All fields are the same! We can compress the field names
+ * setting a single bit in the flags. */
+ if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
+ }
+ }
+
+ /* Populate the listpack with the new entry. We use the following
+ * encoding:
+ *
+ * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+
+ * |flags|entry-id|num-fields|field-1|value-1|...|field-N|value-N|lp-count|
+ * +-----+--------+----------+-------+-------+-/-+-------+-------+--------+
+ *
+ * However if the SAMEFIELD flag is set, we have just to populate
+ * the entry with the values, so it becomes:
+ *
+ * +-----+--------+-------+-/-+-------+--------+
+ * |flags|entry-id|value-1|...|value-N|lp-count|
+ * +-----+--------+-------+-/-+-------+--------+
+ *
+ * The entry-id field is actually two separate fields: the ms
+ * and seq difference compared to the master entry.
+ *
+ * The lp-count field is a number that states the number of listpack pieces
+ * that compose the entry, so that it's possible to traverse the entry
+ * in reverse order: we can just start from the end of the listpack, read
+ * the count, and jump back N times to seek the "flags" field and read
+ * the full stream entry. */
+ lp = lpAppendInteger(lp,flags);
+ lp = lpAppendInteger(lp,id.ms - master_id.ms);
+ lp = lpAppendInteger(lp,id.seq - master_id.seq);
+ if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
+ lp = lpAppendInteger(lp,numfields);
+ for (int64_t i = 0; i < numfields; i++) {
+ sds field = argv[i*2]->ptr, value = argv[i*2+1]->ptr;
+ if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS))
+ lp = lpAppend(lp,(unsigned char*)field,sdslen(field));
+ lp = lpAppend(lp,(unsigned char*)value,sdslen(value));
+ }
+ /* Compute and store the lp-count field. */
+ int64_t lp_count = numfields;
+ lp_count += 3; /* Add the 3 fixed fields flags + ms-diff + seq-diff. */
+ if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) {
+ /* If the item is not compressed, it also has the field names in
+ * addition to the values, and an additional num-fields field. */
+ lp_count += numfields+1;
+ }
+ lp = lpAppendInteger(lp,lp_count);
+
+ /* Insert back into the tree in order to update the listpack pointer. */
+ raxInsert(s->rax,(unsigned char*)&rax_key,sizeof(rax_key),lp,NULL);
+ s->length++;
+ s->last_id = id;
+ if (added_id) *added_id = id;
+ return C_OK;
+}
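+
+/* A minimal usage sketch (illustrative, not part of the original code):
+ * assuming 's' is an existing stream and 'argv' an array of 2*numfields
+ * field/value robj pointers, as passed by XADD, an auto-ID append is:
+ *
+ *   streamID added;
+ *   if (streamAppendItem(s,argv,numfields,&added,NULL) == C_OK) {
+ *       ... 'added' now holds the auto-generated <ms>-<seq> ID ...
+ *   }
+ *
+ * Passing a caller-provided ID as the last argument instead makes the
+ * call fail with C_ERR when that ID is not greater than s->last_id. */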
+
+/* Trim the stream 's' to have no more than maxlen elements, and return the
+ * number of elements removed from the stream. The 'approx' option, if non-zero,
+ * specifies that the trimming must be performed in an approximated way in
+ * order to maximize performance. This means that the stream may contain
+ * more elements than 'maxlen', and elements are only removed if we can remove
+ * a *whole* node of the radix tree. The elements are removed from the head
+ * of the stream (older elements).
+ *
+ * The function may return zero if:
+ *
+ * 1) The stream is already shorter or equal to the specified max length.
+ * 2) The 'approx' option is true and the head node does not have enough
+ * elements: deleting it whole would leave the stream with fewer than
+ * 'maxlen' elements.
+ */
+int64_t streamTrimByLength(stream *s, size_t maxlen, int approx) {
+ if (s->length <= maxlen) return 0;
+
+ raxIterator ri;
+ raxStart(&ri,s->rax);
+ raxSeek(&ri,"^",NULL,0);
+
+ int64_t deleted = 0;
+ while(s->length > maxlen && raxNext(&ri)) {
+ unsigned char *lp = ri.data, *p = lpFirst(lp);
+ int64_t entries = lpGetInteger(p);
+
+ /* Check if we can remove the whole node, and still have at
+ * least maxlen elements. */
+ if (s->length - entries >= maxlen) {
+ lpFree(lp);
+ raxRemove(s->rax,ri.key,ri.key_len,NULL);
+ raxSeek(&ri,">=",ri.key,ri.key_len);
+ s->length -= entries;
+ deleted += entries;
+ continue;
+ }
+
+ /* If we cannot remove a whole node, and approx is true,
+ * stop here. */
+ if (approx) break;
+
+ /* Otherwise, we have to mark single entries inside the listpack
+ * as deleted. We start by updating the entries/deleted counters. */
+ int64_t to_delete = s->length - maxlen;
+ serverAssert(to_delete < entries);
+ lp = lpReplaceInteger(lp,&p,entries-to_delete);
+ p = lpNext(lp,p); /* Seek deleted field. */
+ int64_t marked_deleted = lpGetInteger(p);
+ lp = lpReplaceInteger(lp,&p,marked_deleted+to_delete);
+ p = lpNext(lp,p); /* Seek num-of-fields in the master entry. */
+
+ /* Skip all the master fields. */
+ int64_t master_fields_count = lpGetInteger(p);
+ p = lpNext(lp,p); /* Seek the first field. */
+ for (int64_t j = 0; j < master_fields_count; j++)
+ p = lpNext(lp,p); /* Skip all master fields. */
+ p = lpNext(lp,p); /* Skip the zero master entry terminator. */
+
+ /* 'p' is now pointing to the first entry inside the listpack.
+ * We have to run entry after entry, marking entries as deleted
+ * if they are not already deleted. */
+ while(p) {
+ int flags = lpGetInteger(p);
+ int to_skip;
+
+ /* Mark the entry as deleted. */
+ if (!(flags & STREAM_ITEM_FLAG_DELETED)) {
+ flags |= STREAM_ITEM_FLAG_DELETED;
+ lp = lpReplaceInteger(lp,&p,flags);
+ deleted++;
+ s->length--;
+ if (s->length <= maxlen) break; /* Enough entries deleted. */
+ }
+
+ p = lpNext(lp,p); /* Skip ID ms delta. */
+ p = lpNext(lp,p); /* Skip ID seq delta. */
+ p = lpNext(lp,p); /* Seek num-fields or values (if compressed). */
+ if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+ to_skip = master_fields_count;
+ } else {
+ to_skip = lpGetInteger(p);
+ to_skip = 1+(to_skip*2);
+ }
+
+ while(to_skip--) p = lpNext(lp,p); /* Skip the whole entry. */
+ p = lpNext(lp,p); /* Skip the final lp-count field. */
+ }
+
+ /* Here we should perform garbage collection in case at this point
+ * there are too many entries deleted inside the listpack. */
+ entries -= to_delete;
+ marked_deleted += to_delete;
+ if (entries + marked_deleted > 10 && marked_deleted > entries/2) {
+ /* TODO: perform a garbage collection. */
+ }
+
+ /* Update the listpack with the new pointer. */
+ raxInsert(s->rax,ri.key,ri.key_len,lp,NULL);
+
+ break; /* If we are here, there was enough to delete in the current
+ node, so no need to go to the next node. */
+ }
+
+ raxStop(&ri);
+ return deleted;
+}
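+
+/* Usage sketch (illustrative): trimming a stream down to at most 1000
+ * entries, either exactly or in the faster node-granularity mode:
+ *
+ *   int64_t removed = streamTrimByLength(s,1000,0);        exact
+ *   int64_t removed_approx = streamTrimByLength(s,1000,1); approximated
+ *
+ * The approximated form is the one used by XADD ... MAXLEN ~ 1000 ...,
+ * since it only ever drops whole radix tree nodes. */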
+
+/* Initialize the stream iterator, so that we can call iterating functions
+ * to get the next items. This requires a corresponding streamIteratorStop()
+ * at the end. The 'rev' parameter controls the direction. If it's zero the
+ * iteration is from the start to the end element (inclusive), otherwise
+ * if rev is non-zero, the iteration is reversed.
+ *
+ * Once the iterator is initialized, we iterate like this:
+ *
+ * streamIterator myiterator;
+ * streamIteratorStart(&myiterator,...);
+ * int64_t numfields;
+ * while(streamIteratorGetID(&myiterator,&ID,&numfields)) {
+ * while(numfields--) {
+ * unsigned char *key, *value;
+ * size_t key_len, value_len;
+ * streamIteratorGetField(&myiterator,&key,&value,&key_len,&value_len);
+ *
+ * ... do what you want with key and value ...
+ * }
+ * }
+ * streamIteratorStop(&myiterator); */
+void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev) {
+ /* Initialize the iterator and translate the iteration start/stop
+ * elements into 128 bit big-endian numbers. */
+ if (start) {
+ streamEncodeID(si->start_key,start);
+ } else {
+ si->start_key[0] = 0;
+ si->start_key[1] = 0;
+ }
+
+ if (end) {
+ streamEncodeID(si->end_key,end);
+ } else {
+ si->end_key[0] = UINT64_MAX;
+ si->end_key[1] = UINT64_MAX;
+ }
+
+ /* Seek the correct node in the radix tree. */
+ raxStart(&si->ri,s->rax);
+ if (!rev) {
+ if (start && (start->ms || start->seq)) {
+ raxSeek(&si->ri,"<=",(unsigned char*)si->start_key,
+ sizeof(si->start_key));
+ if (raxEOF(&si->ri)) raxSeek(&si->ri,"^",NULL,0);
+ } else {
+ raxSeek(&si->ri,"^",NULL,0);
+ }
+ } else {
+ if (end && (end->ms || end->seq)) {
+ raxSeek(&si->ri,"<=",(unsigned char*)si->end_key,
+ sizeof(si->end_key));
+ if (raxEOF(&si->ri)) raxSeek(&si->ri,"$",NULL,0);
+ } else {
+ raxSeek(&si->ri,"$",NULL,0);
+ }
+ }
+ si->stream = s;
+ si->lp = NULL; /* There is no current listpack right now. */
+ si->lp_ele = NULL; /* Current listpack cursor. */
+ si->rev = rev; /* Direction, if non-zero reversed, from end to start. */
+}
+
+/* Return 1 and store the current item ID at 'id' if there are still
+ * elements within the iteration range, otherwise return 0 in order to
+ * signal the iteration terminated. */
+int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
+ while(1) { /* Will stop when element > stop_key or end of radix tree. */
+ /* If the current listpack is set to NULL, this is the start of the
+ * iteration or the previous listpack was completely iterated.
+ * Go to the next node. */
+ if (si->lp == NULL || si->lp_ele == NULL) {
+ if (!si->rev && !raxNext(&si->ri)) return 0;
+ else if (si->rev && !raxPrev(&si->ri)) return 0;
+ serverAssert(si->ri.key_len == sizeof(streamID));
+ /* Get the master ID. */
+ streamDecodeID(si->ri.key,&si->master_id);
+ /* Get the master fields count. */
+ si->lp = si->ri.data;
+ si->lp_ele = lpFirst(si->lp); /* Seek items count */
+ si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek deleted count. */
+ si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek num fields. */
+ si->master_fields_count = lpGetInteger(si->lp_ele);
+ si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek first field. */
+ si->master_fields_start = si->lp_ele;
+ /* We are now pointing to the first field of the master entry.
+ * We need to seek either the first or the last entry depending
+ * on the direction of the iteration. */
+ if (!si->rev) {
+ /* If we are iterating in normal order, skip the master fields
+ * to seek the first actual entry. */
+ for (uint64_t i = 0; i < si->master_fields_count; i++)
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ } else {
+ /* If we are iterating in reverse direction, just seek the
+ * last part of the last entry in the listpack (that is, the
+ * fields count). */
+ si->lp_ele = lpLast(si->lp);
+ }
+ } else if (si->rev) {
+ /* If we are iterating in the reverse order, and this is not
+ * the first entry emitted for this listpack, then we already
+ * emitted the current entry, and have to go back to the previous
+ * one. */
+ int lp_count = lpGetInteger(si->lp_ele);
+ while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+ /* Seek lp-count of prev entry. */
+ si->lp_ele = lpPrev(si->lp,si->lp_ele);
+ }
+
+ /* For every radix tree node, iterate the corresponding listpack,
+ * returning elements when they are within range. */
+ while(1) {
+ if (!si->rev) {
+ /* If we are going forward, skip the previous entry
+ * lp-count field (or in case of the master entry, the zero
+ * term field) */
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ if (si->lp_ele == NULL) break;
+ } else {
+ /* If we are going backward, read the number of elements this
+ * entry is composed of, and jump backward N times to seek
+ * its start. */
+ int64_t lp_count = lpGetInteger(si->lp_ele);
+ if (lp_count == 0) { /* We reached the master entry. */
+ si->lp = NULL;
+ si->lp_ele = NULL;
+ break;
+ }
+ while(lp_count--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+ }
+
+ /* Get the flags entry. */
+ si->lp_flags = si->lp_ele;
+ int flags = lpGetInteger(si->lp_ele);
+ si->lp_ele = lpNext(si->lp,si->lp_ele); /* Seek ID. */
+
+ /* Get the ID: it is encoded as difference between the master
+ * ID and this entry ID. */
+ *id = si->master_id;
+ id->ms += lpGetInteger(si->lp_ele);
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ id->seq += lpGetInteger(si->lp_ele);
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ unsigned char buf[sizeof(streamID)];
+ streamEncodeID(buf,id);
+
+ /* The number of fields is stored here or not, depending on the
+ * flags. */
+ if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+ *numfields = si->master_fields_count;
+ } else {
+ *numfields = lpGetInteger(si->lp_ele);
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ }
+
+ /* If current >= start, and the entry is not marked as
+ * deleted, emit it. */
+ if (!si->rev) {
+ if (memcmp(buf,si->start_key,sizeof(streamID)) >= 0 &&
+ !(flags & STREAM_ITEM_FLAG_DELETED))
+ {
+ if (memcmp(buf,si->end_key,sizeof(streamID)) > 0)
+ return 0; /* We are already out of range. */
+ si->entry_flags = flags;
+ if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)
+ si->master_fields_ptr = si->master_fields_start;
+ return 1; /* Valid item returned. */
+ }
+ } else {
+ if (memcmp(buf,si->end_key,sizeof(streamID)) <= 0 &&
+ !(flags & STREAM_ITEM_FLAG_DELETED))
+ {
+ if (memcmp(buf,si->start_key,sizeof(streamID)) < 0)
+ return 0; /* We are already out of range. */
+ si->entry_flags = flags;
+ if (flags & STREAM_ITEM_FLAG_SAMEFIELDS)
+ si->master_fields_ptr = si->master_fields_start;
+ return 1; /* Valid item returned. */
+ }
+ }
+
+ /* If we do not emit the entry, we have to discard its fields if
+ * we are going forward, or seek the previous entry if we are going
+ * backward. */
+ if (!si->rev) {
+ int64_t to_discard = (flags & STREAM_ITEM_FLAG_SAMEFIELDS) ?
+ *numfields : *numfields*2;
+ for (int64_t i = 0; i < to_discard; i++)
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ } else {
+ int64_t prev_times = 4; /* flag + id ms + id seq + one more to
+ go back to the previous entry "count"
+ field. */
+ /* If the entry was not flagged SAMEFIELDS we also read the
+ * number of fields, so go back one more. */
+ if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) prev_times++;
+ while(prev_times--) si->lp_ele = lpPrev(si->lp,si->lp_ele);
+ }
+ }
+
+ /* End of listpack reached. Try the next/prev radix tree node. */
+ }
+}
+
+/* Get the field and value of the current item we are iterating. This should
+ * be called immediately after streamIteratorGetID(), and for each field
+ * according to the number of fields returned by streamIteratorGetID().
+ * The function populates the field and value pointers and the corresponding
+ * lengths by reference; those are valid until the next iterator call, assuming
+ * no one touches the stream meanwhile. */
+void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen) {
+ if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
+ *fieldptr = lpGet(si->master_fields_ptr,fieldlen,si->field_buf);
+ si->master_fields_ptr = lpNext(si->lp,si->master_fields_ptr);
+ } else {
+ *fieldptr = lpGet(si->lp_ele,fieldlen,si->field_buf);
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+ }
+ *valueptr = lpGet(si->lp_ele,valuelen,si->value_buf);
+ si->lp_ele = lpNext(si->lp,si->lp_ele);
+}
+
+/* Remove the current entry from the stream: can be called after the
+ * GetID() API or after any GetField() call, however the iterator must
+ * be positioned on a valid entry when this function is called. Moreover
+ * the function requires the entry ID we are currently iterating, that was
+ * previously returned by GetID().
+ *
+ * Note that after calling this function, next calls to GetField() can't
+ * be performed: the entry is now deleted. Instead the iterator will
+ * automatically re-seek to the next entry, so the caller should continue
+ * with GetID(). */
+void streamIteratorRemoveEntry(streamIterator *si, streamID *current) {
+ unsigned char *lp = si->lp;
+ int64_t aux;
+
+ /* We do not really delete the entry here. Instead we mark it as
+ * deleted flagging it, and also incrementing the count of the
+ * deleted entries in the listpack header.
+ *
+ * We start flagging: */
+ int flags = lpGetInteger(si->lp_flags);
+ flags |= STREAM_ITEM_FLAG_DELETED;
+ lp = lpReplaceInteger(lp,&si->lp_flags,flags);
+
+ /* Change the valid/deleted entries count in the master entry. */
+ unsigned char *p = lpFirst(lp);
+ aux = lpGetInteger(p);
+
+ if (aux == 1) {
+ /* If this is the last element in the listpack, we can remove the whole
+ * node. */
+ lpFree(lp);
+ raxRemove(si->stream->rax,si->ri.key,si->ri.key_len,NULL);
+ } else {
+ /* In the base case we alter the counters of valid/deleted entries. */
+ lp = lpReplaceInteger(lp,&p,aux-1);
+ p = lpNext(lp,p); /* Seek deleted field. */
+ aux = lpGetInteger(p);
+ lp = lpReplaceInteger(lp,&p,aux+1);
+ }
+
+ /* Update the number of entries counter. */
+ si->stream->length--;
+
+ /* Re-seek the iterator to fix the now messed up state. */
+ streamID start, end;
+ if (si->rev) {
+ streamDecodeID(si->start_key,&start);
+ end = *current;
+ } else {
+ start = *current;
+ streamDecodeID(si->end_key,&end);
+ }
+ streamIteratorStop(si);
+ streamIteratorStart(si,si->stream,&start,&end,si->rev);
+
+ /* TODO: perform a garbage collection here if the ratio between
+ * deleted and valid goes over a certain limit. */
+}
+
+/* Stop the stream iterator. The only cleanup we need is to free the rax
+ * iterator, since the stream iterator itself is supposed to be stack
+ * allocated. */
+void streamIteratorStop(streamIterator *si) {
+ raxStop(&si->ri);
+}
+
+/* Delete the specified item ID from the stream, returning 1 if the item
+ * was deleted, 0 otherwise (if it does not exist). */
+int streamDeleteItem(stream *s, streamID *id) {
+ int deleted = 0;
+ streamIterator si;
+ streamIteratorStart(&si,s,id,id,0);
+ streamID myid;
+ int64_t numfields;
+ if (streamIteratorGetID(&si,&myid,&numfields)) {
+ streamIteratorRemoveEntry(&si,&myid);
+ deleted = 1;
+ }
+ streamIteratorStop(&si);
+ return deleted;
+}
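+
+/* Usage sketch (illustrative): a command deleting entries by ID would
+ * parse each argument and call the function above, for instance:
+ *
+ *   streamID id;
+ *   if (streamParseStrictIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
+ *   deleted += streamDeleteItem(s,&id);
+ *
+ * streamParseStrictIDOrReply() is defined later in this file. */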
+
+/* Emit a reply in the client output buffer by formatting a Stream ID
+ * in the standard <ms>-<seq> format, using the RESP simple string
+ * protocol. */
+void addReplyStreamID(client *c, streamID *id) {
+ sds replyid = sdscatfmt(sdsempty(),"+%U-%U\r\n",id->ms,id->seq);
+ addReplySds(c,replyid);
+}
+
+/* Similar to the above function, but just creates an object, usually
+ * useful for replication purposes, in order to create command arguments. */
+robj *createObjectFromStreamID(streamID *id) {
+ return createObject(OBJ_STRING, sdscatfmt(sdsempty(),"%U-%U",
+ id->ms,id->seq));
+}
+
+/* As a result of an explicit XCLAIM or XREADGROUP command, new entries
+ * are created in the pending list of the stream and consumers. We need
+ * to propagate these changes in the form of XCLAIM commands. */
+void streamPropagateXCLAIM(client *c, robj *key, robj *group, robj *id, streamNACK *nack) {
+ /* We need to generate an XCLAIM that will work in an idempotent fashion:
+ *
+ * XCLAIM <key> <group> <consumer> 0 <id> TIME <milliseconds-unix-time>
+ * RETRYCOUNT <count> FORCE JUSTID.
+ *
+ * Note that JUSTID is useful in order to avoid that XCLAIM does
+ * useless work on the slave side, trying to fetch the stream item. */
+ robj *argv[12];
+ argv[0] = createStringObject("XCLAIM",6);
+ argv[1] = key;
+ argv[2] = group;
+ argv[3] = createStringObject(nack->consumer->name,sdslen(nack->consumer->name));
+ argv[4] = createStringObjectFromLongLong(0);
+ argv[5] = id;
+ argv[6] = createStringObject("TIME",4);
+ argv[7] = createStringObjectFromLongLong(nack->delivery_time);
+ argv[8] = createStringObject("RETRYCOUNT",10);
+ argv[9] = createStringObjectFromLongLong(nack->delivery_count);
+ argv[10] = createStringObject("FORCE",5);
+ argv[11] = createStringObject("JUSTID",6);
+ propagate(server.xclaimCommand,c->db->id,argv,12,PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(argv[0]);
+ decrRefCount(argv[3]);
+ decrRefCount(argv[4]);
+ decrRefCount(argv[6]);
+ decrRefCount(argv[7]);
+ decrRefCount(argv[8]);
+ decrRefCount(argv[9]);
+ decrRefCount(argv[10]);
+ decrRefCount(argv[11]);
+}
+
+/* Send the specified range to the client 'c'. The range the client will
+ * receive is between start and end inclusive; if 'count' is non zero, no more
+ * than 'count' elements are sent. The 'end' pointer can be NULL to mean that
+ * we want all the elements from 'start' till the end of the stream. If 'rev'
+ * is non zero, elements are produced in reverse order from end to start.
+ *
+ * If group and consumer are not NULL, the function performs additional work:
+ * 1. It updates the last delivered ID in the group in case we are
+ * sending IDs greater than the current last ID.
+ * 2. If the requested IDs are already assigned to some other consumer, the
+ * function will not return them to the client.
+ * 3. An entry in the pending list will be created for every entry delivered
+ * for the first time to this consumer.
+ *
+ * The behavior may be modified passing non-zero flags:
+ *
+ * STREAM_RWR_NOACK: Do not create PEL entries, that is, the point "3" above
+ * is not performed.
+ * STREAM_RWR_RAWENTRIES: Do not emit array boundaries, but just the entries,
+ * and return the number of entries emitted as usual.
+ * This is used when the function is just used in order
+ * to emit data and there is some higher level logic.
+ *
+ * The final argument 'spi' (stream propagation info pointer) is a structure
+ * filled with information needed to propagate the command execution to AOF
+ * and slaves, in the case a consumer group was passed: we need to generate
+ * XCLAIM commands to create the pending list into AOF/slaves in that case.
+ *
+ * If 'spi' is set to NULL no propagation will happen even if the group was
+ * given, but currently such a feature is never used by the code base, which
+ * will always pass 'spi' and propagate when a group is passed.
+ *
+ * Note that this function is recursive in certain cases. When it's called
+ * with a non NULL group and consumer argument, it may call
+ * streamReplyWithRangeFromConsumerPEL() in order to get entries from the
+ * consumer pending entries list. However such a function will then call
+ * streamReplyWithRange() in order to emit single entries (found in the
+ * PEL by ID) to the client. This is the use case for the STREAM_RWR_RAWENTRIES
+ * flag.
+ */
+#define STREAM_RWR_NOACK (1<<0) /* Do not create entries in the PEL. */
+#define STREAM_RWR_RAWENTRIES (1<<1) /* Do not emit protocol for array
+ boundaries, just the entries. */
+size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end, size_t count, int rev, streamCG *group, streamConsumer *consumer, int flags, streamPropInfo *spi) {
+ void *arraylen_ptr = NULL;
+ size_t arraylen = 0;
+ streamIterator si;
+ int64_t numfields;
+ streamID id;
+
+ /* If a group was passed, we check if the request is about messages
+ * never delivered so far (normally this happens when ">" ID is passed).
+ *
+ * If instead the client is asking for some history, we serve it
+ * using a different function, so that we return entries *solely*
+ * from its own PEL. This ensures each consumer will always and only
+ * see the history of messages delivered to it and not yet confirmed
+ * as delivered. */
+ if (group && streamCompareID(start,&group->last_id) <= 0) {
+ return streamReplyWithRangeFromConsumerPEL(c,s,start,end,count,
+ consumer);
+ }
+
+ if (!(flags & STREAM_RWR_RAWENTRIES))
+ arraylen_ptr = addDeferredMultiBulkLength(c);
+ streamIteratorStart(&si,s,start,end,rev);
+ while(streamIteratorGetID(&si,&id,&numfields)) {
+ /* Update the group last_id if needed. */
+ if (group && streamCompareID(&id,&group->last_id) > 0)
+ group->last_id = id;
+
+ /* Emit a two elements array for each item. The first is
+ * the ID, the second is an array of field-value pairs. */
+ addReplyMultiBulkLen(c,2);
+ addReplyStreamID(c,&id);
+ addReplyMultiBulkLen(c,numfields*2);
+
+ /* Emit the field-value pairs. */
+ while(numfields--) {
+ unsigned char *key, *value;
+ int64_t key_len, value_len;
+ streamIteratorGetField(&si,&key,&value,&key_len,&value_len);
+ addReplyBulkCBuffer(c,key,key_len);
+ addReplyBulkCBuffer(c,value,value_len);
+ }
+
+ /* If a group is passed, we need to create an entry in the
+ * PEL (pending entries list) of this group *and* this consumer.
+ *
+ * Note that we cannot be sure about the fact the message is not
+ * already owned by another consumer, because the admin is able
+ * to change the consumer group last delivered ID using the
+ * XGROUP SETID command. So if we find that there is already
+ * a NACK for the entry, we need to associate it to the new
+ * consumer. */
+ if (group && !(flags & STREAM_RWR_NOACK)) {
+ unsigned char buf[sizeof(streamID)];
+ streamEncodeID(buf,&id);
+
+ /* Try to add a new NACK. Most of the time this will work and
+ * will not require extra lookups. We'll fix the problem later
+ * if we find that there is already an entry for this ID. */
+ streamNACK *nack = streamCreateNACK(consumer);
+ int group_inserted =
+ raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
+ int consumer_inserted =
+ raxTryInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+
+ /* Now we can check if the entry was already busy, and
+ * in that case reassign the entry to the new consumer,
+ * or update it if the consumer is the same as before. */
+ if (group_inserted == 0) {
+ streamFreeNACK(nack);
+ nack = raxFind(group->pel,buf,sizeof(buf));
+ serverAssert(nack != raxNotFound);
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ /* Update the consumer and NACK metadata. */
+ nack->consumer = consumer;
+ nack->delivery_time = mstime();
+ nack->delivery_count = 1;
+ /* Add the entry in the new consumer local PEL. */
+ raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+ } else if (group_inserted == 1 && consumer_inserted == 0) {
+ serverPanic("NACK half-created. Should not be possible.");
+ }
+
+ /* Propagate as XCLAIM. */
+ if (spi) {
+ robj *idarg = createObjectFromStreamID(&id);
+ streamPropagateXCLAIM(c,spi->keyname,spi->groupname,idarg,nack);
+ decrRefCount(idarg);
+ }
+ }
+
+ arraylen++;
+ if (count && count == arraylen) break;
+ }
+ streamIteratorStop(&si);
+ if (arraylen_ptr) setDeferredMultiBulkLength(c,arraylen_ptr,arraylen);
+ return arraylen;
+}
+
+/* This is a helper function for streamReplyWithRange() when called with
+ * group and consumer arguments, but with a range that is referring to already
+ * delivered messages. In this case we just emit messages that are already
+ * in the history of the consumer, fetching the IDs from its PEL.
+ *
+ * Note that this function does not have a 'rev' argument because it's not
+ * possible to iterate in reverse using a group. Basically this function
+ * is only called as a result of the XREADGROUP command.
+ *
+ * This function is more expensive because it needs to inspect the PEL and then
+ * seek into the radix tree of the messages in order to emit the full message
+ * to the client. However clients only reach this code path when they are
+ * fetching the history of already retrieved messages, which is rare. */
+size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer) {
+ raxIterator ri;
+ unsigned char startkey[sizeof(streamID)];
+ unsigned char endkey[sizeof(streamID)];
+ streamEncodeID(startkey,start);
+ if (end) streamEncodeID(endkey,end);
+
+ size_t arraylen = 0;
+ void *arraylen_ptr = addDeferredMultiBulkLength(c);
+ raxStart(&ri,consumer->pel);
+ raxSeek(&ri,">=",startkey,sizeof(startkey));
+ while(raxNext(&ri) && (!count || arraylen < count)) {
+ if (end && memcmp(ri.key,endkey,ri.key_len) > 0) break;
+ streamID thisid;
+ streamDecodeID(ri.key,&thisid);
+ if (streamReplyWithRange(c,s,&thisid,NULL,1,0,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL) == 0)
+ {
+ /* Note that we may have a not acknowledged entry in the PEL
+ * about a message that's no longer here because it was removed
+ * by the user by other means. In that case we signal it emitting
+ * the ID but then a NULL entry for the fields. */
+ addReplyMultiBulkLen(c,2);
+ streamID id;
+ streamDecodeID(ri.key,&id);
+ addReplyStreamID(c,&id);
+ addReply(c,shared.nullmultibulk);
+ } else {
+ streamNACK *nack = ri.data;
+ nack->delivery_time = mstime();
+ nack->delivery_count++;
+ }
+ arraylen++;
+ }
+ raxStop(&ri);
+ setDeferredMultiBulkLength(c,arraylen_ptr,arraylen);
+ return arraylen;
+}
+
+/* -----------------------------------------------------------------------
+ * Stream commands implementation
+ * ----------------------------------------------------------------------- */
+
+/* Look up the stream at 'key' and return the corresponding stream object.
+ * The function creates the key, setting it to an empty stream, if needed. */
+robj *streamTypeLookupWriteOrCreate(client *c, robj *key) {
+ robj *o = lookupKeyWrite(c->db,key);
+ if (o == NULL) {
+ o = createStreamObject();
+ dbAdd(c->db,key,o);
+ } else {
+ if (o->type != OBJ_STREAM) {
+ addReply(c,shared.wrongtypeerr);
+ return NULL;
+ }
+ }
+ return o;
+}
+
+/* Helper function to convert a string to an unsigned long long value.
+ * The function attempts to use the faster string2ll() function inside
+ * Redis: if it fails, strtoull() is used instead. The function returns
+ * 1 if the conversion happened successfully or 0 if the number is
+ * invalid or out of range. */
+int string2ull(const char *s, unsigned long long *value) {
+ long long ll;
+ if (string2ll(s,strlen(s),&ll)) {
+ if (ll < 0) return 0; /* Negative values are out of range. */
+ *value = ll;
+ return 1;
+ }
+ errno = 0;
+ char *endptr = NULL;
+ *value = strtoull(s,&endptr,10);
+ if (errno == EINVAL || errno == ERANGE || !(*s != '\0' && *endptr == '\0'))
+ return 0; /* strtoull() failed. */
+ return 1; /* Conversion done! */
+}
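+
+/* For example (illustrative values):
+ *
+ *   string2ull("1526919030474",&v)  returns 1, v = 1526919030474
+ *   string2ull("-1",&v)             returns 0 (negative, out of range)
+ *   string2ull("123abc",&v)         returns 0 (trailing garbage)
+ */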
+
+/* Parse a stream ID in the format given by clients to Redis, that is
+ * <ms>-<seq>, and convert it into a streamID structure. If
+ * the specified ID is invalid, C_ERR is returned and an error is reported
+ * to the client, otherwise C_OK is returned. The ID may be in incomplete
+ * form, just stating the milliseconds time part of the ID. In such a case
+ * the missing part is set according to the value of the 'missing_seq' parameter.
+ *
+ * The IDs "-" and "+" specify respectively the minimum and maximum IDs
+ * that can be represented. If 'strict' is set to 1, "-" and "+" will be
+ * treated as an invalid ID.
+ *
+ * If 'c' is set to NULL, no reply is sent to the client. */
+int streamGenericParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int strict) {
+ char buf[128];
+ if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid;
+ memcpy(buf,o->ptr,sdslen(o->ptr)+1);
+
+ if (strict && (buf[0] == '-' || buf[0] == '+') && buf[1] == '\0')
+ goto invalid;
+
+ /* Handle the "-" and "+" special cases. */
+ if (buf[0] == '-' && buf[1] == '\0') {
+ id->ms = 0;
+ id->seq = 0;
+ return C_OK;
+ } else if (buf[0] == '+' && buf[1] == '\0') {
+ id->ms = UINT64_MAX;
+ id->seq = UINT64_MAX;
+ return C_OK;
+ }
+
+ /* Parse <ms>-<seq> form. */
+ char *dot = strchr(buf,'-');
+ if (dot) *dot = '\0';
+ unsigned long long ms, seq;
+ if (string2ull(buf,&ms) == 0) goto invalid;
+ if (dot && string2ull(dot+1,&seq) == 0) goto invalid;
+ if (!dot) seq = missing_seq;
+ id->ms = ms;
+ id->seq = seq;
+ return C_OK;
+
+invalid:
+ if (c) addReplyError(c,"Invalid stream ID specified as stream "
+ "command argument");
+ return C_ERR;
+}
+
+/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
+ * 0, to be used when - and + are acceptable IDs. */
+int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) {
+ return streamGenericParseIDOrReply(c,o,id,missing_seq,0);
+}
+
+/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
+ * 1, to be used when we want to return an error if the special IDs + or -
+ * are provided. */
+int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) {
+ return streamGenericParseIDOrReply(c,o,id,missing_seq,1);
+}
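+
+/* Examples (illustrative) of how the parsing behaves:
+ *
+ *   "1526919030474-55"  ->  ms = 1526919030474, seq = 55
+ *   "1526919030474"     ->  ms = 1526919030474, seq = missing_seq
+ *   "-" and "+"         ->  minimum and maximum ID (rejected when strict)
+ */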
+
+
+/* XADD key [MAXLEN <count>] <ID or *> [field value] [field value] ... */
+void xaddCommand(client *c) {
+ streamID id;
+ int id_given = 0; /* Was an ID different than "*" specified? */
+ long long maxlen = -1; /* If left to -1 no trimming is performed. */
+ int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
+ the maximum length is not applied verbatim. */
+ int maxlen_arg_idx = 0; /* Index of the count in MAXLEN, for rewriting. */
+
+ /* Parse options. */
+ int i = 2; /* This is the first argument position where we could
+ find an option, or the ID. */
+ for (; i < c->argc; i++) {
+ int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
+ char *opt = c->argv[i]->ptr;
+ if (opt[0] == '*' && opt[1] == '\0') {
+ /* This is just a fast path for the common case of auto-ID
+ * creation. */
+ break;
+ } else if (!strcasecmp(opt,"maxlen") && moreargs) {
+ char *next = c->argv[i+1]->ptr;
+ /* Check for the form MAXLEN ~ <count>. */
+ if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+ approx_maxlen = 1;
+ i++;
+ }
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
+ != C_OK) return;
+
+ if (maxlen < 0) {
+ addReplyError(c,"The MAXLEN argument must be >= 0.");
+ return;
+ }
+ i++;
+ maxlen_arg_idx = i;
+ } else {
+ /* If we are here, this is either a syntax error or a valid ID. */
+ if (streamParseStrictIDOrReply(c,c->argv[i],&id,0) != C_OK) return;
+ id_given = 1;
+ break;
+ }
+ }
+ int field_pos = i+1;
+
+ /* Check arity. */
+ if ((c->argc - field_pos) < 2 || ((c->argc-field_pos) % 2) == 1) {
+ addReplyError(c,"wrong number of arguments for XADD");
+ return;
+ }
+
+ /* Lookup the stream at key. */
+ robj *o;
+ stream *s;
+ if ((o = streamTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
+ s = o->ptr;
+
+ /* Append using the low level function and return the ID. */
+ if (streamAppendItem(s,c->argv+field_pos,(c->argc-field_pos)/2,
+ &id, id_given ? &id : NULL)
+ == C_ERR)
+ {
+ addReplyError(c,"The ID specified in XADD is equal or smaller than the "
+ "target stream top item");
+ return;
+ }
+ addReplyStreamID(c,&id);
+
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id);
+ server.dirty++;
+
+ /* Remove older elements if MAXLEN was specified. */
+ if (maxlen >= 0) {
+ if (!streamTrimByLength(s,maxlen,approx_maxlen)) {
+ /* If no trimming was performed, for instance because approximated
+ * trimming was requested, rewrite the MAXLEN argument
+ * as zero, so that the command is propagated without trimming. */
+ robj *zeroobj = createStringObjectFromLongLong(0);
+ rewriteClientCommandArgument(c,maxlen_arg_idx,zeroobj);
+ decrRefCount(zeroobj);
+ } else {
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
+ }
+ }
+
+ /* Let's rewrite the ID argument with the one actually generated for
+ * AOF/replication propagation. */
+ robj *idarg = createObjectFromStreamID(&id);
+ rewriteClientCommandArgument(c,i,idarg);
+ decrRefCount(idarg);
+
+ /* We need to signal to blocked clients that there is new data on this
+ * stream. */
+ if (server.blocked_clients_by_type[BLOCKED_STREAM])
+ signalKeyAsReady(c->db, c->argv[1]);
+}
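+
+/* Illustrative invocations (key and field names are hypothetical):
+ *
+ *   XADD mystream * sensor-id 1234 temperature 19.8
+ *   XADD mystream MAXLEN ~ 1000 * value 10
+ *   XADD mystream 1526919030474-55 field value
+ *
+ * The first form auto-generates the ID, the second one also trims the
+ * stream to approximately 1000 entries, and the third uses an explicit
+ * ID, failing if it is not greater than the current last ID. */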
+
+/* XRANGE/XREVRANGE actual implementation. */
+void xrangeGenericCommand(client *c, int rev) {
+ robj *o;
+ stream *s;
+ streamID startid, endid;
+ long long count = 0;
+ robj *startarg = rev ? c->argv[3] : c->argv[2];
+ robj *endarg = rev ? c->argv[2] : c->argv[3];
+
+ if (streamParseIDOrReply(c,startarg,&startid,0) == C_ERR) return;
+ if (streamParseIDOrReply(c,endarg,&endid,UINT64_MAX) == C_ERR) return;
+
+ /* Parse the COUNT option if any. */
+ if (c->argc > 4) {
+ for (int j = 4; j < c->argc; j++) {
+ int additional = c->argc-j-1;
+ if (strcasecmp(c->argv[j]->ptr,"COUNT") == 0 && additional >= 1) {
+ if (getLongLongFromObjectOrReply(c,c->argv[j+1],&count,NULL)
+ != C_OK) return;
+ if (count < 0) count = 0;
+ j++; /* Consume additional arg. */
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+ }
+
+ /* Return the specified range to the user. */
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ s = o->ptr;
+ streamReplyWithRange(c,s,&startid,&endid,count,rev,NULL,NULL,0,NULL);
+}
+
+/* XRANGE key start end [COUNT <n>] */
+void xrangeCommand(client *c) {
+ xrangeGenericCommand(c,0);
+}
+
+/* XREVRANGE key end start [COUNT <n>] */
+void xrevrangeCommand(client *c) {
+ xrangeGenericCommand(c,1);
+}
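+
+/* Illustrative invocations (stream name hypothetical):
+ *
+ *   XRANGE mystream - +                        whole stream, old to new
+ *   XRANGE mystream 1526919030474 + COUNT 10
+ *   XREVRANGE mystream + -                     whole stream, new to old
+ *
+ * An incomplete ID like "1526919030474" gets its sequence part defaulted
+ * to 0 when used as the start and to UINT64_MAX when used as the end. */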
+
+/* XLEN */
+void xlenCommand(client *c) {
+ robj *o;
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+ addReplyLongLong(c,s->length);
+}
+
+/* XREAD [BLOCK <milliseconds>] [COUNT <count>] STREAMS key_1 key_2 ... key_N
+ * ID_1 ID_2 ... ID_N
+ *
+ * This function also implements the XREAD-GROUP command, which is like XREAD
+ * but accepting the [GROUP group-name consumer-name] additional option.
+ * This is useful because while XREAD is a read command and can be called
+ * on slaves, XREAD-GROUP is not. */
+#define XREAD_BLOCKED_DEFAULT_COUNT 1000
+void xreadCommand(client *c) {
+ long long timeout = -1; /* -1 means no BLOCK argument was given. */
+ long long count = 0;
+ int streams_count = 0;
+ int streams_arg = 0;
+ int noack = 0; /* True if NOACK option was specified. */
+ #define STREAMID_STATIC_VECTOR_LEN 8
+ streamID static_ids[STREAMID_STATIC_VECTOR_LEN];
+ streamID *ids = static_ids;
+ streamCG **groups = NULL;
+ int xreadgroup = sdslen(c->argv[0]->ptr) == 10; /* XREAD or XREADGROUP? */
+ robj *groupname = NULL;
+ robj *consumername = NULL;
+
+ /* Parse arguments. */
+ for (int i = 1; i < c->argc; i++) {
+ int moreargs = c->argc-i-1;
+ char *o = c->argv[i]->ptr;
+ if (!strcasecmp(o,"BLOCK") && moreargs) {
+ i++;
+ if (getTimeoutFromObjectOrReply(c,c->argv[i],&timeout,
+ UNIT_MILLISECONDS) != C_OK) return;
+ } else if (!strcasecmp(o,"COUNT") && moreargs) {
+ i++;
+ if (getLongLongFromObjectOrReply(c,c->argv[i],&count,NULL) != C_OK)
+ return;
+ if (count < 0) count = 0;
+ } else if (!strcasecmp(o,"STREAMS") && moreargs) {
+ streams_arg = i+1;
+ streams_count = (c->argc-streams_arg);
+ if ((streams_count % 2) != 0) {
+ addReplyError(c,"Unbalanced XREAD list of streams: "
+ "for each stream key an ID or '$' must be "
+ "specified.");
+ return;
+ }
+ streams_count /= 2; /* We have two arguments for each stream. */
+ break;
+ } else if (!strcasecmp(o,"GROUP") && moreargs >= 2) {
+ if (!xreadgroup) {
+ addReplyError(c,"The GROUP option is only supported by "
+ "XREADGROUP. You called XREAD instead.");
+ return;
+ }
+ groupname = c->argv[i+1];
+ consumername = c->argv[i+2];
+ i += 2;
+ } else if (!strcasecmp(o,"NOACK")) {
+ if (!xreadgroup) {
+ addReplyError(c,"The NOACK option is only supported by "
+ "XREADGROUP. You called XREAD instead.");
+ return;
+ }
+ noack = 1;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* STREAMS option is mandatory. */
+ if (streams_arg == 0) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* If the user specified XREADGROUP then it must also
+ * provide the GROUP option. */
+ if (xreadgroup && groupname == NULL) {
+ addReplyError(c,"Missing GROUP option for XREADGROUP");
+ return;
+ }
+
+ /* Parse the IDs and resolve the group name. */
+ if (streams_count > STREAMID_STATIC_VECTOR_LEN)
+ ids = zmalloc(sizeof(streamID)*streams_count);
+ if (groupname) groups = zmalloc(sizeof(streamCG*)*streams_count);
+
+ for (int i = streams_arg + streams_count; i < c->argc; i++) {
+ /* Specifying "$" as last-known-id means that the client wants to be
+ * served with just the messages that will arrive into the stream
+ * starting from now. */
+ int id_idx = i - streams_arg - streams_count;
+ robj *key = c->argv[i-streams_count];
+ robj *o = lookupKeyRead(c->db,key);
+ if (o && checkType(c,o,OBJ_STREAM)) goto cleanup;
+ streamCG *group = NULL;
+
+ /* If a group was specified, then we need to be sure that the
+ * key and group actually exist. */
+ if (groupname) {
+ if (o == NULL ||
+ (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL)
+ {
+ addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer "
+ "group '%s' in XREADGROUP with GROUP "
+ "option",
+ (char*)key->ptr,(char*)groupname->ptr);
+ goto cleanup;
+ }
+ groups[id_idx] = group;
+ }
+
+ if (strcmp(c->argv[i]->ptr,"$") == 0) {
+ if (xreadgroup) {
+ addReplyError(c,"The $ ID is meaningless in the context of "
+ "XREADGROUP: you want to read the history of "
+ "this consumer by specifying a proper ID, or "
+ "use the > ID to get new messages. The $ ID would "
+ "just return an empty result set.");
+ goto cleanup;
+ }
+ if (o) {
+ stream *s = o->ptr;
+ ids[id_idx] = s->last_id;
+ } else {
+ ids[id_idx].ms = 0;
+ ids[id_idx].seq = 0;
+ }
+ continue;
+ } else if (strcmp(c->argv[i]->ptr,">") == 0) {
+ if (!xreadgroup) {
+ addReplyError(c,"The > ID can be specified only when calling "
+ "XREADGROUP using the GROUP <group> "
+ "<consumer> option.");
+ goto cleanup;
+ }
+ /* We use just the maximum ID to signal this is a ">" ID, anyway
+ * the code handling the blocking clients will have to update the
+ * ID later in order to match the changing consumer group last ID. */
+ ids[id_idx].ms = UINT64_MAX;
+ ids[id_idx].seq = UINT64_MAX;
+ continue;
+ }
+ if (streamParseStrictIDOrReply(c,c->argv[i],ids+id_idx,0) != C_OK)
+ goto cleanup;
+ }
+
+ /* Try to serve the client synchronously. */
+ size_t arraylen = 0;
+ void *arraylen_ptr = NULL;
+ for (int i = 0; i < streams_count; i++) {
+ robj *o = lookupKeyRead(c->db,c->argv[streams_arg+i]);
+ if (o == NULL) continue;
+ stream *s = o->ptr;
+ streamID *gt = ids+i; /* ID must be greater than this. */
+ int serve_synchronously = 0;
+
+ /* Check if the conditions to serve the client synchronously are met. */
+ if (groups) {
+ /* If the consumer is blocked on a group, we always serve it
+ * synchronously (serving its local history) if the ID specified
+ * was not the special ">" ID. */
+ if (gt->ms != UINT64_MAX ||
+ gt->seq != UINT64_MAX)
+ {
+ serve_synchronously = 1;
+ } else {
+ /* We also want to serve a consumer in a consumer group
+ * synchronously in case the last ID delivered to the group is
+ * smaller than the last ID of the stream. */
+ streamID *last = &groups[i]->last_id;
+ if (streamCompareID(&s->last_id, last) > 0) {
+ serve_synchronously = 1;
+ *gt = *last;
+ }
+ }
+ } else {
+ /* For consumers without a group, we serve synchronously if we can
+ * actually provide at least one item from the stream. */
+ if (streamCompareID(&s->last_id, gt) > 0) {
+ serve_synchronously = 1;
+ }
+ }
+
+ if (serve_synchronously) {
+ arraylen++;
+ if (arraylen == 1) arraylen_ptr = addDeferredMultiBulkLength(c);
+ /* streamReplyWithRange() handles the 'start' ID as inclusive,
+ * so start from the next ID, since we want only messages with
+ * IDs greater than start. */
+ streamID start = *gt;
+ start.seq++; /* uint64_t can't overflow in this context. */
+
+ /* Emit the two elements sub-array consisting of the name
+ * of the stream and the data we extracted from it. */
+ addReplyMultiBulkLen(c,2);
+ addReplyBulk(c,c->argv[streams_arg+i]);
+ streamConsumer *consumer = NULL;
+ if (groups) consumer = streamLookupConsumer(groups[i],
+ consumername->ptr,1);
+ streamPropInfo spi = {c->argv[i+streams_arg],groupname};
+ streamReplyWithRange(c,s,&start,NULL,count,0,
+ groups ? groups[i] : NULL,
+ consumer, noack, &spi);
+ if (groups) server.dirty++;
+ }
+ }
+
+ /* We replied synchronously! Set the top array len and return to caller. */
+ if (arraylen) {
+ setDeferredMultiBulkLength(c,arraylen_ptr,arraylen);
+ goto cleanup;
+ }
+
+ /* Block if needed. */
+ if (timeout != -1) {
+ /* If we are inside a MULTI/EXEC and the list is empty the only thing
+ * we can do is to treat it as a timeout (even with timeout 0). */
+ if (c->flags & CLIENT_MULTI) {
+ addReply(c,shared.nullmultibulk);
+ goto cleanup;
+ }
+ blockForKeys(c, BLOCKED_STREAM, c->argv+streams_arg, streams_count,
+ timeout, NULL, ids);
+ /* If no COUNT is given and we block, set a relatively small count:
+ * in case the ID provided is too low, we do not want the server to
+ * block just to serve this client a huge stream of messages. */
+ c->bpop.xread_count = count ? count : XREAD_BLOCKED_DEFAULT_COUNT;
+
+ /* If this is a XREADGROUP + GROUP we need to remember for which
+ * group and consumer name we are blocking, so later when one of the
+ * keys receive more data, we can call streamReplyWithRange() passing
+ * the right arguments. */
+ if (groupname) {
+ incrRefCount(groupname);
+ incrRefCount(consumername);
+ c->bpop.xread_group = groupname;
+ c->bpop.xread_consumer = consumername;
+ c->bpop.xread_group_noack = noack;
+ } else {
+ c->bpop.xread_group = NULL;
+ c->bpop.xread_consumer = NULL;
+ }
+ goto cleanup;
+ }
+
+ /* No BLOCK option, nor any stream we can serve. Reply as if a
+ * timeout happened. */
+ addReply(c,shared.nullmultibulk);
+ /* Continue to cleanup... */
+
+cleanup: /* Cleanup. */
+
+ /* The command is propagated (in the XREADGROUP form) as a side effect
+ * of calling lower level APIs. So stop any implicit propagation. */
+ preventCommandPropagation(c);
+ if (ids != static_ids) zfree(ids);
+ zfree(groups);
+}
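+
+/* Illustrative invocations (names hypothetical):
+ *
+ *   XREAD COUNT 2 STREAMS mystream 0
+ *   XREAD BLOCK 5000 STREAMS mystream $
+ *   XREADGROUP GROUP mygroup consumer1 COUNT 10 STREAMS mystream >
+ *
+ * "$" means "only entries arriving from now on" (XREAD only), while ">"
+ * asks a consumer group for never-delivered entries (XREADGROUP only). */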
+
+/* -----------------------------------------------------------------------
+ * Low level implementation of consumer groups
+ * ----------------------------------------------------------------------- */
+
+/* Create a NACK entry setting the delivery count to 1 and the delivery
+ * time to the current time. The NACK consumer will be set to the one
+ * specified as argument of the function. */
+streamNACK *streamCreateNACK(streamConsumer *consumer) {
+ streamNACK *nack = zmalloc(sizeof(*nack));
+ nack->delivery_time = mstime();
+ nack->delivery_count = 1;
+ nack->consumer = consumer;
+ return nack;
+}
+
+/* Free a NACK entry. */
+void streamFreeNACK(streamNACK *na) {
+ zfree(na);
+}
+
+/* Free a consumer and associated data structures. Note that this function
+ * will not reassign the pending messages associated with this consumer
+ * nor delete them from the stream, so when this function is called
+ * to delete a consumer (and not when the whole stream is destroyed), the
+ * caller should take care of the pending entries first. */
+void streamFreeConsumer(streamConsumer *sc) {
+ raxFree(sc->pel); /* No value free callback: the PEL entries are shared
+ between the consumer and the main stream PEL. */
+ sdsfree(sc->name);
+ zfree(sc);
+}
+
+/* Create a new consumer group in the context of the stream 's', having the
+ * specified name and last served ID. If a consumer group with the same name
+ * already exists NULL is returned, otherwise the pointer to the consumer
+ * group is returned. */
+streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id) {
+ if (s->cgroups == NULL) s->cgroups = raxNew();
+ if (raxFind(s->cgroups,(unsigned char*)name,namelen) != raxNotFound)
+ return NULL;
+
+ streamCG *cg = zmalloc(sizeof(*cg));
+ cg->pel = raxNew();
+ cg->consumers = raxNew();
+ cg->last_id = *id;
+ raxInsert(s->cgroups,(unsigned char*)name,namelen,cg,NULL);
+ return cg;
+}
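+
+/* Usage sketch (illustrative): creating a group that will only see
+ * entries added after its creation, by using the stream last ID:
+ *
+ *   streamCG *cg = streamCreateCG(s,"mygroup",7,&s->last_id);
+ *   if (cg == NULL) ... a group with that name already exists ...
+ */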
+
+/* Free a consumer group and all its associated data. */
+void streamFreeCG(streamCG *cg) {
+ raxFreeWithCallback(cg->pel,(void(*)(void*))streamFreeNACK);
+ raxFreeWithCallback(cg->consumers,(void(*)(void*))streamFreeConsumer);
+ zfree(cg);
+}
+
+/* Lookup the consumer group in the specified stream and return its
+ * pointer, otherwise if there is no such group, NULL is returned. */
+streamCG *streamLookupCG(stream *s, sds groupname) {
+ if (s->cgroups == NULL) return NULL;
+ streamCG *cg = raxFind(s->cgroups,(unsigned char*)groupname,
+ sdslen(groupname));
+ return (cg == raxNotFound) ? NULL : cg;
+}
+
+/* Lookup the consumer with the specified name in the group 'cg': if the
+ * consumer does not exist it is automatically created as a side effect
+ * of calling this function, otherwise its last seen time is updated and
+ * the existing consumer reference returned. */
+streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int create) {
+ streamConsumer *consumer = raxFind(cg->consumers,(unsigned char*)name,
+ sdslen(name));
+ if (consumer == raxNotFound) {
+ if (!create) return NULL;
+ consumer = zmalloc(sizeof(*consumer));
+ consumer->name = sdsdup(name);
+ consumer->pel = raxNew();
+ raxInsert(cg->consumers,(unsigned char*)name,sdslen(name),
+ consumer,NULL);
+ }
+ consumer->seen_time = mstime();
+ return consumer;
+}
+
+/* Delete the consumer specified in the consumer group 'cg'. The consumer
+ * may have pending messages: they are removed from the PEL, and the number
+ * of pending messages "lost" is returned. */
+uint64_t streamDelConsumer(streamCG *cg, sds name) {
+ streamConsumer *consumer = streamLookupConsumer(cg,name,0);
+ if (consumer == NULL) return 0;
+
+ uint64_t retval = raxSize(consumer->pel);
+
+ /* Iterate all the consumer pending messages, deleting every corresponding
+ * entry from the global PEL of the group. */
+ raxIterator ri;
+ raxStart(&ri,consumer->pel);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamNACK *nack = ri.data;
+ raxRemove(cg->pel,ri.key,ri.key_len,NULL);
+ streamFreeNACK(nack);
+ }
+ raxStop(&ri);
+
+ /* Deallocate the consumer. */
+ raxRemove(cg->consumers,(unsigned char*)name,sdslen(name),NULL);
+ streamFreeConsumer(consumer);
+ return retval;
+}
+
+/* -----------------------------------------------------------------------
+ * Consumer groups commands
+ * ----------------------------------------------------------------------- */
+
+/* XGROUP CREATE <key> <groupname> <id or $>
+ * XGROUP SETID <key> <groupname> <id or $>
+ * XGROUP DESTROY <key> <groupname>
+ * XGROUP DELCONSUMER <key> <groupname> <consumername> */
+void xgroupCommand(client *c) {
+ const char *help[] = {
+"CREATE <key> <groupname> <id or $> -- Create a new consumer group.",
+"SETID <key> <groupname> <id or $> -- Set the current group ID.",
+"DESTROY <key> <groupname> -- Remove the specified group.",
+"DELCONSUMER <key> <groupname> <consumer> -- Remove the specified consumer.",
+"HELP -- Prints this help.",
+NULL
+ };
+ stream *s = NULL;
+ sds grpname = NULL;
+ streamCG *cg = NULL;
+ char *opt = c->argv[1]->ptr; /* Subcommand name. */
+
+ /* Lookup the key now, this is common for all the subcommands but HELP. */
+ if (c->argc >= 4) {
+ robj *o = lookupKeyWriteOrReply(c,c->argv[2],shared.nokeyerr);
+ if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
+ s = o->ptr;
+ grpname = c->argv[3]->ptr;
+
+ /* Certain subcommands require the group to exist. */
+ if ((cg = streamLookupCG(s,grpname)) == NULL &&
+ (!strcasecmp(opt,"SETID") ||
+ !strcasecmp(opt,"DELCONSUMER")))
+ {
+ addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' "
+ "for key name '%s'",
+ (char*)grpname, (char*)c->argv[2]->ptr);
+ return;
+ }
+ }
+
+ /* Dispatch the different subcommands. */
+ if (!strcasecmp(opt,"CREATE") && c->argc == 5) {
+ streamID id;
+ if (!strcmp(c->argv[4]->ptr,"$")) {
+ id = s->last_id;
+ } else if (streamParseStrictIDOrReply(c,c->argv[4],&id,0) != C_OK) {
+ return;
+ }
+ streamCG *cg = streamCreateCG(s,grpname,sdslen(grpname),&id);
+ if (cg) {
+ addReply(c,shared.ok);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-create",
+ c->argv[2],c->db->id);
+ } else {
+ addReplySds(c,
+ sdsnew("-BUSYGROUP Consumer Group name already exists\r\n"));
+ }
+ } else if (!strcasecmp(opt,"SETID") && c->argc == 5) {
+ streamID id;
+ if (!strcmp(c->argv[4]->ptr,"$")) {
+ id = s->last_id;
+ } else if (streamParseIDOrReply(c,c->argv[4],&id,0) != C_OK) {
+ return;
+ }
+ cg->last_id = id;
+ addReply(c,shared.ok);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-setid",c->argv[2],c->db->id);
+ } else if (!strcasecmp(opt,"DESTROY") && c->argc == 4) {
+ if (cg) {
+ raxRemove(s->cgroups,(unsigned char*)grpname,sdslen(grpname),NULL);
+ streamFreeCG(cg);
+ addReply(c,shared.cone);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-destroy",
+ c->argv[2],c->db->id);
+ } else {
+ addReply(c,shared.czero);
+ }
+ } else if (!strcasecmp(opt,"DELCONSUMER") && c->argc == 5) {
+ /* Delete the consumer and return the number of pending messages
+ * that were still associated with such a consumer. */
+ long long pending = streamDelConsumer(cg,c->argv[4]->ptr);
+ addReplyLongLong(c,pending);
+ server.dirty++;
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xgroup-delconsumer",
+ c->argv[2],c->db->id);
+ } else if (!strcasecmp(opt,"HELP")) {
+ addReplyHelp(c, help);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ }
+}
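+
+/* Illustrative invocations (names hypothetical):
+ *
+ *   XGROUP CREATE mystream mygroup $
+ *   XGROUP SETID mystream mygroup 0
+ *   XGROUP DELCONSUMER mystream mygroup consumer1
+ *   XGROUP DESTROY mystream mygroup
+ */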
+
+/* XACK <key> <group> <id> <id> ... <id>
+ *
+ * Acknowledge a message as processed. In practical terms we just check the
+ * pending entries list (PEL) of the group, and delete the PEL entry both from
+ * the group and the consumer (pending messages are referenced in both places).
+ *
+ * Return value of the command is the number of messages successfully
+ * acknowledged, that is, the IDs we were actually able to resolve in the PEL.
+ */
+void xackCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Nothing to ack. */
+ if (o == NULL || group == NULL) {
+ addReply(c,shared.czero);
+ return;
+ }
+
+ int acknowledged = 0;
+ for (int j = 3; j < c->argc; j++) {
+ streamID id;
+ unsigned char buf[sizeof(streamID)];
+ if (streamParseStrictIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
+ streamEncodeID(buf,&id);
+
+ /* Lookup the ID in the group PEL: it will have a reference to the
+ * NACK structure that will have a reference to the consumer, so that
+ * we are able to remove the entry from both PELs. */
+ streamNACK *nack = raxFind(group->pel,buf,sizeof(buf));
+ if (nack != raxNotFound) {
+ raxRemove(group->pel,buf,sizeof(buf),NULL);
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ streamFreeNACK(nack);
+ acknowledged++;
+ server.dirty++;
+ }
+ }
+ addReplyLongLong(c,acknowledged);
+}
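+
+/* Illustrative invocation (names hypothetical):
+ *
+ *   XACK mystream mygroup 1526919030474-55 1526919030474-56
+ *
+ * The reply is the number of IDs, among the specified ones, that were
+ * actually found in the group PEL and removed. */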
+
+/* XPENDING <key> <group> [<start> <stop> <count>] [<consumer>]
+ *
+ * If start and stop are omitted, the command just outputs information about
+ * the amount of pending messages for the key/group pair, together with
+ * the minimum and maximum ID of pending messages.
+ *
+ * If start and stop are provided instead, the pending messages are returned
+ * with information about the current owner, number of deliveries, last
+ * delivery time and so forth. */
+void xpendingCommand(client *c) {
+ int justinfo = c->argc == 3; /* Without the range just outputs general
+ information about the PEL. */
+ robj *key = c->argv[1];
+ robj *groupname = c->argv[2];
+ robj *consumername = (c->argc == 7) ? c->argv[6] : NULL;
+ streamID startid, endid;
+ long long count;
+
+ /* Start and stop, and the consumer, can be omitted. */
+ if (c->argc != 3 && c->argc != 6 && c->argc != 7) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+
+ /* Parse start/end/count arguments ASAP if needed, in order to report
+ * syntax errors before any other error. */
+ if (c->argc >= 6) {
+ if (getLongLongFromObjectOrReply(c,c->argv[5],&count,NULL) == C_ERR)
+ return;
+ if (streamParseIDOrReply(c,c->argv[3],&startid,0) == C_ERR)
+ return;
+ if (streamParseIDOrReply(c,c->argv[4],&endid,UINT64_MAX) == C_ERR)
+ return;
+ }
+
+ /* Lookup the key and the group inside the stream. */
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ streamCG *group;
+
+ if (o && checkType(c,o,OBJ_STREAM)) return;
+ if (o == NULL ||
+ (group = streamLookupCG(o->ptr,groupname->ptr)) == NULL)
+ {
+ addReplyErrorFormat(c, "-NOGROUP No such key '%s' or consumer "
+ "group '%s'",
+ (char*)key->ptr,(char*)groupname->ptr);
+ return;
+ }
+
+ /* XPENDING <key> <group> variant. */
+ if (justinfo) {
+ addReplyMultiBulkLen(c,4);
+ /* Total number of messages in the PEL. */
+ addReplyLongLong(c,raxSize(group->pel));
+ /* First and last IDs. */
+ if (raxSize(group->pel) == 0) {
+ addReply(c,shared.nullbulk); /* Start. */
+ addReply(c,shared.nullbulk); /* End. */
+ addReply(c,shared.nullmultibulk); /* Clients. */
+ } else {
+ /* Start. */
+ raxIterator ri;
+ raxStart(&ri,group->pel);
+ raxSeek(&ri,"^",NULL,0);
+ raxNext(&ri);
+ streamDecodeID(ri.key,&startid);
+ addReplyStreamID(c,&startid);
+
+ /* End. */
+ raxSeek(&ri,"$",NULL,0);
+ raxNext(&ri);
+ streamDecodeID(ri.key,&endid);
+ addReplyStreamID(c,&endid);
+ raxStop(&ri);
+
+ /* Consumers with pending messages. */
+ raxStart(&ri,group->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ void *arraylen_ptr = addDeferredMultiBulkLength(c);
+ size_t arraylen = 0;
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+ if (raxSize(consumer->pel) == 0) continue;
+ addReplyMultiBulkLen(c,2);
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ addReplyBulkLongLong(c,raxSize(consumer->pel));
+ arraylen++;
+ }
+ setDeferredMultiBulkLength(c,arraylen_ptr,arraylen);
+ raxStop(&ri);
+ }
+ }
+ /* XPENDING <key> <group> <start> <stop> <count> [<consumer>] variant. */
+ else {
+ streamConsumer *consumer = consumername ?
+ streamLookupConsumer(group,consumername->ptr,0):
+ NULL;
+
+ /* If a consumer name was mentioned but it does not exist, we can
+ * just return an empty array. */
+ if (consumername && consumer == NULL) {
+ addReplyMultiBulkLen(c,0);
+ return;
+ }
+
+ rax *pel = consumer ? consumer->pel : group->pel;
+ unsigned char startkey[sizeof(streamID)];
+ unsigned char endkey[sizeof(streamID)];
+ raxIterator ri;
+ mstime_t now = mstime();
+
+ streamEncodeID(startkey,&startid);
+ streamEncodeID(endkey,&endid);
+ raxStart(&ri,pel);
+ raxSeek(&ri,">=",startkey,sizeof(startkey));
+ void *arraylen_ptr = addDeferredMultiBulkLength(c);
+ size_t arraylen = 0;
+
+ while(count && raxNext(&ri) && memcmp(ri.key,endkey,ri.key_len) <= 0) {
+ streamNACK *nack = ri.data;
+
+ arraylen++;
+ count--;
+ addReplyMultiBulkLen(c,4);
+
+ /* Entry ID. */
+ streamID id;
+ streamDecodeID(ri.key,&id);
+ addReplyStreamID(c,&id);
+
+ /* Consumer name. */
+ addReplyBulkCBuffer(c,nack->consumer->name,
+ sdslen(nack->consumer->name));
+
+ /* Milliseconds elapsed since last delivery. */
+ mstime_t elapsed = now - nack->delivery_time;
+ if (elapsed < 0) elapsed = 0;
+ addReplyLongLong(c,elapsed);
+
+ /* Number of deliveries. */
+ addReplyLongLong(c,nack->delivery_count);
+ }
+ raxStop(&ri);
+ setDeferredMultiBulkLength(c,arraylen_ptr,arraylen);
+ }
+}
+
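A sketch of the two reply forms (names hypothetical, ctx as above):

    /* Summary form: PEL size, smallest and greatest pending ID, plus a
     * per-consumer array of pending counts. */
    redisReply *r = redisCommand(ctx, "XPENDING mystream mygroup");
    freeReplyObject(r);

    /* Range form: up to 10 entries, each reported with its ID, owner,
     * idle milliseconds and delivery count; "-" and "+" select the
     * smallest and greatest possible IDs. */
    r = redisCommand(ctx, "XPENDING mystream mygroup - + 10 worker-1");
    freeReplyObject(r);
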
+/* XCLAIM <key> <group> <consumer> <min-idle-time> <ID-1> <ID-2>
+ * [IDLE <milliseconds>] [TIME <mstime>] [RETRYCOUNT <count>]
+ * [FORCE] [JUSTID]
+ *
+ * Gets ownership of one or multiple messages in the Pending Entries List
+ * of a given stream consumer group.
+ *
+ * If the message ID (among the specified ones) exists, and its idle
+ * time is greater than or equal to <min-idle-time>, then the new owner
+ * of the message becomes the specified <consumer>. If the minimum idle
+ * time specified is zero, messages are claimed regardless of their idle time.
+ *
+ * All the messages that cannot be found inside the pending entries list
+ * are ignored, unless the FORCE option is used: in that case we
+ * create the NACK (representing a not yet acknowledged message) entry in
+ * the consumer group PEL.
+ *
+ * This command creates the consumer as a side effect if it does not yet
+ * exist. Moreover the command resets the idle time of the message to 0,
+ * although by using the IDLE or TIME options the user can control the
+ * new idle time.
+ *
+ * The options at the end can be used in order to specify more attributes
+ * to set in the representation of the pending message:
+ *
+ * 1. IDLE <ms>:
+ * Set the idle time (last time it was delivered) of the message.
+ * If IDLE is not specified, an IDLE of 0 is assumed, that is,
+ * the time count is reset because the message now has a new
+ * owner trying to process it.
+ *
+ * 2. TIME <ms-unix-time>:
+ * This is the same as IDLE but instead of a relative amount of
+ * milliseconds, it sets the idle time to a specific unix time
+ * (in milliseconds). This is useful in order to rewrite the AOF
+ * file generating XCLAIM commands.
+ *
+ * 3. RETRYCOUNT <count>:
+ * Set the retry counter to the specified value. This counter is
+ * incremented every time a message is delivered again. Normally
+ * XCLAIM does not alter this counter, which is simply reported to clients
+ * when the XPENDING command is called: this way clients can detect
+ * anomalies, like messages that are never processed for some reason
+ * after a big number of delivery attempts.
+ *
+ * 4. FORCE:
+ * Creates the pending message entry in the PEL even if certain
+ * specified IDs are not already in the PEL assigned to a different
+ * client. However the message must exist in the stream, otherwise
+ * the IDs of non-existing messages are ignored.
+ *
+ * 5. JUSTID:
+ * Return just an array of IDs of messages successfully claimed,
+ * without returning the actual message.
+ *
+ * The command returns an array of messages that the user
+ * successfully claimed, so that the caller is able to understand
+ * what messages it is now in charge of. */
+void xclaimCommand(client *c) {
+ streamCG *group = NULL;
+ robj *o = lookupKeyRead(c->db,c->argv[1]);
+ long long minidle; /* Minimum idle time argument. */
+ long long retrycount = -1; /* -1 means RETRYCOUNT option not given. */
+ mstime_t deliverytime = -1; /* -1 means IDLE/TIME options not given. */
+ int force = 0;
+ int justid = 0;
+
+ if (o) {
+ if (checkType(c,o,OBJ_STREAM)) return; /* Type error. */
+ group = streamLookupCG(o->ptr,c->argv[2]->ptr);
+ }
+
+ /* No key or group? Send an error given that the group creation
+ * is mandatory. */
+ if (o == NULL || group == NULL) {
+ addReplyErrorFormat(c,"-NOGROUP No such key '%s' or "
+ "consumer group '%s'", (char*)c->argv[1]->ptr,
+ (char*)c->argv[2]->ptr);
+ return;
+ }
+
+ if (getLongLongFromObjectOrReply(c,c->argv[4],&minidle,
+ "Invalid min-idle-time argument for XCLAIM")
+ != C_OK) return;
+ if (minidle < 0) minidle = 0;
+
+ /* Start parsing the IDs, so that we abort ASAP if there is a syntax
+ * error: the return value of this command cannot be an error in case
+ * the client successfully claimed some message, so it should be
+ * executed in an "all or nothing" fashion. */
+ int j;
+ for (j = 5; j < c->argc; j++) {
+ streamID id;
+ if (streamParseStrictIDOrReply(NULL,c->argv[j],&id,0) != C_OK) break;
+ }
+ int last_id_arg = j-1; /* Next time we iterate the IDs we know the range. */
+
+ /* If we stopped because some IDs cannot be parsed, perhaps they
+ * are trailing options. */
+ mstime_t now = mstime(); /* mstime() returns milliseconds, so use mstime_t. */
+ for (; j < c->argc; j++) {
+ int moreargs = (c->argc-1) - j; /* Number of additional arguments. */
+ char *opt = c->argv[j]->ptr;
+ if (!strcasecmp(opt,"FORCE")) {
+ force = 1;
+ } else if (!strcasecmp(opt,"JUSTID")) {
+ justid = 1;
+ } else if (!strcasecmp(opt,"IDLE") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime,
+ "Invalid IDLE option argument for XCLAIM")
+ != C_OK) return;
+ deliverytime = now - deliverytime;
+ } else if (!strcasecmp(opt,"TIME") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&deliverytime,
+ "Invalid TIME option argument for XCLAIM")
+ != C_OK) return;
+ } else if (!strcasecmp(opt,"RETRYCOUNT") && moreargs) {
+ j++;
+ if (getLongLongFromObjectOrReply(c,c->argv[j],&retrycount,
+ "Invalid RETRYCOUNT option argument for XCLAIM")
+ != C_OK) return;
+ } else {
+ addReplyErrorFormat(c,"Unrecognized XCLAIM option '%s'",opt);
+ return;
+ }
+ }
+
+ if (deliverytime != -1) {
+ /* If a delivery time was passed, either with IDLE or TIME, we
+ * do some sanity check on it, and set the deliverytime to now
+ * (which is a sane choice usually) if the value is bogus.
+ * To raise an error here is not wise because clients may compute
+ * the idle time doing some math starting from their local time,
+ * and this is not a good excuse to fail in case, for instance,
+ * the computed time is a bit in the future from our POV. */
+ if (deliverytime < 0 || deliverytime > now) deliverytime = now;
+ } else {
+ /* If no IDLE/TIME option was passed, we want the last delivery
+ * time to be now, so that the idle time of the message will be
+ * zero. */
+ deliverytime = now;
+ }
+
+ /* Do the actual claiming. */
+ streamConsumer *consumer = streamLookupConsumer(group,c->argv[3]->ptr,1);
+ void *arraylenptr = addDeferredMultiBulkLength(c);
+ size_t arraylen = 0;
+ for (int j = 5; j <= last_id_arg; j++) {
+ streamID id;
+ unsigned char buf[sizeof(streamID)];
+ if (streamParseStrictIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
+ streamEncodeID(buf,&id);
+
+ /* Lookup the ID in the group PEL. */
+ streamNACK *nack = raxFind(group->pel,buf,sizeof(buf));
+
+ /* If FORCE is passed, let's check if at least the entry
+ * exists in the Stream. In that case, we'll create a new
+ * entry in the PEL from scratch, so that XCLAIM can also
+ * be used to create entries in the PEL. Useful for AOF
+ * and replication of consumer groups. */
+ if (force && nack == raxNotFound) {
+ streamIterator myiterator;
+ streamIteratorStart(&myiterator,o->ptr,&id,&id,0);
+ int64_t numfields;
+ int found = 0;
+ streamID item_id;
+ if (streamIteratorGetID(&myiterator,&item_id,&numfields)) found = 1;
+ streamIteratorStop(&myiterator);
+
+ /* Item must exist for us to create a NACK for it. */
+ if (!found) continue;
+
+ /* Create the NACK. */
+ nack = streamCreateNACK(NULL);
+ raxInsert(group->pel,buf,sizeof(buf),nack,NULL);
+ }
+
+ if (nack != raxNotFound) {
+ /* We need to check if the minimum idle time requested
+ * by the caller is satisfied by this entry. */
+ if (minidle) {
+ mstime_t this_idle = now - nack->delivery_time;
+ if (this_idle < minidle) continue;
+ }
+ /* Remove the entry from the old consumer.
+ * Note that nack->consumer is NULL if we created the
+ * NACK above because of the FORCE option. */
+ if (nack->consumer)
+ raxRemove(nack->consumer->pel,buf,sizeof(buf),NULL);
+ /* Update the consumer and idle time. */
+ nack->consumer = consumer;
+ nack->delivery_time = deliverytime;
+ /* Set the delivery attempts counter if given. */
+ if (retrycount >= 0) nack->delivery_count = retrycount;
+ /* Add the entry in the new consumer local PEL. */
+ raxInsert(consumer->pel,buf,sizeof(buf),nack,NULL);
+ /* Send the reply for this entry. */
+ if (justid) {
+ addReplyStreamID(c,&id);
+ } else {
+ streamReplyWithRange(c,o->ptr,&id,NULL,1,0,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ }
+ arraylen++;
+
+ /* Propagate this change. */
+ streamPropagateXCLAIM(c,c->argv[1],c->argv[3],c->argv[j],nack);
+ server.dirty++;
+ }
+ }
+ setDeferredMultiBulkLength(c,arraylenptr,arraylen);
+ preventCommandPropagation(c);
+}
+
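A recovery sketch (names hypothetical, ctx as above): claim for worker-2 any of the listed entries that has been pending for at least 60000 milliseconds. With JUSTID only the claimed IDs are returned instead of the full entries:

    redisReply *r = redisCommand(ctx,
        "XCLAIM mystream mygroup worker-2 60000 1526569495631-0 JUSTID");
    freeReplyObject(r);
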
+
+/* XDEL <key> [<ID1> <ID2> ... <IDN>]
+ *
+ * Removes the specified entries from the stream. Returns the number
+ * of items actually deleted, which may differ from the number
+ * of IDs passed in case certain IDs do not exist. */
+void xdelCommand(client *c) {
+ robj *o;
+
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+
+ /* We need to sanity check the IDs passed before starting. Even if it
+ * is not a big issue, it would not be great for the command to be only
+ * partially executed because at some point an invalid ID is parsed. */
+ streamID id;
+ for (int j = 2; j < c->argc; j++) {
+ if (streamParseStrictIDOrReply(c,c->argv[j],&id,0) != C_OK) return;
+ }
+
+ /* Actually apply the command. */
+ int deleted = 0;
+ for (int j = 2; j < c->argc; j++) {
+ streamParseStrictIDOrReply(c,c->argv[j],&id,0); /* Retval already checked. */
+ deleted += streamDeleteItem(s,&id);
+ }
+
+ /* Propagate the write if needed. */
+ if (deleted) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id);
+ server.dirty += deleted;
+ }
+ addReplyLongLong(c,deleted);
+}
+
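A one-line sketch (ctx and IDs as above); the integer reply counts only the IDs that were actually found and removed:

    redisReply *r = redisCommand(ctx,
        "XDEL mystream 1526569495631-0 1526569498055-0");
    freeReplyObject(r);
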
+/* General form: XTRIM <key> [... options ...]
+ *
+ * List of options:
+ *
+ * MAXLEN [~] <count> -- Trim so that the stream will be capped at
+ * the specified length. Use ~ before the
+ * count in order to request approximate trimming
+ * (like the XADD MAXLEN option).
+ */
+
+#define TRIM_STRATEGY_NONE 0
+#define TRIM_STRATEGY_MAXLEN 1
+void xtrimCommand(client *c) {
+ robj *o;
+
+ /* If the key does not exist, we are ok returning zero, that is, the
+ * number of elements removed from the stream. */
+ if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL
+ || checkType(c,o,OBJ_STREAM)) return;
+ stream *s = o->ptr;
+
+ /* Argument parsing. */
+ int trim_strategy = TRIM_STRATEGY_NONE;
+ long long maxlen = 0; /* 0 means no maximum length. */
+ int approx_maxlen = 0; /* If 1 only delete whole radix tree nodes, so
+ the maximum length is not applied verbatim. */
+
+ /* Parse options. */
+ int i = 2; /* Start of options. */
+ for (; i < c->argc; i++) {
+ int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
+ char *opt = c->argv[i]->ptr;
+ if (!strcasecmp(opt,"maxlen") && moreargs) {
+ trim_strategy = TRIM_STRATEGY_MAXLEN;
+ char *next = c->argv[i+1]->ptr;
+ /* Check for the form MAXLEN ~ <count>. */
+ if (moreargs >= 2 && next[0] == '~' && next[1] == '\0') {
+ approx_maxlen = 1;
+ i++;
+ }
+ if (getLongLongFromObjectOrReply(c,c->argv[i+1],&maxlen,NULL)
+ != C_OK) return;
+ i++;
+ } else {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ }
+
+ /* Perform the trimming. */
+ int64_t deleted = 0;
+ if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
+ deleted = streamTrimByLength(s,maxlen,approx_maxlen);
+ } else {
+ addReplyError(c,"XTRIM called without an option to trim the stream");
+ return;
+ }
+
+ /* Propagate the write if needed. */
+ if (deleted) {
+ signalModifiedKey(c->db,c->argv[1]);
+ notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id);
+ server.dirty += deleted;
+ }
+ addReplyLongLong(c,deleted);
+}
+
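A sketch of the two MAXLEN forms (ctx as above):

    /* Exact trim: cap the stream at 1000 entries. */
    redisReply *r = redisCommand(ctx, "XTRIM mystream MAXLEN 1000");
    freeReplyObject(r);

    /* Approximate trim: with ~ only whole radix tree nodes are removed,
     * so slightly more than 1000 entries may be left in place. */
    r = redisCommand(ctx, "XTRIM mystream MAXLEN ~ 1000");
    freeReplyObject(r);
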
+/* XINFO CONSUMERS key group
+ * XINFO GROUPS <key>
+ * XINFO STREAM <key>
+ * XINFO HELP. */
+void xinfoCommand(client *c) {
+ const char *help[] = {
+"CONSUMERS <key> <groupname> -- Show consumer groups of group <groupname>.",
+"GROUPS <key> -- Show the stream consumer groups.",
+"STREAM <key> -- Show information about the stream.",
+"HELP -- Print this help.",
+NULL
+ };
+ stream *s = NULL;
+ char *opt;
+ robj *key;
+
+ /* HELP is special. Handle it ASAP. */
+ if (!strcasecmp(c->argv[1]->ptr,"HELP")) {
+ addReplyHelp(c, help);
+ return;
+ } else if (c->argc < 3) {
+ addReplyError(c,"syntax error, try 'XINFO HELP'");
+ return;
+ }
+
+ /* With the exception of HELP, which is handled before any other
+ * subcommand, all the rest are in the form of "<subcommand> <key>". */
+ opt = c->argv[1]->ptr;
+ key = c->argv[2];
+
+ /* Lookup the key now, this is common for all the subcommands but HELP. */
+ robj *o = lookupKeyWriteOrReply(c,key,shared.nokeyerr);
+ if (o == NULL || checkType(c,o,OBJ_STREAM)) return;
+ s = o->ptr;
+
+ /* Dispatch the different subcommands. */
+ if (!strcasecmp(opt,"CONSUMERS") && c->argc == 4) {
+ /* XINFO CONSUMERS <key> <group>. */
+ streamCG *cg = streamLookupCG(s,c->argv[3]->ptr);
+ if (cg == NULL) {
+ addReplyErrorFormat(c, "-NOGROUP No such consumer group '%s' "
+ "for key name '%s'",
+ (char*)c->argv[3]->ptr, (char*)key->ptr);
+ return;
+ }
+
+ addReplyMultiBulkLen(c,raxSize(cg->consumers));
+ raxIterator ri;
+ raxStart(&ri,cg->consumers);
+ raxSeek(&ri,"^",NULL,0);
+ mstime_t now = mstime();
+ while(raxNext(&ri)) {
+ streamConsumer *consumer = ri.data;
+ mstime_t idle = now - consumer->seen_time;
+ if (idle < 0) idle = 0;
+
+ addReplyMultiBulkLen(c,6);
+ addReplyStatus(c,"name");
+ addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name));
+ addReplyStatus(c,"pending");
+ addReplyLongLong(c,raxSize(consumer->pel));
+ addReplyStatus(c,"idle");
+ addReplyLongLong(c,idle);
+ }
+ raxStop(&ri);
+ } else if (!strcasecmp(opt,"GROUPS") && c->argc == 3) {
+ /* XINFO GROUPS <key>. */
+ if (s->cgroups == NULL) {
+ addReplyMultiBulkLen(c,0);
+ return;
+ }
+
+ addReplyMultiBulkLen(c,raxSize(s->cgroups));
+ raxIterator ri;
+ raxStart(&ri,s->cgroups);
+ raxSeek(&ri,"^",NULL,0);
+ while(raxNext(&ri)) {
+ streamCG *cg = ri.data;
+ addReplyMultiBulkLen(c,8);
+ addReplyStatus(c,"name");
+ addReplyBulkCBuffer(c,ri.key,ri.key_len);
+ addReplyStatus(c,"consumers");
+ addReplyLongLong(c,raxSize(cg->consumers));
+ addReplyStatus(c,"pending");
+ addReplyLongLong(c,raxSize(cg->pel));
+ addReplyStatus(c,"last-delivered-id");
+ addReplyStreamID(c,&cg->last_id);
+ }
+ raxStop(&ri);
+ } else if (!strcasecmp(opt,"STREAM") && c->argc == 3) {
+ /* XINFO STREAM <key>. */
+ addReplyMultiBulkLen(c,14);
+ addReplyStatus(c,"length");
+ addReplyLongLong(c,s->length);
+ addReplyStatus(c,"radix-tree-keys");
+ addReplyLongLong(c,raxSize(s->rax));
+ addReplyStatus(c,"radix-tree-nodes");
+ addReplyLongLong(c,s->rax->numnodes);
+ addReplyStatus(c,"groups");
+ addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0);
+ addReplyStatus(c,"last-generated-id");
+ addReplyStreamID(c,&s->last_id);
+
+ /* To emit the first/last entry we use the streamReplyWithRange()
+ * API. */
+ int count;
+ streamID start, end;
+ start.ms = start.seq = 0;
+ end.ms = end.seq = UINT64_MAX;
+ addReplyStatus(c,"first-entry");
+ count = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ if (!count) addReply(c,shared.nullbulk);
+ addReplyStatus(c,"last-entry");
+ count = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL,
+ STREAM_RWR_RAWENTRIES,NULL);
+ if (!count) addReply(c,shared.nullbulk);
+ } else {
+ addReplySubcommandSyntaxError(c);
+ }
+}
+
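A sketch of consuming the flat field/value reply of XINFO STREAM (ctx as above); the even-indexed elements are the field names emitted via addReplyStatus() above:

    redisReply *r = redisCommand(ctx, "XINFO STREAM mystream");
    if (r && r->type == REDIS_REPLY_ARRAY) {
        for (size_t i = 0; i + 1 < r->elements; i += 2)
            printf("field: %s\n", r->element[i]->str);
    }
    freeReplyObject(r);
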
diff --git a/src/t_string.c b/src/t_string.c
index e3c1e5f4a..e121df73e 100644
--- a/src/t_string.c
+++ b/src/t_string.c
@@ -27,19 +27,19 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "redis.h"
+#include "server.h"
#include <math.h> /* isnan(), isinf() */
/*-----------------------------------------------------------------------------
* String Commands
*----------------------------------------------------------------------------*/
-static int checkStringLength(redisClient *c, long long size) {
+static int checkStringLength(client *c, long long size) {
if (size > 512*1024*1024) {
addReplyError(c,"string exceeds maximum allowed size (512MB)");
- return REDIS_ERR;
+ return C_ERR;
}
- return REDIS_OK;
+ return C_OK;
}
/* The setGenericCommand() function implements the SET operation with different
@@ -58,15 +58,17 @@ static int checkStringLength(redisClient *c, long long size) {
* If ok_reply is NULL "+OK" is used.
* If abort_reply is NULL, "$-1" is used. */
-#define REDIS_SET_NO_FLAGS 0
-#define REDIS_SET_NX (1<<0) /* Set if key not exists. */
-#define REDIS_SET_XX (1<<1) /* Set if key exists. */
+#define OBJ_SET_NO_FLAGS 0
+#define OBJ_SET_NX (1<<0) /* Set if key does not exist. */
+#define OBJ_SET_XX (1<<1) /* Set if key exists. */
+#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given. */
+#define OBJ_SET_PX (1<<3) /* Set if time in ms is given. */
-void setGenericCommand(redisClient *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
+void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid a harmless warning */
if (expire) {
- if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK)
return;
if (milliseconds <= 0) {
addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name);
@@ -75,45 +77,55 @@ void setGenericCommand(redisClient *c, int flags, robj *key, robj *val, robj *ex
if (unit == UNIT_SECONDS) milliseconds *= 1000;
}
- if ((flags & REDIS_SET_NX && lookupKeyWrite(c->db,key) != NULL) ||
- (flags & REDIS_SET_XX && lookupKeyWrite(c->db,key) == NULL))
+ if ((flags & OBJ_SET_NX && lookupKeyWrite(c->db,key) != NULL) ||
+ (flags & OBJ_SET_XX && lookupKeyWrite(c->db,key) == NULL))
{
addReply(c, abort_reply ? abort_reply : shared.nullbulk);
return;
}
setKey(c->db,key,val);
server.dirty++;
- if (expire) setExpire(c->db,key,mstime()+milliseconds);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",key,c->db->id);
- if (expire) notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,
+ if (expire) setExpire(c,c->db,key,mstime()+milliseconds);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
+ if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC,
"expire",key,c->db->id);
addReply(c, ok_reply ? ok_reply : shared.ok);
}
/* SET key value [NX] [XX] [EX <seconds>] [PX <milliseconds>] */
-void setCommand(redisClient *c) {
+void setCommand(client *c) {
int j;
robj *expire = NULL;
int unit = UNIT_SECONDS;
- int flags = REDIS_SET_NO_FLAGS;
+ int flags = OBJ_SET_NO_FLAGS;
for (j = 3; j < c->argc; j++) {
char *a = c->argv[j]->ptr;
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
if ((a[0] == 'n' || a[0] == 'N') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
- flags |= REDIS_SET_NX;
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_XX))
+ {
+ flags |= OBJ_SET_NX;
} else if ((a[0] == 'x' || a[0] == 'X') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0') {
- flags |= REDIS_SET_XX;
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_NX))
+ {
+ flags |= OBJ_SET_XX;
} else if ((a[0] == 'e' || a[0] == 'E') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_PX) && next)
+ {
+ flags |= OBJ_SET_EX;
unit = UNIT_SECONDS;
expire = next;
j++;
} else if ((a[0] == 'p' || a[0] == 'P') &&
- (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' && next) {
+ (a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
+ !(flags & OBJ_SET_EX) && next)
+ {
+ flags |= OBJ_SET_PX;
unit = UNIT_MILLISECONDS;
expire = next;
j++;
@@ -127,54 +139,54 @@ void setCommand(redisClient *c) {
setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
}
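
The new flag guards make NX/XX and EX/PX mutually exclusive pairs: a combination such as the one below now falls through to the syntax-error branch instead of silently keeping both flags (sketch, ctx as in the stream examples above):

    redisReply *r = redisCommand(ctx, "SET mykey hello NX XX");
    if (r && r->type == REDIS_REPLY_ERROR)
        printf("rejected: %s\n", r->str);
    freeReplyObject(r);
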
-void setnxCommand(redisClient *c) {
+void setnxCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
- setGenericCommand(c,REDIS_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
+ setGenericCommand(c,OBJ_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
}
-void setexCommand(redisClient *c) {
+void setexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,REDIS_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
}
-void psetexCommand(redisClient *c) {
+void psetexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
- setGenericCommand(c,REDIS_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
+ setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
}
-int getGenericCommand(redisClient *c) {
+int getGenericCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
- return REDIS_OK;
+ return C_OK;
- if (o->type != REDIS_STRING) {
+ if (o->type != OBJ_STRING) {
addReply(c,shared.wrongtypeerr);
- return REDIS_ERR;
+ return C_ERR;
} else {
addReplyBulk(c,o);
- return REDIS_OK;
+ return C_OK;
}
}
-void getCommand(redisClient *c) {
+void getCommand(client *c) {
getGenericCommand(c);
}
-void getsetCommand(redisClient *c) {
- if (getGenericCommand(c) == REDIS_ERR) return;
+void getsetCommand(client *c) {
+ if (getGenericCommand(c) == C_ERR) return;
c->argv[2] = tryObjectEncoding(c->argv[2]);
setKey(c->db,c->argv[1],c->argv[2]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[1],c->db->id);
server.dirty++;
}
-void setrangeCommand(redisClient *c) {
+void setrangeCommand(client *c) {
robj *o;
long offset;
sds value = c->argv[3]->ptr;
- if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != REDIS_OK)
+ if (getLongFromObjectOrReply(c,c->argv[2],&offset,NULL) != C_OK)
return;
if (offset < 0) {
@@ -191,16 +203,16 @@ void setrangeCommand(redisClient *c) {
}
/* Return when the resulting string exceeds allowed size */
- if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
+ if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
- o = createObject(REDIS_STRING,sdsempty());
+ o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+sdslen(value)));
dbAdd(c->db,c->argv[1],o);
} else {
size_t olen;
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
+ if (checkType(c,o,OBJ_STRING))
return;
/* Return existing string length when setting nothing */
@@ -211,7 +223,7 @@ void setrangeCommand(redisClient *c) {
}
/* Return when the resulting string exceeds allowed size */
- if (checkStringLength(c,offset+sdslen(value)) != REDIS_OK)
+ if (checkStringLength(c,offset+sdslen(value)) != C_OK)
return;
/* Create a copy when the object is shared or encoded. */
@@ -222,27 +234,27 @@ void setrangeCommand(redisClient *c) {
o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value));
memcpy((char*)o->ptr+offset,value,sdslen(value));
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,
+ notifyKeyspaceEvent(NOTIFY_STRING,
"setrange",c->argv[1],c->db->id);
server.dirty++;
}
addReplyLongLong(c,sdslen(o->ptr));
}
-void getrangeCommand(redisClient *c) {
+void getrangeCommand(client *c) {
robj *o;
long long start, end;
char *str, llbuf[32];
size_t strlen;
- if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK)
return;
- if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)
+ if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK)
return;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptybulk)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
- if (o->encoding == REDIS_ENCODING_INT) {
+ if (o->encoding == OBJ_ENCODING_INT) {
str = llbuf;
strlen = ll2string(llbuf,sizeof(llbuf),(long)o->ptr);
} else {
@@ -251,6 +263,10 @@ void getrangeCommand(redisClient *c) {
}
/* Convert negative indexes */
+ if (start < 0 && end < 0 && start > end) {
+ addReply(c,shared.emptybulk);
+ return;
+ }
if (start < 0) start = strlen+start;
if (end < 0) end = strlen+end;
if (start < 0) start = 0;
@@ -266,7 +282,7 @@ void getrangeCommand(redisClient *c) {
}
}
-void mgetCommand(redisClient *c) {
+void mgetCommand(client *c) {
int j;
addReplyMultiBulkLen(c,c->argc-1);
@@ -275,7 +291,7 @@ void mgetCommand(redisClient *c) {
if (o == NULL) {
addReply(c,shared.nullbulk);
} else {
- if (o->type != REDIS_STRING) {
+ if (o->type != OBJ_STRING) {
addReply(c,shared.nullbulk);
} else {
addReplyBulk(c,o);
@@ -284,7 +300,7 @@ void mgetCommand(redisClient *c) {
}
}
-void msetGenericCommand(redisClient *c, int nx) {
+void msetGenericCommand(client *c, int nx) {
int j, busykeys = 0;
if ((c->argc % 2) == 0) {
@@ -308,27 +324,27 @@ void msetGenericCommand(redisClient *c, int nx) {
for (j = 1; j < c->argc; j += 2) {
c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
setKey(c->db,c->argv[j],c->argv[j+1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"set",c->argv[j],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id);
}
server.dirty += (c->argc-1)/2;
addReply(c, nx ? shared.cone : shared.ok);
}
-void msetCommand(redisClient *c) {
+void msetCommand(client *c) {
msetGenericCommand(c,0);
}
-void msetnxCommand(redisClient *c) {
+void msetnxCommand(client *c) {
msetGenericCommand(c,1);
}
-void incrDecrCommand(redisClient *c, long long incr) {
+void incrDecrCommand(client *c, long long incr) {
long long value, oldvalue;
robj *o, *new;
o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL && checkType(c,o,REDIS_STRING)) return;
- if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ if (getLongLongFromObjectOrReply(c,o,&value,NULL) != C_OK) return;
oldvalue = value;
if ((incr < 0 && oldvalue < 0 && incr < (LLONG_MIN-oldvalue)) ||
@@ -338,14 +354,14 @@ void incrDecrCommand(redisClient *c, long long incr) {
}
value += incr;
- if (o && o->refcount == 1 && o->encoding == REDIS_ENCODING_INT &&
- (value < 0 || value >= REDIS_SHARED_INTEGERS) &&
+ if (o && o->refcount == 1 && o->encoding == OBJ_ENCODING_INT &&
+ (value < 0 || value >= OBJ_SHARED_INTEGERS) &&
value >= LONG_MIN && value <= LONG_MAX)
{
new = o;
o->ptr = (void*)((long)value);
} else {
- new = createStringObjectFromLongLong(value);
+ new = createStringObjectFromLongLongForValue(value);
if (o) {
dbOverwrite(c->db,c->argv[1],new);
} else {
@@ -353,43 +369,43 @@ void incrDecrCommand(redisClient *c, long long incr) {
}
}
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"incrby",c->argv[1],c->db->id);
server.dirty++;
addReply(c,shared.colon);
addReply(c,new);
addReply(c,shared.crlf);
}
-void incrCommand(redisClient *c) {
+void incrCommand(client *c) {
incrDecrCommand(c,1);
}
-void decrCommand(redisClient *c) {
+void decrCommand(client *c) {
incrDecrCommand(c,-1);
}
-void incrbyCommand(redisClient *c) {
+void incrbyCommand(client *c) {
long long incr;
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,incr);
}
-void decrbyCommand(redisClient *c) {
+void decrbyCommand(client *c) {
long long incr;
- if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
+ if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != C_OK) return;
incrDecrCommand(c,-incr);
}
-void incrbyfloatCommand(redisClient *c) {
+void incrbyfloatCommand(client *c) {
long double incr, value;
robj *o, *new, *aux;
o = lookupKeyWrite(c->db,c->argv[1]);
- if (o != NULL && checkType(c,o,REDIS_STRING)) return;
- if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != REDIS_OK ||
- getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != REDIS_OK)
+ if (o != NULL && checkType(c,o,OBJ_STRING)) return;
+ if (getLongDoubleFromObjectOrReply(c,o,&value,NULL) != C_OK ||
+ getLongDoubleFromObjectOrReply(c,c->argv[2],&incr,NULL) != C_OK)
return;
value += incr;
@@ -403,7 +419,7 @@ void incrbyfloatCommand(redisClient *c) {
else
dbAdd(c->db,c->argv[1],new);
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id);
server.dirty++;
addReplyBulk(c,new);
@@ -416,7 +432,7 @@ void incrbyfloatCommand(redisClient *c) {
rewriteClientCommandArgument(c,2,new);
}
-void appendCommand(redisClient *c) {
+void appendCommand(client *c) {
size_t totlen;
robj *o, *append;
@@ -429,13 +445,13 @@ void appendCommand(redisClient *c) {
totlen = stringObjectLen(c->argv[2]);
} else {
/* Key exists, check type */
- if (checkType(c,o,REDIS_STRING))
+ if (checkType(c,o,OBJ_STRING))
return;
/* "append" is an argument, so always an sds */
append = c->argv[2];
totlen = stringObjectLen(o)+sdslen(append->ptr);
- if (checkStringLength(c,totlen) != REDIS_OK)
+ if (checkStringLength(c,totlen) != C_OK)
return;
/* Append the value */
@@ -444,14 +460,14 @@ void appendCommand(redisClient *c) {
totlen = sdslen(o->ptr);
}
signalModifiedKey(c->db,c->argv[1]);
- notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"append",c->argv[1],c->db->id);
+ notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
server.dirty++;
addReplyLongLong(c,totlen);
}
-void strlenCommand(redisClient *c) {
+void strlenCommand(client *c) {
robj *o;
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
- checkType(c,o,REDIS_STRING)) return;
+ checkType(c,o,OBJ_STRING)) return;
addReplyLongLong(c,stringObjectLen(o));
}
diff --git a/src/t_zset.c b/src/t_zset.c
index 64418c9b4..db381b592 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -38,9 +38,16 @@
*
* The elements are added to a hash table mapping Redis objects to scores.
* At the same time the elements are added to a skip list mapping scores
- * to Redis objects (so objects are sorted by scores in this "view"). */
-
-/* This skiplist implementation is almost a C translation of the original
+ * to Redis objects (so objects are sorted by scores in this "view").
+ *
+ * Note that the SDS string representing the element is the same in both
+ * the hash table and skiplist in order to save memory. What we do in order
+ * to manage the shared SDS string more easily is to free the SDS string
+ * only in zslFreeNode(). The dictionary has no value free method set.
+ * So we should always remove an element from the dictionary, and later from
+ * the skiplist.
+ *
+ * This skiplist implementation is almost a C translation of the original
* algorithm described by William Pugh in "Skip Lists: A Probabilistic
* Alternative to Balanced Trees", modified in three ways:
* a) this implementation allows for repeated scores.
@@ -49,19 +56,27 @@
* pointers being only at "level 1". This allows to traverse the list
* from tail to head, useful for ZREVRANGE. */
-#include "redis.h"
+#include "server.h"
#include <math.h>
-static int zslLexValueGteMin(robj *value, zlexrangespec *spec);
-static int zslLexValueLteMax(robj *value, zlexrangespec *spec);
+/*-----------------------------------------------------------------------------
+ * Skiplist implementation of the low level API
+ *----------------------------------------------------------------------------*/
+
+int zslLexValueGteMin(sds value, zlexrangespec *spec);
+int zslLexValueLteMax(sds value, zlexrangespec *spec);
-zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
- zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
+/* Create a skiplist node with the specified number of levels.
+ * The SDS string 'ele' is referenced by the node after the call. */
+zskiplistNode *zslCreateNode(int level, double score, sds ele) {
+ zskiplistNode *zn =
+ zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
zn->score = score;
- zn->obj = obj;
+ zn->ele = ele;
return zn;
}
+/* Create a new skiplist. */
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
@@ -79,11 +94,15 @@ zskiplist *zslCreate(void) {
return zsl;
}
+/* Free the specified skiplist node. The referenced SDS string representation
+ * of the element is freed too, unless node->ele is set to NULL before calling
+ * this function. */
void zslFreeNode(zskiplistNode *node) {
- decrRefCount(node->obj);
+ sdsfree(node->ele);
zfree(node);
}
+/* Free a whole skiplist. */
void zslFree(zskiplist *zsl) {
zskiplistNode *node = zsl->header->level[0].forward, *next;
@@ -107,29 +126,33 @@ int zslRandomLevel(void) {
return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}
-zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
+/* Insert a new node in the skiplist. Assumes the element does not already
+ * exist (up to the caller to enforce that). The skiplist takes ownership
+ * of the passed SDS string 'ele'. */
+zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
- redisAssert(!isnan(score));
+ serverAssert(!isnan(score));
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* store rank that is crossed to reach the insert position */
rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
while (x->level[i].forward &&
- (x->level[i].forward->score < score ||
- (x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,obj) < 0))) {
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ {
rank[i] += x->level[i].span;
x = x->level[i].forward;
}
update[i] = x;
}
- /* we assume the key is not already inside, since we allow duplicated
- * scores, and the re-insertion of score and redis object should never
- * happen since the caller of zslInsert() should test in the hash table
- * if the element is already inside or not. */
+ /* we assume the element is not already inside, since we allow duplicated
+ * scores, and reinserting the same element should never happen since the
+ * caller of zslInsert() should test in the hash table if the element is
+ * already inside or not. */
level = zslRandomLevel();
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
@@ -139,7 +162,7 @@ zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
}
zsl->level = level;
}
- x = zslCreateNode(level,score,obj);
+ x = zslCreateNode(level,score,ele);
for (i = 0; i < level; i++) {
x->level[i].forward = update[i]->level[i].forward;
update[i]->level[i].forward = x;
@@ -184,36 +207,103 @@ void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
zsl->length--;
}
-/* Delete an element with matching score/object from the skiplist. */
-int zslDelete(zskiplist *zsl, double score, robj *obj) {
+/* Delete an element with matching score/element from the skiplist.
+ * The function returns 1 if the node was found and deleted, otherwise
+ * 0 is returned.
+ *
+ * If 'node' is NULL the deleted node is freed by zslFreeNode(), otherwise
+ * it is not freed (but just unlinked) and *node is set to the node pointer,
+ * so that it is possible for the caller to reuse the node (including the
+ * referenced SDS string at node->ele). */
+int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
- (x->level[i].forward->score < score ||
- (x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,obj) < 0)))
+ (x->level[i].forward->score < score ||
+ (x->level[i].forward->score == score &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ {
x = x->level[i].forward;
+ }
update[i] = x;
}
/* We may have multiple elements with the same score, what we need
* is to find the element with both the right score and object. */
x = x->level[0].forward;
- if (x && score == x->score && equalStringObjects(x->obj,obj)) {
+ if (x && score == x->score && sdscmp(x->ele,ele) == 0) {
zslDeleteNode(zsl, x, update);
- zslFreeNode(x);
+ if (!node)
+ zslFreeNode(x);
+ else
+ *node = x;
return 1;
}
return 0; /* not found */
}
-static int zslValueGteMin(double value, zrangespec *spec) {
+/* Update the score of an element inside the sorted set skiplist.
+ * Note that the element must exist and must match 'score'.
+ * This function does not update the score in the hash table side, the
+ * caller should take care of it.
+ *
+ * Note that this function attempts to just update the node, in case after
+ * the score update, the node would be exactly at the same position.
+ * Otherwise the skiplist is modified by removing and re-adding a new
+ * element, which is more costly.
+ *
+ * The function returns the updated element skiplist node pointer. */
+zskiplistNode *zslUpdateScore(zskiplist *zsl, double curscore, sds ele, double newscore) {
+ zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
+ int i;
+
+ /* We need to seek to the element to update to start: this is useful
+ * anyway, as we'll have to update or remove it. */
+ x = zsl->header;
+ for (i = zsl->level-1; i >= 0; i--) {
+ while (x->level[i].forward &&
+ (x->level[i].forward->score < curscore ||
+ (x->level[i].forward->score == curscore &&
+ sdscmp(x->level[i].forward->ele,ele) < 0)))
+ {
+ x = x->level[i].forward;
+ }
+ update[i] = x;
+ }
+
+ /* Jump to our element: note that this function assumes that the
+ * element with the matching score exists. */
+ x = x->level[0].forward;
+ serverAssert(x && curscore == x->score && sdscmp(x->ele,ele) == 0);
+
+ /* If the node, after the score update, would be still exactly
+ * at the same position, we can just update the score without
+ * actually removing and re-inserting the element in the skiplist. */
+ if ((x->backward == NULL || x->backward->score < newscore) &&
+ (x->level[0].forward == NULL || x->level[0].forward->score > newscore))
+ {
+ x->score = newscore;
+ return x;
+ }
+
+ /* No way to reuse the old node: we need to remove and insert a new
+ * one at a different place. */
+ zslDeleteNode(zsl, x, update);
+ zskiplistNode *newnode = zslInsert(zsl,newscore,x->ele);
+ /* We reused the old node x->ele SDS string, free the node now
+ * since zslInsert created a new one. */
+ x->ele = NULL;
+ zslFreeNode(x);
+ return newnode;
+}
+
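A hypothetical in-tree sketch (not part of this patch) of the two zslUpdateScore() paths; the ele argument is only used for comparison, while the skiplist keeps owning the SDS strings handed to zslInsert():

    zskiplist *zsl = zslCreate();
    zslInsert(zsl, 1.0, sdsnew("a"));
    zslInsert(zsl, 3.0, sdsnew("b"));
    zslInsert(zsl, 5.0, sdsnew("c"));

    sds b = sdsnew("b");
    /* 1.0 < 4.0 < 5.0: the node keeps its position, fast in-place path. */
    zslUpdateScore(zsl, 3.0, b, 4.0);
    /* 9.0 > 5.0: the position changes, so the node is unlinked and a new
     * one is inserted reusing the same SDS element string. */
    zslUpdateScore(zsl, 4.0, b, 9.0);
    sdsfree(b);
    zslFree(zsl);
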
+int zslValueGteMin(double value, zrangespec *spec) {
return spec->minex ? (value > spec->min) : (value >= spec->min);
}
-static int zslValueLteMax(double value, zrangespec *spec) {
+int zslValueLteMax(double value, zrangespec *spec) {
return spec->maxex ? (value < spec->max) : (value <= spec->max);
}
@@ -253,7 +343,7 @@ zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score <= max. */
if (!zslValueLteMax(x->score,range)) return NULL;
@@ -278,7 +368,7 @@ zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
}
/* This is an inner range, so this node cannot be NULL. */
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score >= min. */
if (!zslValueGteMin(x->score,range)) return NULL;
@@ -312,8 +402,8 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dic
{
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
+ dictDelete(dict,x->ele);
+ zslFreeNode(x); /* Here is where x->ele is actually released. */
removed++;
x = next;
}
@@ -329,7 +419,7 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
- !zslLexValueGteMin(x->level[i].forward->obj,range))
+ !zslLexValueGteMin(x->level[i].forward->ele,range))
x = x->level[i].forward;
update[i] = x;
}
@@ -338,11 +428,11 @@ unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *di
x = x->level[0].forward;
/* Delete nodes while in range. */
- while (x && zslLexValueLteMax(x->obj,range)) {
+ while (x && zslLexValueLteMax(x->ele,range)) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
- zslFreeNode(x);
+ dictDelete(dict,x->ele);
+ zslFreeNode(x); /* Here is where x->ele is actually released. */
removed++;
x = next;
}
@@ -370,7 +460,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned
while (x && traversed <= end) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
- dictDelete(dict,x->obj);
+ dictDelete(dict,x->ele);
zslFreeNode(x);
removed++;
traversed++;
@@ -383,7 +473,7 @@ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned
* Returns 0 when the element cannot be found, rank otherwise.
* Note that the rank is 1-based due to the span of zsl->header to the
* first element. */
-unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
+unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
zskiplistNode *x;
unsigned long rank = 0;
int i;
@@ -393,13 +483,13 @@ unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
- compareStringObjects(x->level[i].forward->obj,o) <= 0))) {
+ sdscmp(x->level[i].forward->ele,ele) <= 0))) {
rank += x->level[i].span;
x = x->level[i].forward;
}
/* x might be equal to zsl->header, so test if ele is non-NULL */
- if (x->obj && equalStringObjects(x->obj,o)) {
+ if (x->ele && sdscmp(x->ele,ele) == 0) {
return rank;
}
}
@@ -435,32 +525,32 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
* by the "(" character, it's considered "open". For instance
* ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
* ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
- if (min->encoding == REDIS_ENCODING_INT) {
+ if (min->encoding == OBJ_ENCODING_INT) {
spec->min = (long)min->ptr;
} else {
if (((char*)min->ptr)[0] == '(') {
spec->min = strtod((char*)min->ptr+1,&eptr);
- if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
spec->minex = 1;
} else {
spec->min = strtod((char*)min->ptr,&eptr);
- if (eptr[0] != '\0' || isnan(spec->min)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
}
}
- if (max->encoding == REDIS_ENCODING_INT) {
+ if (max->encoding == OBJ_ENCODING_INT) {
spec->max = (long)max->ptr;
} else {
if (((char*)max->ptr)[0] == '(') {
spec->max = strtod((char*)max->ptr+1,&eptr);
- if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
spec->maxex = 1;
} else {
spec->max = strtod((char*)max->ptr,&eptr);
- if (eptr[0] != '\0' || isnan(spec->max)) return REDIS_ERR;
+ if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
}
}
- return REDIS_OK;
+ return C_OK;
}
/* ------------------------ Lexicographic ranges ---------------------------- */
@@ -472,91 +562,89 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
* + means the max string possible
*
* If the string is valid the *dest pointer is set to the redis object
- * that will be used for the comparision, and ex will be set to 0 or 1
- * respectively if the item is exclusive or inclusive. REDIS_OK will be
+ * that will be used for the comparison, and ex will be set to 0 or 1
+ * respectively if the item is exclusive or inclusive. C_OK will be
* returned.
*
- * If the string is not a valid range REDIS_ERR is returned, and the value
+ * If the string is not a valid range C_ERR is returned, and the value
* of *dest and *ex is undefined. */
-int zslParseLexRangeItem(robj *item, robj **dest, int *ex) {
+int zslParseLexRangeItem(robj *item, sds *dest, int *ex) {
char *c = item->ptr;
switch(c[0]) {
case '+':
- if (c[1] != '\0') return REDIS_ERR;
+ if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.maxstring;
- incrRefCount(shared.maxstring);
- return REDIS_OK;
+ return C_OK;
case '-':
- if (c[1] != '\0') return REDIS_ERR;
+ if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.minstring;
- incrRefCount(shared.minstring);
- return REDIS_OK;
+ return C_OK;
case '(':
*ex = 1;
- *dest = createStringObject(c+1,sdslen(c)-1);
- return REDIS_OK;
+ *dest = sdsnewlen(c+1,sdslen(c)-1);
+ return C_OK;
case '[':
*ex = 0;
- *dest = createStringObject(c+1,sdslen(c)-1);
- return REDIS_OK;
+ *dest = sdsnewlen(c+1,sdslen(c)-1);
+ return C_OK;
default:
- return REDIS_ERR;
+ return C_ERR;
}
}
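
For reference, how the four accepted forms map (a sketch; item is a hypothetical sds-encoded robj holding the quoted text):

    sds dest; int ex;
    /* "[foo" -> dest = "foo",            ex = 0 (inclusive bound)    */
    /* "(foo" -> dest = "foo",            ex = 1 (exclusive bound)    */
    /* "-"    -> dest = shared.minstring, ex = 0 (-inf for strings)   */
    /* "+"    -> dest = shared.maxstring, ex = 0 (+inf for strings)   */
    if (zslParseLexRangeItem(item, &dest, &ex) == C_ERR) {
        /* Any other form, e.g. a bare "foo", is rejected. */
    }
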
-/* Populate the rangespec according to the objects min and max.
+/* Free a lex range structure, must be called only after zslParseLexRange()
+ * populated the structure with success (C_OK returned). */
+void zslFreeLexRange(zlexrangespec *spec) {
+ if (spec->min != shared.minstring &&
+ spec->min != shared.maxstring) sdsfree(spec->min);
+ if (spec->max != shared.minstring &&
+ spec->max != shared.maxstring) sdsfree(spec->max);
+}
+
+/* Populate the lex rangespec according to the objects min and max.
*
- * Return REDIS_OK on success. On error REDIS_ERR is returned.
+ * Return C_OK on success. On error C_ERR is returned.
* When OK is returned the structure must be freed with zslFreeLexRange(),
* otherwise no release is needed. */
-static int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
+int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
/* The range can't be valid if objects are integer encoded.
* Every item must start with ( or [. */
- if (min->encoding == REDIS_ENCODING_INT ||
- max->encoding == REDIS_ENCODING_INT) return REDIS_ERR;
+ if (min->encoding == OBJ_ENCODING_INT ||
+ max->encoding == OBJ_ENCODING_INT) return C_ERR;
spec->min = spec->max = NULL;
- if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == REDIS_ERR ||
- zslParseLexRangeItem(max, &spec->max, &spec->maxex) == REDIS_ERR) {
- if (spec->min) decrRefCount(spec->min);
- if (spec->max) decrRefCount(spec->max);
- return REDIS_ERR;
+ if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == C_ERR ||
+ zslParseLexRangeItem(max, &spec->max, &spec->maxex) == C_ERR) {
+ zslFreeLexRange(spec);
+ return C_ERR;
} else {
- return REDIS_OK;
+ return C_OK;
}
}
-/* Free a lex range structure, must be called only after zelParseLexRange()
- * populated the structure with success (REDIS_OK returned). */
-void zslFreeLexRange(zlexrangespec *spec) {
- decrRefCount(spec->min);
- decrRefCount(spec->max);
-}
-
-/* This is just a wrapper to compareStringObjects() that is able to
+/* This is just a wrapper to sdscmp() that is able to
* handle shared.minstring and shared.maxstring as the equivalent of
* -inf and +inf for strings */
-int compareStringObjectsForLexRange(robj *a, robj *b) {
- if (a == b) return 0; /* This makes sure that we handle inf,inf and
- -inf,-inf ASAP. One special case less. */
+int sdscmplex(sds a, sds b) {
+ if (a == b) return 0;
if (a == shared.minstring || b == shared.maxstring) return -1;
if (a == shared.maxstring || b == shared.minstring) return 1;
- return compareStringObjects(a,b);
+ return sdscmp(a,b);
}
-static int zslLexValueGteMin(robj *value, zlexrangespec *spec) {
+int zslLexValueGteMin(sds value, zlexrangespec *spec) {
return spec->minex ?
- (compareStringObjectsForLexRange(value,spec->min) > 0) :
- (compareStringObjectsForLexRange(value,spec->min) >= 0);
+ (sdscmplex(value,spec->min) > 0) :
+ (sdscmplex(value,spec->min) >= 0);
}
-static int zslLexValueLteMax(robj *value, zlexrangespec *spec) {
+int zslLexValueLteMax(sds value, zlexrangespec *spec) {
return spec->maxex ?
- (compareStringObjectsForLexRange(value,spec->max) < 0) :
- (compareStringObjectsForLexRange(value,spec->max) <= 0);
+ (sdscmplex(value,spec->max) < 0) :
+ (sdscmplex(value,spec->max) <= 0);
}
/* Returns if there is a part of the zset in the lex range. */
@@ -564,15 +652,15 @@ int zslIsInLexRange(zskiplist *zsl, zlexrangespec *range) {
zskiplistNode *x;
/* Test for ranges that will always be empty. */
- if (compareStringObjectsForLexRange(range->min,range->max) > 1 ||
- (compareStringObjects(range->min,range->max) == 0 &&
+ if (sdscmplex(range->min,range->max) > 0 ||
+ (sdscmp(range->min,range->max) == 0 &&
(range->minex || range->maxex)))
return 0;
x = zsl->tail;
- if (x == NULL || !zslLexValueGteMin(x->obj,range))
+ if (x == NULL || !zslLexValueGteMin(x->ele,range))
return 0;
x = zsl->header->level[0].forward;
- if (x == NULL || !zslLexValueLteMax(x->obj,range))
+ if (x == NULL || !zslLexValueLteMax(x->ele,range))
return 0;
return 1;
}
@@ -590,16 +678,16 @@ zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) {
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *OUT* of range. */
while (x->level[i].forward &&
- !zslLexValueGteMin(x->level[i].forward->obj,range))
+ !zslLexValueGteMin(x->level[i].forward->ele,range))
x = x->level[i].forward;
}
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score <= max. */
- if (!zslLexValueLteMax(x->obj,range)) return NULL;
+ if (!zslLexValueLteMax(x->ele,range)) return NULL;
return x;
}
@@ -616,15 +704,15 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *IN* range. */
while (x->level[i].forward &&
- zslLexValueLteMax(x->level[i].forward->obj,range))
+ zslLexValueLteMax(x->level[i].forward->ele,range))
x = x->level[i].forward;
}
/* This is an inner range, so this node cannot be NULL. */
- redisAssert(x != NULL);
+ serverAssert(x != NULL);
/* Check if score >= min. */
- if (!zslLexValueGteMin(x->obj,range)) return NULL;
+ if (!zslLexValueGteMin(x->ele,range)) return NULL;
return x;
}
@@ -639,8 +727,8 @@ double zzlGetScore(unsigned char *sptr) {
char buf[128];
double score;
- redisAssert(sptr != NULL);
- redisAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
+ serverAssert(sptr != NULL);
+ serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
memcpy(buf,vstr,vlen);
@@ -653,21 +741,19 @@ double zzlGetScore(unsigned char *sptr) {
return score;
}
-/* Return a ziplist element as a Redis string object.
- * This simple abstraction can be used to simplifies some code at the
- * cost of some performance. */
-robj *ziplistGetObject(unsigned char *sptr) {
+/* Return a ziplist element as an SDS string. */
+sds ziplistGetObject(unsigned char *sptr) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- redisAssert(sptr != NULL);
- redisAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
+ serverAssert(sptr != NULL);
+ serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
- return createStringObject((char*)vstr,vlen);
+ return sdsnewlen((char*)vstr,vlen);
} else {
- return createStringObjectFromLongLong(vlong);
+ return sdsfromlonglong(vlong);
}
}
@@ -679,7 +765,7 @@ int zzlCompareElements(unsigned char *eptr, unsigned char *cstr, unsigned int cl
unsigned char vbuf[32];
int minlen, cmp;
- redisAssert(ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssert(ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL) {
/* Store string representation of long long in buf. */
vlen = ll2string((char*)vbuf,sizeof(vbuf),vlong);
@@ -700,12 +786,12 @@ unsigned int zzlLength(unsigned char *zl) {
* NULL when there is no next entry. */
void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
unsigned char *_eptr, *_sptr;
- redisAssert(*eptr != NULL && *sptr != NULL);
+ serverAssert(*eptr != NULL && *sptr != NULL);
_eptr = ziplistNext(zl,*sptr);
if (_eptr != NULL) {
_sptr = ziplistNext(zl,_eptr);
- redisAssert(_sptr != NULL);
+ serverAssert(_sptr != NULL);
} else {
/* No next entry. */
_sptr = NULL;
@@ -719,12 +805,12 @@ void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
* set to NULL when there is no next entry. */
void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr) {
unsigned char *_eptr, *_sptr;
- redisAssert(*eptr != NULL && *sptr != NULL);
+ serverAssert(*eptr != NULL && *sptr != NULL);
_sptr = ziplistPrev(zl,*eptr);
if (_sptr != NULL) {
_eptr = ziplistPrev(zl,_sptr);
- redisAssert(_eptr != NULL);
+ serverAssert(_eptr != NULL);
} else {
/* No previous entry. */
_eptr = NULL;
@@ -752,7 +838,7 @@ int zzlIsInRange(unsigned char *zl, zrangespec *range) {
return 0;
p = ziplistIndex(zl,1); /* First score. */
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
score = zzlGetScore(p);
if (!zslValueLteMax(score,range))
return 0;
@@ -771,7 +857,7 @@ unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range) {
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
score = zzlGetScore(sptr);
if (zslValueGteMin(score,range)) {
@@ -799,7 +885,7 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
score = zzlGetScore(sptr);
if (zslValueLteMax(score,range)) {
@@ -813,7 +899,7 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
* When this returns NULL, we know there also is no element. */
sptr = ziplistPrev(zl,eptr);
if (sptr != NULL)
- redisAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
+ serverAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
else
eptr = NULL;
}
@@ -821,17 +907,17 @@ unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) {
return NULL;
}
-static int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec) {
- robj *value = ziplistGetObject(p);
+int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec) {
+ sds value = ziplistGetObject(p);
int res = zslLexValueGteMin(value,spec);
- decrRefCount(value);
+ sdsfree(value);
return res;
}
-static int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec) {
- robj *value = ziplistGetObject(p);
+int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec) {
+ sds value = ziplistGetObject(p);
int res = zslLexValueLteMax(value,spec);
- decrRefCount(value);
+ sdsfree(value);
return res;
}
@@ -841,8 +927,8 @@ int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) {
unsigned char *p;
/* Test for ranges that will always be empty. */
- if (compareStringObjectsForLexRange(range->min,range->max) > 1 ||
- (compareStringObjects(range->min,range->max) == 0 &&
+ if (sdscmplex(range->min,range->max) > 1 ||
+ (sdscmp(range->min,range->max) == 0 &&
(range->minex || range->maxex)))
return 0;
@@ -852,7 +938,7 @@ int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) {
return 0;
p = ziplistIndex(zl,0); /* First element. */
- redisAssert(p != NULL);
+ serverAssert(p != NULL);
if (!zzlLexValueLteMax(p,range))
return 0;
@@ -877,7 +963,7 @@ unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range) {
/* Move to next element. */
sptr = ziplistNext(zl,eptr); /* This element score. Skip it. */
- redisAssert(sptr != NULL);
+ serverAssert(sptr != NULL);
eptr = ziplistNext(zl,sptr); /* Next element. */
}
@@ -904,7 +990,7 @@ unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) {
* When this returns NULL, we know there also is no element. */
sptr = ziplistPrev(zl,eptr);
if (sptr != NULL)
- redisAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
+ serverAssert((eptr = ziplistPrev(zl,sptr)) != NULL);
else
eptr = NULL;
}
@@ -912,26 +998,22 @@ unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) {
return NULL;
}
-unsigned char *zzlFind(unsigned char *zl, robj *ele, double *score) {
+unsigned char *zzlFind(unsigned char *zl, sds ele, double *score) {
unsigned char *eptr = ziplistIndex(zl,0), *sptr;
- ele = getDecodedObject(ele);
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,ele,sptr != NULL);
+ serverAssert(sptr != NULL);
- if (ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr))) {
+ if (ziplistCompare(eptr,(unsigned char*)ele,sdslen(ele))) {
/* Matching element, pull out score. */
if (score != NULL) *score = zzlGetScore(sptr);
- decrRefCount(ele);
return eptr;
}
/* Move to next element. */
eptr = ziplistNext(zl,sptr);
}
-
- decrRefCount(ele);
return NULL;
}
@@ -946,41 +1028,38 @@ unsigned char *zzlDelete(unsigned char *zl, unsigned char *eptr) {
return zl;
}
-unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, robj *ele, double score) {
+unsigned char *zzlInsertAt(unsigned char *zl, unsigned char *eptr, sds ele, double score) {
unsigned char *sptr;
char scorebuf[128];
int scorelen;
size_t offset;
- redisAssertWithInfo(NULL,ele,sdsEncodedObject(ele));
scorelen = d2string(scorebuf,sizeof(scorebuf),score);
if (eptr == NULL) {
- zl = ziplistPush(zl,ele->ptr,sdslen(ele->ptr),ZIPLIST_TAIL);
+ zl = ziplistPush(zl,(unsigned char*)ele,sdslen(ele),ZIPLIST_TAIL);
zl = ziplistPush(zl,(unsigned char*)scorebuf,scorelen,ZIPLIST_TAIL);
} else {
/* Keep offset relative to zl, as it might be re-allocated. */
offset = eptr-zl;
- zl = ziplistInsert(zl,eptr,ele->ptr,sdslen(ele->ptr));
+ zl = ziplistInsert(zl,eptr,(unsigned char*)ele,sdslen(ele));
eptr = zl+offset;
/* Insert score after the element. */
- redisAssertWithInfo(NULL,ele,(sptr = ziplistNext(zl,eptr)) != NULL);
+ serverAssert((sptr = ziplistNext(zl,eptr)) != NULL);
zl = ziplistInsert(zl,sptr,(unsigned char*)scorebuf,scorelen);
}
-
return zl;
}
/* Insert (element,score) pair in ziplist. This function assumes the element is
* not yet present in the list. */
-unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
+unsigned char *zzlInsert(unsigned char *zl, sds ele, double score) {
unsigned char *eptr = ziplistIndex(zl,0), *sptr;
double s;
- ele = getDecodedObject(ele);
while (eptr != NULL) {
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,ele,sptr != NULL);
+ serverAssert(sptr != NULL);
s = zzlGetScore(sptr);
if (s > score) {
@@ -991,7 +1070,7 @@ unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
break;
} else if (s == score) {
/* Ensure lexicographical ordering for elements. */
- if (zzlCompareElements(eptr,ele->ptr,sdslen(ele->ptr)) > 0) {
+ if (zzlCompareElements(eptr,(unsigned char*)ele,sdslen(ele)) > 0) {
zl = zzlInsertAt(zl,eptr,ele,score);
break;
}
@@ -1004,8 +1083,6 @@ unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) {
/* Push on tail of list when it was not yet inserted. */
if (eptr == NULL)
zl = zzlInsertAt(zl,NULL,ele,score);
-
- decrRefCount(ele);
return zl;
}
@@ -1078,14 +1155,14 @@ unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsig
* Common sorted set API
*----------------------------------------------------------------------------*/
-unsigned int zsetLength(robj *zobj) {
- int length = -1;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+unsigned long zsetLength(const robj *zobj) {
+ unsigned long length = 0;
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
length = zzlLength(zobj->ptr);
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- length = ((zset*)zobj->ptr)->zsl->length;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ length = ((const zset*)zobj->ptr)->zsl->length;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
return length;
}
@@ -1093,52 +1170,50 @@ unsigned int zsetLength(robj *zobj) {
void zsetConvert(robj *zobj, int encoding) {
zset *zs;
zskiplistNode *node, *next;
- robj *ele;
+ sds ele;
double score;
if (zobj->encoding == encoding) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
unsigned int vlen;
long long vlong;
- if (encoding != REDIS_ENCODING_SKIPLIST)
- redisPanic("Unknown target encoding");
+ if (encoding != OBJ_ENCODING_SKIPLIST)
+ serverPanic("Unknown target encoding");
zs = zmalloc(sizeof(*zs));
zs->dict = dictCreate(&zsetDictType,NULL);
zs->zsl = zslCreate();
eptr = ziplistIndex(zl,0);
- redisAssertWithInfo(NULL,zobj,eptr != NULL);
+ serverAssertWithInfo(NULL,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(NULL,zobj,sptr != NULL);
+ serverAssertWithInfo(NULL,zobj,sptr != NULL);
while (eptr != NULL) {
score = zzlGetScore(sptr);
- redisAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL)
- ele = createStringObjectFromLongLong(vlong);
+ ele = sdsfromlonglong(vlong);
else
- ele = createStringObject((char*)vstr,vlen);
+ ele = sdsnewlen((char*)vstr,vlen);
- /* Has incremented refcount since it was just created. */
node = zslInsert(zs->zsl,score,ele);
- redisAssertWithInfo(NULL,zobj,dictAdd(zs->dict,ele,&node->score) == DICT_OK);
- incrRefCount(ele); /* Added to dictionary. */
+ serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
zzlNext(zl,&eptr,&sptr);
}
zfree(zobj->ptr);
zobj->ptr = zs;
- zobj->encoding = REDIS_ENCODING_SKIPLIST;
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ zobj->encoding = OBJ_ENCODING_SKIPLIST;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
unsigned char *zl = ziplistNew();
- if (encoding != REDIS_ENCODING_ZIPLIST)
- redisPanic("Unknown target encoding");
+ if (encoding != OBJ_ENCODING_ZIPLIST)
+ serverPanic("Unknown target encoding");
/* Approach similar to zslFree(), since we want to free the skiplist at
* the same time as creating the ziplist. */
@@ -1149,10 +1224,7 @@ void zsetConvert(robj *zobj, int encoding) {
zfree(zs->zsl);
while (node) {
- ele = getDecodedObject(node->obj);
- zl = zzlInsertAt(zl,NULL,ele,node->score);
- decrRefCount(ele);
-
+ zl = zzlInsertAt(zl,NULL,node->ele,node->score);
next = node->level[0].forward;
zslFreeNode(node);
node = next;
@@ -1160,9 +1232,298 @@ void zsetConvert(robj *zobj, int encoding) {
zfree(zs);
zobj->ptr = zl;
- zobj->encoding = REDIS_ENCODING_ZIPLIST;
+ zobj->encoding = OBJ_ENCODING_ZIPLIST;
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+}
+
+/* Convert the sorted set object into a ziplist if it is not already a ziplist
+ * and if the number of elements and the maximum element size are within the
+ * expected ranges. */
+void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) return;
+ zset *zset = zobj->ptr;
+
+ if (zset->zsl->length <= server.zset_max_ziplist_entries &&
+ maxelelen <= server.zset_max_ziplist_value)
+ zsetConvert(zobj,OBJ_ENCODING_ZIPLIST);
+}
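
A minimal caller sketch for the helper above, assuming the stock limits
(zset-max-ziplist-entries 128, zset-max-ziplist-value 64) and a
caller-tracked maxelelen; after a bulk deletion, a skiplist-encoded set
that shrank back under both limits is converted in place:

    /* Hypothetical caller: 'zobj' now holds 100 members, none longer
     * than 64 bytes, so the call downgrades the encoding to ziplist. */
    size_t maxelelen = 0;
    /* ... delete elements, updating maxelelen with the longest survivor ... */
    zsetConvertToZiplistIfNeeded(zobj, maxelelen);
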
+
+/* Return (by reference) the score of the specified member of the sorted set,
+ * storing it into *score. If the element does not exist, C_ERR is returned;
+ * otherwise C_OK is returned and *score is correctly populated.
+ * If 'zobj' or 'member' is NULL, C_ERR is returned. */
+int zsetScore(robj *zobj, sds member, double *score) {
+ if (!zobj || !member) return C_ERR;
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ if (zzlFind(zobj->ptr, member, score) == NULL) return C_ERR;
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de = dictFind(zs->dict, member);
+ if (de == NULL) return C_ERR;
+ *score = *(double*)dictGetVal(de);
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return C_OK;
+}
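
A usage sketch for zsetScore(), assuming a caller-owned SDS member name
('member' here is a placeholder, not part of the patch):

    double score;
    sds member = sdsnew("alice");            /* caller-owned SDS string */
    if (zsetScore(zobj, member, &score) == C_OK) {
        /* 'score' is populated regardless of the set's encoding. */
    }
    sdsfree(member);                         /* zsetScore never takes ownership */
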
+
+/* Add a new element or update the score of an existing element in a sorted
+ * set, regardless of its encoding.
+ *
+ * The flags change the command behavior. They are passed with an integer
+ * pointer since the function will clear the flags and populate them with
+ * other flags to indicate different conditions.
+ *
+ * The input flags are the following:
+ *
+ * ZADD_INCR: Increment the current element score by 'score' instead of updating
+ * the current element score. If the element does not exist, we
+ * assume 0 as previous score.
+ * ZADD_NX: Perform the operation only if the element does not exist.
+ * ZADD_XX: Perform the operation only if the element already exists.
+ *
+ * When ZADD_INCR is used, the new score of the element is stored in
+ * '*newscore' if 'newscore' is not NULL.
+ *
+ * The returned flags are the following:
+ *
+ * ZADD_NAN: The resulting score is not a number.
+ * ZADD_ADDED: The element was added (not present before the call).
+ * ZADD_UPDATED: The element score was updated.
+ * ZADD_NOP: No operation was performed because of NX or XX.
+ *
+ * Return value:
+ *
+ * The function returns 1 on success, and sets the appropriate flags
+ * ADDED or UPDATED to signal what happened during the operation (note that
+ * none may be set if we re-added an element using the same score it used
+ * to have, or when a zero increment is used).
+ *
+ * The function returns 0 on error, currently only when the increment
+ * produces a NaN condition, or when the 'score' value is NaN from the
+ * start.
+ *
+ * As a side effect of adding a new element, the command may convert the
+ * sorted set's internal encoding from ziplist to hashtable+skiplist.
+ *
+ * Memory management of 'ele':
+ *
+ * The function does not take ownership of the 'ele' SDS string, but copies
+ * it if needed. */
+int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore) {
+ /* Turn options into simple to check vars. */
+ int incr = (*flags & ZADD_INCR) != 0;
+ int nx = (*flags & ZADD_NX) != 0;
+ int xx = (*flags & ZADD_XX) != 0;
+ *flags = 0; /* We'll return our response flags. */
+ double curscore;
+
+ /* NaN as input is an error regardless of all the other parameters. */
+ if (isnan(score)) {
+ *flags = ZADD_NAN;
+ return 0;
+ }
+
+ /* Update the sorted set according to its encoding. */
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *eptr;
+
+ if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
+ /* NX? Return, same element already exists. */
+ if (nx) {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+
+ /* Prepare the score for the increment if needed. */
+ if (incr) {
+ score += curscore;
+ if (isnan(score)) {
+ *flags |= ZADD_NAN;
+ return 0;
+ }
+ if (newscore) *newscore = score;
+ }
+
+ /* Remove and re-insert when score changed. */
+ if (score != curscore) {
+ zobj->ptr = zzlDelete(zobj->ptr,eptr);
+ zobj->ptr = zzlInsert(zobj->ptr,ele,score);
+ *flags |= ZADD_UPDATED;
+ }
+ return 1;
+ } else if (!xx) {
+ /* Check right after insertion if the element is too large or the
+ * list has become too long, and convert the encoding if needed. */
+ zobj->ptr = zzlInsert(zobj->ptr,ele,score);
+ if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries)
+ zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
+ if (sdslen(ele) > server.zset_max_ziplist_value)
+ zsetConvert(zobj,OBJ_ENCODING_SKIPLIST);
+ if (newscore) *newscore = score;
+ *flags |= ZADD_ADDED;
+ return 1;
+ } else {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplistNode *znode;
+ dictEntry *de;
+
+ de = dictFind(zs->dict,ele);
+ if (de != NULL) {
+ /* NX? Return, same element already exists. */
+ if (nx) {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
+ curscore = *(double*)dictGetVal(de);
+
+ /* Prepare the score for the increment if needed. */
+ if (incr) {
+ score += curscore;
+ if (isnan(score)) {
+ *flags |= ZADD_NAN;
+ return 0;
+ }
+ if (newscore) *newscore = score;
+ }
+
+ /* Remove and re-insert when score changes. */
+ if (score != curscore) {
+ znode = zslUpdateScore(zs->zsl,curscore,ele,score);
+ /* Note that we did not remove the original element from
+ * the hash table representing the sorted set, so we just
+ * update the score. */
+ dictGetVal(de) = &znode->score; /* Update score ptr. */
+ *flags |= ZADD_UPDATED;
+ }
+ return 1;
+ } else if (!xx) {
+ ele = sdsdup(ele);
+ znode = zslInsert(zs->zsl,score,ele);
+ serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
+ *flags |= ZADD_ADDED;
+ if (newscore) *newscore = score;
+ return 1;
+ } else {
+ *flags |= ZADD_NOP;
+ return 1;
+ }
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
+ }
+ return 0; /* Never reached. */
+}
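
A hypothetical ZINCRBY-style call through the new API; the same integer
carries the requested behavior in and the outcome flags out:

    int flags = ZADD_INCR;
    double newscore;
    if (zsetAdd(zobj, 2.5, ele, &flags, &newscore) == 0) {
        /* 'flags' now contains ZADD_NAN: the increment produced NaN. */
    } else if (flags & ZADD_ADDED) {
        /* Element was missing; created with score 2.5. */
    } else if (flags & ZADD_UPDATED) {
        /* Existing element; the incremented score is in 'newscore'. */
    }
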
+
+/* Delete the element 'ele' from the sorted set, returning 1 if the element
+ * existed and was deleted, 0 otherwise (the element was not there). */
+int zsetDel(robj *zobj, sds ele) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *eptr;
+
+ if ((eptr = zzlFind(zobj->ptr,ele,NULL)) != NULL) {
+ zobj->ptr = zzlDelete(zobj->ptr,eptr);
+ return 1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ dictEntry *de;
+ double score;
+
+ de = dictUnlink(zs->dict,ele);
+ if (de != NULL) {
+ /* Get the score in order to delete from the skiplist later. */
+ score = *(double*)dictGetVal(de);
+
+ /* Delete from the hash table and later from the skiplist.
+ * Note that the order is important: deleting from the skiplist
+ * actually releases the SDS string representing the element,
+ * which is shared between the skiplist and the hash table, so
+ * we need to delete from the skiplist as the final step. */
+ dictFreeUnlinkedEntry(zs->dict,de);
+
+ /* Delete from skiplist. */
+ int retval = zslDelete(zs->zsl,score,ele,NULL);
+ serverAssert(retval);
+
+ if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+ return 1;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
+ }
+ return 0; /* No such element found. */
+}
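
A short sketch of the deletion pattern the commands below rely on,
deleting one member and dropping the key once the set is empty ('key'
and 'ele' are placeholders):

    if (zsetDel(zobj, ele)) {
        server.dirty++;
        if (zsetLength(zobj) == 0) dbDelete(c->db, key);
    }
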
+
+/* Given a sorted set object, return the 0-based rank of the element 'ele',
+ * or -1 if the element does not exist.
+ *
+ * By rank we mean the position of the element in the sorted collection
+ * of elements: the first element has rank 0, the second rank 1, and so
+ * forth, up to rank length-1.
+ *
+ * If 'reverse' is zero, the rank is computed taking as first element the
+ * one with the lowest score. Otherwise, if 'reverse' is non-zero, rank 0
+ * is assigned to the element with the highest score. */
+long zsetRank(robj *zobj, sds ele, int reverse) {
+ unsigned long llen;
+ unsigned long rank;
+
+ llen = zsetLength(zobj);
+
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+
+ eptr = ziplistIndex(zl,0);
+ serverAssert(eptr != NULL);
+ sptr = ziplistNext(zl,eptr);
+ serverAssert(sptr != NULL);
+
+ rank = 1;
+ while(eptr != NULL) {
+ if (ziplistCompare(eptr,(unsigned char*)ele,sdslen(ele)))
+ break;
+ rank++;
+ zzlNext(zl,&eptr,&sptr);
+ }
+
+ if (eptr != NULL) {
+ if (reverse)
+ return llen-rank;
+ else
+ return rank-1;
+ } else {
+ return -1;
+ }
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ dictEntry *de;
+ double score;
+
+ de = dictFind(zs->dict,ele);
+ if (de != NULL) {
+ score = *(double*)dictGetVal(de);
+ rank = zslGetRank(zsl,score,ele);
+ /* Existing elements always have a rank. */
+ serverAssert(rank != 0);
+ if (reverse)
+ return llen-rank;
+ else
+ return rank-1;
+ } else {
+ return -1;
+ }
+ } else {
+ serverPanic("Unknown sorted set encoding");
}
}
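
A worked example of the rank arithmetic above, assuming a 5-element set
whose member sits at 1-based skiplist rank 2:

    long fwd = zsetRank(zobj, ele, 0);   /* rank-1    -> 1 (second lowest)  */
    long rev = zsetRank(zobj, ele, 1);   /* llen-rank -> 3 (third highest)  */
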
@@ -1171,35 +1532,78 @@ void zsetConvert(robj *zobj, int encoding) {
*----------------------------------------------------------------------------*/
/* This generic command implements both ZADD and ZINCRBY. */
-void zaddGenericCommand(redisClient *c, int incr) {
+void zaddGenericCommand(client *c, int flags) {
static char *nanerr = "resulting score is not a number (NaN)";
robj *key = c->argv[1];
- robj *ele;
robj *zobj;
- robj *curobj;
- double score = 0, *scores = NULL, curscore = 0.0;
- int j, elements = (c->argc-2)/2;
- int added = 0, updated = 0;
-
- if (c->argc % 2) {
+ sds ele;
+ double score = 0, *scores = NULL;
+ int j, elements;
+ int scoreidx = 0;
+ /* The following vars track what the command actually did during
+ * execution, so we can reply to the client and trigger the keyspace
+ * change notification. */
+ int added = 0; /* Number of new elements added. */
+ int updated = 0; /* Number of elements with updated score. */
+ int processed = 0; /* Number of elements processed, may remain zero with
+ options like XX. */
+
+ /* Parse options. At the end 'scoreidx' is set to the argument position
+ * of the score of the first score-element pair. */
+ scoreidx = 2;
+ while(scoreidx < c->argc) {
+ char *opt = c->argv[scoreidx]->ptr;
+ if (!strcasecmp(opt,"nx")) flags |= ZADD_NX;
+ else if (!strcasecmp(opt,"xx")) flags |= ZADD_XX;
+ else if (!strcasecmp(opt,"ch")) flags |= ZADD_CH;
+ else if (!strcasecmp(opt,"incr")) flags |= ZADD_INCR;
+ else break;
+ scoreidx++;
+ }
+
+ /* Turn options into simple to check vars. */
+ int incr = (flags & ZADD_INCR) != 0;
+ int nx = (flags & ZADD_NX) != 0;
+ int xx = (flags & ZADD_XX) != 0;
+ int ch = (flags & ZADD_CH) != 0;
+
+ /* After the options, we expect to have an even number of args, since
+ * we expect any number of score-element pairs. */
+ elements = c->argc-scoreidx;
+ if (elements % 2 || !elements) {
addReply(c,shared.syntaxerr);
return;
}
+ elements /= 2; /* Now this holds the number of score-element pairs. */
+
+ /* Check for incompatible options. */
+ if (nx && xx) {
+ addReplyError(c,
+ "XX and NX options at the same time are not compatible");
+ return;
+ }
+
+ if (incr && elements > 1) {
+ addReplyError(c,
+ "INCR option supports a single increment-element pair");
+ return;
+ }
/* Start parsing all the scores, we need to emit any syntax error
* before executing additions to the sorted set, as the command should
* either execute fully or nothing at all. */
scores = zmalloc(sizeof(double)*elements);
for (j = 0; j < elements; j++) {
- if (getDoubleFromObjectOrReply(c,c->argv[2+j*2],&scores[j],NULL)
- != REDIS_OK) goto cleanup;
+ if (getDoubleFromObjectOrReply(c,c->argv[scoreidx+j*2],&scores[j],NULL)
+ != C_OK) goto cleanup;
}
/* Lookup the key and create the sorted set if does not exist. */
zobj = lookupKeyWrite(c->db,key);
if (zobj == NULL) {
+ if (xx) goto reply_to_client; /* No key + XX option: nothing to do. */
if (server.zset_max_ziplist_entries == 0 ||
- server.zset_max_ziplist_value < sdslen(c->argv[3]->ptr))
+ server.zset_max_ziplist_value < sdslen(c->argv[scoreidx+1]->ptr))
{
zobj = createZsetObject();
} else {
@@ -1207,167 +1611,78 @@ void zaddGenericCommand(redisClient *c, int incr) {
}
dbAdd(c->db,key,zobj);
} else {
- if (zobj->type != REDIS_ZSET) {
+ if (zobj->type != OBJ_ZSET) {
addReply(c,shared.wrongtypeerr);
goto cleanup;
}
}
for (j = 0; j < elements; j++) {
+ double newscore;
score = scores[j];
+ int retflags = flags;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *eptr;
-
- /* Prefer non-encoded element when dealing with ziplists. */
- ele = c->argv[3+j*2];
- if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
- if (incr) {
- score += curscore;
- if (isnan(score)) {
- addReplyError(c,nanerr);
- goto cleanup;
- }
- }
-
- /* Remove and re-insert when score changed. */
- if (score != curscore) {
- zobj->ptr = zzlDelete(zobj->ptr,eptr);
- zobj->ptr = zzlInsert(zobj->ptr,ele,score);
- server.dirty++;
- updated++;
- }
- } else {
- /* Optimize: check if the element is too large or the list
- * becomes too long *before* executing zzlInsert. */
- zobj->ptr = zzlInsert(zobj->ptr,ele,score);
- if (zzlLength(zobj->ptr) > server.zset_max_ziplist_entries)
- zsetConvert(zobj,REDIS_ENCODING_SKIPLIST);
- if (sdslen(ele->ptr) > server.zset_max_ziplist_value)
- zsetConvert(zobj,REDIS_ENCODING_SKIPLIST);
- server.dirty++;
- added++;
- }
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- zskiplistNode *znode;
- dictEntry *de;
-
- ele = c->argv[3+j*2] = tryObjectEncoding(c->argv[3+j*2]);
- de = dictFind(zs->dict,ele);
- if (de != NULL) {
- curobj = dictGetKey(de);
- curscore = *(double*)dictGetVal(de);
-
- if (incr) {
- score += curscore;
- if (isnan(score)) {
- addReplyError(c,nanerr);
- /* Don't need to check if the sorted set is empty
- * because we know it has at least one element. */
- goto cleanup;
- }
- }
-
- /* Remove and re-insert when score changed. We can safely
- * delete the key object from the skiplist, since the
- * dictionary still has a reference to it. */
- if (score != curscore) {
- redisAssertWithInfo(c,curobj,zslDelete(zs->zsl,curscore,curobj));
- znode = zslInsert(zs->zsl,score,curobj);
- incrRefCount(curobj); /* Re-inserted in skiplist. */
- dictGetVal(de) = &znode->score; /* Update score ptr. */
- server.dirty++;
- updated++;
- }
- } else {
- znode = zslInsert(zs->zsl,score,ele);
- incrRefCount(ele); /* Inserted in skiplist. */
- redisAssertWithInfo(c,NULL,dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
- incrRefCount(ele); /* Added to dictionary. */
- server.dirty++;
- added++;
- }
- } else {
- redisPanic("Unknown sorted set encoding");
+ ele = c->argv[scoreidx+1+j*2]->ptr;
+ int retval = zsetAdd(zobj, score, ele, &retflags, &newscore);
+ if (retval == 0) {
+ addReplyError(c,nanerr);
+ goto cleanup;
}
+ if (retflags & ZADD_ADDED) added++;
+ if (retflags & ZADD_UPDATED) updated++;
+ if (!(retflags & ZADD_NOP)) processed++;
+ score = newscore;
+ }
+ server.dirty += (added+updated);
+
+reply_to_client:
+ if (incr) { /* ZINCRBY or INCR option. */
+ if (processed)
+ addReplyDouble(c,score);
+ else
+ addReply(c,shared.nullbulk);
+ } else { /* ZADD. */
+ addReplyLongLong(c,ch ? added+updated : added);
}
- if (incr) /* ZINCRBY */
- addReplyDouble(c,score);
- else /* ZADD */
- addReplyLongLong(c,added);
cleanup:
zfree(scores);
if (added || updated) {
signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,
+ notifyKeyspaceEvent(NOTIFY_ZSET,
incr ? "zincr" : "zadd", key, c->db->id);
}
}
-void zaddCommand(redisClient *c) {
- zaddGenericCommand(c,0);
+void zaddCommand(client *c) {
+ zaddGenericCommand(c,ZADD_NONE);
}
-void zincrbyCommand(redisClient *c) {
- zaddGenericCommand(c,1);
+void zincrbyCommand(client *c) {
+ zaddGenericCommand(c,ZADD_INCR);
}
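
Any further entry point can seed the generic implementation the same
way; a hypothetical NX-only variant (illustrative only, not part of the
patch) would be:

    void zaddNxCommand(client *c) {
        zaddGenericCommand(c, ZADD_NX);  /* options parsed from argv still OR in */
    }
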
-void zremCommand(redisClient *c) {
+void zremCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
int deleted = 0, keyremoved = 0, j;
if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
-
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *eptr;
+ checkType(c,zobj,OBJ_ZSET)) return;
- for (j = 2; j < c->argc; j++) {
- if ((eptr = zzlFind(zobj->ptr,c->argv[j],NULL)) != NULL) {
- deleted++;
- zobj->ptr = zzlDelete(zobj->ptr,eptr);
- if (zzlLength(zobj->ptr) == 0) {
- dbDelete(c->db,key);
- keyremoved = 1;
- break;
- }
- }
- }
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- dictEntry *de;
- double score;
-
- for (j = 2; j < c->argc; j++) {
- de = dictFind(zs->dict,c->argv[j]);
- if (de != NULL) {
- deleted++;
-
- /* Delete from the skiplist */
- score = *(double*)dictGetVal(de);
- redisAssertWithInfo(c,c->argv[j],zslDelete(zs->zsl,score,c->argv[j]));
-
- /* Delete from the hash table */
- dictDelete(zs->dict,c->argv[j]);
- if (htNeedsResize(zs->dict)) dictResize(zs->dict);
- if (dictSize(zs->dict) == 0) {
- dbDelete(c->db,key);
- keyremoved = 1;
- break;
- }
- }
+ for (j = 2; j < c->argc; j++) {
+ if (zsetDel(zobj,c->argv[j]->ptr)) deleted++;
+ if (zsetLength(zobj) == 0) {
+ dbDelete(c->db,key);
+ keyremoved = 1;
+ break;
}
- } else {
- redisPanic("Unknown sorted set encoding");
}
if (deleted) {
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,"zrem",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
signalModifiedKey(c->db,key);
server.dirty += deleted;
}
@@ -1378,7 +1693,7 @@ void zremCommand(redisClient *c) {
#define ZRANGE_RANK 0
#define ZRANGE_SCORE 1
#define ZRANGE_LEX 2
-void zremrangeGenericCommand(redisClient *c, int rangetype) {
+void zremrangeGenericCommand(client *c, int rangetype) {
robj *key = c->argv[1];
robj *zobj;
int keyremoved = 0;
@@ -1389,16 +1704,16 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
/* Step 1: Parse the range. */
if (rangetype == ZRANGE_RANK) {
- if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK))
return;
} else if (rangetype == ZRANGE_SCORE) {
- if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
} else if (rangetype == ZRANGE_LEX) {
- if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
@@ -1406,7 +1721,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
/* Step 2: Lookup & range sanity checks if needed. */
if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) goto cleanup;
+ checkType(c,zobj,OBJ_ZSET)) goto cleanup;
if (rangetype == ZRANGE_RANK) {
/* Sanitize indexes. */
@@ -1425,7 +1740,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
}
/* Step 3: Perform the range deletion operation. */
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
switch(rangetype) {
case ZRANGE_RANK:
zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted);
@@ -1441,7 +1756,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
dbDelete(c->db,key);
keyremoved = 1;
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
switch(rangetype) {
case ZRANGE_RANK:
@@ -1460,16 +1775,16 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) {
keyremoved = 1;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
/* Step 4: Notifications and reply. */
if (deleted) {
char *event[3] = {"zremrangebyrank","zremrangebyscore","zremrangebylex"};
signalModifiedKey(c->db,key);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,event[rangetype],key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_ZSET,event[rangetype],key,c->db->id);
if (keyremoved)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
}
server.dirty += deleted;
addReplyLongLong(c,deleted);
@@ -1478,15 +1793,15 @@ cleanup:
if (rangetype == ZRANGE_LEX) zslFreeLexRange(&lexrange);
}
-void zremrangebyrankCommand(redisClient *c) {
+void zremrangebyrankCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_RANK);
}
-void zremrangebyscoreCommand(redisClient *c) {
+void zremrangebyscoreCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_SCORE);
}
-void zremrangebylexCommand(redisClient *c) {
+void zremrangebylexCommand(client *c) {
zremrangeGenericCommand(c,ZRANGE_LEX);
}
@@ -1531,7 +1846,7 @@ typedef struct {
* we already checked that "ell" holds a long long, or tried to convert another
* representation into a long long value. When this was successful,
* OPVAL_VALID_LL is set as well. */
-#define OPVAL_DIRTY_ROBJ 1
+#define OPVAL_DIRTY_SDS 1
#define OPVAL_DIRTY_LL 2
#define OPVAL_VALID_LL 4
@@ -1539,7 +1854,7 @@ typedef struct {
typedef struct {
int flags;
unsigned char _buf[32]; /* Private buffer. */
- robj *ele;
+ sds ele;
unsigned char *estr;
unsigned int elen;
long long ell;
@@ -1553,35 +1868,35 @@ void zuiInitIterator(zsetopsrc *op) {
if (op->subject == NULL)
return;
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
it->is.is = op->subject->ptr;
it->is.ii = 0;
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
it->ht.dict = op->subject->ptr;
it->ht.di = dictGetIterator(op->subject->ptr);
it->ht.de = dictNext(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
it->zl.zl = op->subject->ptr;
it->zl.eptr = ziplistIndex(it->zl.zl,0);
if (it->zl.eptr != NULL) {
it->zl.sptr = ziplistNext(it->zl.zl,it->zl.eptr);
- redisAssert(it->zl.sptr != NULL);
+ serverAssert(it->zl.sptr != NULL);
}
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
it->sl.zs = op->subject->ptr;
it->sl.node = it->sl.zs->zsl->header->level[0].forward;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1589,53 +1904,53 @@ void zuiClearIterator(zsetopsrc *op) {
if (op->subject == NULL)
return;
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
- REDIS_NOTUSED(it); /* skip */
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
+ UNUSED(it); /* skip */
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dictReleaseIterator(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
- REDIS_NOTUSED(it); /* skip */
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
- REDIS_NOTUSED(it); /* skip */
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
+ UNUSED(it); /* skip */
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
+ UNUSED(it); /* skip */
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
-int zuiLength(zsetopsrc *op) {
+unsigned long zuiLength(zsetopsrc *op) {
if (op->subject == NULL)
return 0;
- if (op->type == REDIS_SET) {
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->type == OBJ_SET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
return intsetLen(op->subject->ptr);
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dict *ht = op->subject->ptr;
return dictSize(ht);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ } else if (op->type == OBJ_ZSET) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
return zzlLength(op->subject->ptr);
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = op->subject->ptr;
return zs->zsl->length;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
@@ -1646,14 +1961,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
if (op->subject == NULL)
return 0;
- if (val->flags & OPVAL_DIRTY_ROBJ)
- decrRefCount(val->ele);
+ if (val->flags & OPVAL_DIRTY_SDS)
+ sdsfree(val->ele);
memset(val,0,sizeof(zsetopval));
- if (op->type == REDIS_SET) {
+ if (op->type == OBJ_SET) {
iterset *it = &op->iter.set;
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
int64_t ell;
if (!intsetGet(it->is.is,it->is.ii,&ell))
@@ -1663,7 +1978,7 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->is.ii++;
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
if (it->ht.de == NULL)
return 0;
val->ele = dictGetKey(it->ht.de);
@@ -1672,32 +1987,32 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->ht.de = dictNext(it->ht.di);
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
+ } else if (op->type == OBJ_ZSET) {
iterzset *it = &op->iter.zset;
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
/* No need to check both, but better be explicit. */
if (it->zl.eptr == NULL || it->zl.sptr == NULL)
return 0;
- redisAssert(ziplistGet(it->zl.eptr,&val->estr,&val->elen,&val->ell));
+ serverAssert(ziplistGet(it->zl.eptr,&val->estr,&val->elen,&val->ell));
val->score = zzlGetScore(it->zl.sptr);
/* Move to next element. */
zzlNext(it->zl.zl,&it->zl.eptr,&it->zl.sptr);
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
if (it->sl.node == NULL)
return 0;
- val->ele = it->sl.node->obj;
+ val->ele = it->sl.node->ele;
val->score = it->sl.node->score;
/* Move to next element. */
it->sl.node = it->sl.node->level[0].forward;
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
return 1;
}
@@ -1707,15 +2022,8 @@ int zuiLongLongFromValue(zsetopval *val) {
val->flags |= OPVAL_DIRTY_LL;
if (val->ele != NULL) {
- if (val->ele->encoding == REDIS_ENCODING_INT) {
- val->ell = (long)val->ele->ptr;
+ if (string2ll(val->ele,sdslen(val->ele),&val->ell))
val->flags |= OPVAL_VALID_LL;
- } else if (sdsEncodedObject(val->ele)) {
- if (string2ll(val->ele->ptr,sdslen(val->ele->ptr),&val->ell))
- val->flags |= OPVAL_VALID_LL;
- } else {
- redisPanic("Unsupported element encoding");
- }
} else if (val->estr != NULL) {
if (string2ll((char*)val->estr,val->elen,&val->ell))
val->flags |= OPVAL_VALID_LL;
@@ -1727,30 +2035,41 @@ int zuiLongLongFromValue(zsetopval *val) {
return val->flags & OPVAL_VALID_LL;
}
-robj *zuiObjectFromValue(zsetopval *val) {
+sds zuiSdsFromValue(zsetopval *val) {
if (val->ele == NULL) {
if (val->estr != NULL) {
- val->ele = createStringObject((char*)val->estr,val->elen);
+ val->ele = sdsnewlen((char*)val->estr,val->elen);
} else {
- val->ele = createStringObjectFromLongLong(val->ell);
+ val->ele = sdsfromlonglong(val->ell);
}
- val->flags |= OPVAL_DIRTY_ROBJ;
+ val->flags |= OPVAL_DIRTY_SDS;
}
return val->ele;
}
+/* This is different from zuiSdsFromValue since it returns a new SDS string
+ * that the caller is responsible for freeing. */
+sds zuiNewSdsFromValue(zsetopval *val) {
+ if (val->flags & OPVAL_DIRTY_SDS) {
+ /* We already have one to return! */
+ sds ele = val->ele;
+ val->flags &= ~OPVAL_DIRTY_SDS;
+ val->ele = NULL;
+ return ele;
+ } else if (val->ele) {
+ return sdsdup(val->ele);
+ } else if (val->estr) {
+ return sdsnewlen((char*)val->estr,val->elen);
+ } else {
+ return sdsfromlonglong(val->ell);
+ }
+}
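
The two accessors differ only in ownership; a minimal sketch:

    sds borrowed = zuiSdsFromValue(&zval);    /* may be freed by the next zuiNext() */
    sds owned    = zuiNewSdsFromValue(&zval); /* caller must sdsfree() it */
    sdsfree(owned);
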
+
int zuiBufferFromValue(zsetopval *val) {
if (val->estr == NULL) {
if (val->ele != NULL) {
- if (val->ele->encoding == REDIS_ENCODING_INT) {
- val->elen = ll2string((char*)val->_buf,sizeof(val->_buf),(long)val->ele->ptr);
- val->estr = val->_buf;
- } else if (sdsEncodedObject(val->ele)) {
- val->elen = sdslen(val->ele->ptr);
- val->estr = val->ele->ptr;
- } else {
- redisPanic("Unsupported element encoding");
- }
+ val->elen = sdslen(val->ele);
+ val->estr = (unsigned char*)val->ele;
} else {
val->elen = ll2string((char*)val->_buf,sizeof(val->_buf),val->ell);
val->estr = val->_buf;
@@ -1765,8 +2084,8 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
if (op->subject == NULL)
return 0;
- if (op->type == REDIS_SET) {
- if (op->encoding == REDIS_ENCODING_INTSET) {
+ if (op->type == OBJ_SET) {
+ if (op->encoding == OBJ_ENCODING_INTSET) {
if (zuiLongLongFromValue(val) &&
intsetFind(op->subject->ptr,val->ell))
{
@@ -1775,9 +2094,9 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
} else {
return 0;
}
- } else if (op->encoding == REDIS_ENCODING_HT) {
+ } else if (op->encoding == OBJ_ENCODING_HT) {
dict *ht = op->subject->ptr;
- zuiObjectFromValue(val);
+ zuiSdsFromValue(val);
if (dictFind(ht,val->ele) != NULL) {
*score = 1.0;
return 1;
@@ -1785,19 +2104,19 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
return 0;
}
} else {
- redisPanic("Unknown set encoding");
+ serverPanic("Unknown set encoding");
}
- } else if (op->type == REDIS_ZSET) {
- zuiObjectFromValue(val);
+ } else if (op->type == OBJ_ZSET) {
+ zuiSdsFromValue(val);
- if (op->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (op->encoding == OBJ_ENCODING_ZIPLIST) {
if (zzlFind(op->subject->ptr,val->ele,score) != NULL) {
/* Score is already set by zzlFind. */
return 1;
} else {
return 0;
}
- } else if (op->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (op->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = op->subject->ptr;
dictEntry *de;
if ((de = dictFind(zs->dict,val->ele)) != NULL) {
@@ -1807,15 +2126,19 @@ int zuiFind(zsetopsrc *op, zsetopval *val, double *score) {
return 0;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
} else {
- redisPanic("Unsupported type");
+ serverPanic("Unsupported type");
}
}
int zuiCompareByCardinality(const void *s1, const void *s2) {
- return zuiLength((zsetopsrc*)s1) - zuiLength((zsetopsrc*)s2);
+ unsigned long first = zuiLength((zsetopsrc*)s1);
+ unsigned long second = zuiLength((zsetopsrc*)s2);
+ if (first > second) return 1;
+ if (first < second) return -1;
+ return 0;
}
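
The explicit comparison matters now that zuiLength() returns unsigned
long; an illustrative case (assuming 64-bit unsigned long and 32-bit
int) where the old subtraction-based comparator misorders:

    unsigned long first = 0, second = 4294967296UL;  /* 2^32 */
    int old_cmp = (int)(first - second);             /* truncates to 0: "equal" */
    int new_cmp = (first > second) ? 1 :
                  (first < second) ? -1 : 0;         /* -1: correct ordering */
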
#define REDIS_AGGR_SUM 1
@@ -1836,25 +2159,37 @@ inline static void zunionInterAggregate(double *target, double val, int aggregat
*target = val > *target ? val : *target;
} else {
/* safety net */
- redisPanic("Unknown ZUNION/INTER aggregate type");
+ serverPanic("Unknown ZUNION/INTER aggregate type");
}
}
-void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
+uint64_t dictSdsHash(const void *key);
+int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
+
+dictType setAccumulatorDictType = {
+ dictSdsHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictSdsKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+void zunionInterGenericCommand(client *c, robj *dstkey, int op) {
int i, j;
long setnum;
int aggregate = REDIS_AGGR_SUM;
zsetopsrc *src;
zsetopval zval;
- robj *tmp;
- unsigned int maxelelen = 0;
+ sds tmp;
+ size_t maxelelen = 0;
robj *dstobj;
zset *dstzset;
zskiplistNode *znode;
int touched = 0;
/* expect setnum input keys to be given */
- if ((getLongFromObjectOrReply(c, c->argv[2], &setnum, NULL) != REDIS_OK))
+ if ((getLongFromObjectOrReply(c, c->argv[2], &setnum, NULL) != C_OK))
return;
if (setnum < 1) {
@@ -1874,7 +2209,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
for (i = 0, j = 3; i < setnum; i++, j++) {
robj *obj = lookupKeyWrite(c->db,c->argv[j]);
if (obj != NULL) {
- if (obj->type != REDIS_ZSET && obj->type != REDIS_SET) {
+ if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
zfree(src);
addReply(c,shared.wrongtypeerr);
return;
@@ -1896,17 +2231,21 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
int remaining = c->argc - j;
while (remaining) {
- if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) {
+ if (remaining >= (setnum + 1) &&
+ !strcasecmp(c->argv[j]->ptr,"weights"))
+ {
j++; remaining--;
for (i = 0; i < setnum; i++, j++, remaining--) {
if (getDoubleFromObjectOrReply(c,c->argv[j],&src[i].weight,
- "weight value is not a float") != REDIS_OK)
+ "weight value is not a float") != C_OK)
{
zfree(src);
return;
}
}
- } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) {
+ } else if (remaining >= 2 &&
+ !strcasecmp(c->argv[j]->ptr,"aggregate"))
+ {
j++; remaining--;
if (!strcasecmp(c->argv[j]->ptr,"sum")) {
aggregate = REDIS_AGGR_SUM;
@@ -1936,7 +2275,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
dstzset = dstobj->ptr;
memset(&zval, 0, sizeof(zval));
- if (op == REDIS_OP_INTER) {
+ if (op == SET_OP_INTER) {
/* Skip everything if the smallest input is empty. */
if (zuiLength(&src[0]) > 0) {
/* Precondition: as src[0] is non-empty and the inputs are ordered
@@ -1964,24 +2303,18 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
/* Only continue when present in every input. */
if (j == setnum) {
- tmp = zuiObjectFromValue(&zval);
+ tmp = zuiNewSdsFromValue(&zval);
znode = zslInsert(dstzset->zsl,score,tmp);
- incrRefCount(tmp); /* added to skiplist */
dictAdd(dstzset->dict,tmp,&znode->score);
- incrRefCount(tmp); /* added to dictionary */
-
- if (sdsEncodedObject(tmp)) {
- if (sdslen(tmp->ptr) > maxelelen)
- maxelelen = sdslen(tmp->ptr);
- }
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
}
}
zuiClearIterator(&src[0]);
}
- } else if (op == REDIS_OP_UNION) {
- dict *accumulator = dictCreate(&setDictType,NULL);
+ } else if (op == SET_OP_UNION) {
+ dict *accumulator = dictCreate(&setAccumulatorDictType,NULL);
dictIterator *di;
- dictEntry *de;
+ dictEntry *de, *existing;
double score;
if (setnum) {
@@ -2002,20 +2335,16 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
if (isnan(score)) score = 0;
/* Search for this element in the accumulating dictionary. */
- de = dictFind(accumulator,zuiObjectFromValue(&zval));
+ de = dictAddRaw(accumulator,zuiSdsFromValue(&zval),&existing);
/* If we don't have it, we need to create a new entry. */
- if (de == NULL) {
- tmp = zuiObjectFromValue(&zval);
+ if (!existing) {
+ tmp = zuiNewSdsFromValue(&zval);
/* Remember the longest single element encountered,
* to understand if it's possible to convert to ziplist
* at the end. */
- if (sdsEncodedObject(tmp)) {
- if (sdslen(tmp->ptr) > maxelelen)
- maxelelen = sdslen(tmp->ptr);
- }
- /* Add the element with its initial score. */
- de = dictAddRaw(accumulator,tmp);
- incrRefCount(tmp);
+ if (sdslen(tmp) > maxelelen) maxelelen = sdslen(tmp);
+ /* Update the element with its initial score. */
+ dictSetKey(accumulator, de, tmp);
dictSetDoubleVal(de,score);
} else {
/* Update the score with the score of the new instance
@@ -2024,7 +2353,7 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
* Here we access directly the dictEntry double
* value inside the union as it is a big speedup
* compared to using the getDouble/setDouble API. */
- zunionInterAggregate(&de->v.d,score,aggregate);
+ zunionInterAggregate(&existing->v.d,score,aggregate);
}
}
zuiClearIterator(&src[i]);
@@ -2039,67 +2368,59 @@ void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
dictExpand(dstzset->dict,dictSize(accumulator));
while((de = dictNext(di)) != NULL) {
- robj *ele = dictGetKey(de);
+ sds ele = dictGetKey(de);
score = dictGetDoubleVal(de);
znode = zslInsert(dstzset->zsl,score,ele);
- incrRefCount(ele); /* added to skiplist */
dictAdd(dstzset->dict,ele,&znode->score);
- incrRefCount(ele); /* added to dictionary */
}
dictReleaseIterator(di);
-
- /* We can free the accumulator dictionary now. */
dictRelease(accumulator);
} else {
- redisPanic("Unknown operator");
+ serverPanic("Unknown operator");
}
- if (dbDelete(c->db,dstkey)) {
- signalModifiedKey(c->db,dstkey);
+ if (dbDelete(c->db,dstkey))
touched = 1;
- server.dirty++;
- }
if (dstzset->zsl->length) {
- /* Convert to ziplist when in limits. */
- if (dstzset->zsl->length <= server.zset_max_ziplist_entries &&
- maxelelen <= server.zset_max_ziplist_value)
- zsetConvert(dstobj,REDIS_ENCODING_ZIPLIST);
-
+ zsetConvertToZiplistIfNeeded(dstobj,maxelelen);
dbAdd(c->db,dstkey,dstobj);
addReplyLongLong(c,zsetLength(dstobj));
- if (!touched) signalModifiedKey(c->db,dstkey);
- notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,
- (op == REDIS_OP_UNION) ? "zunionstore" : "zinterstore",
+ signalModifiedKey(c->db,dstkey);
+ notifyKeyspaceEvent(NOTIFY_ZSET,
+ (op == SET_OP_UNION) ? "zunionstore" : "zinterstore",
dstkey,c->db->id);
server.dirty++;
} else {
decrRefCount(dstobj);
addReply(c,shared.czero);
- if (touched)
- notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",dstkey,c->db->id);
+ if (touched) {
+ signalModifiedKey(c->db,dstkey);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id);
+ server.dirty++;
+ }
}
zfree(src);
}
-void zunionstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
+void zunionstoreCommand(client *c) {
+ zunionInterGenericCommand(c,c->argv[1], SET_OP_UNION);
}
-void zinterstoreCommand(redisClient *c) {
- zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
+void zinterstoreCommand(client *c) {
+ zunionInterGenericCommand(c,c->argv[1], SET_OP_INTER);
}
-void zrangeGenericCommand(redisClient *c, int reverse) {
+void zrangeGenericCommand(client *c, int reverse) {
robj *key = c->argv[1];
robj *zobj;
int withscores = 0;
long start;
long end;
- int llen;
- int rangelen;
+ long llen;
+ long rangelen;
- if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
withscores = 1;
@@ -2109,7 +2430,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
}
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL
- || checkType(c,zobj,REDIS_ZSET)) return;
+ || checkType(c,zobj,OBJ_ZSET)) return;
/* Sanitize indexes. */
llen = zsetLength(zobj);
@@ -2129,7 +2450,7 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
/* Return the result in form of a multi-bulk reply */
addReplyMultiBulkLen(c, withscores ? (rangelen*2) : rangelen);
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2141,12 +2462,12 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
else
eptr = ziplistIndex(zl,2*start);
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
while (rangelen--) {
- redisAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,eptr != NULL && sptr != NULL);
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
if (vstr == NULL)
addReplyBulkLongLong(c,vlong);
else
@@ -2161,11 +2482,11 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
- robj *ele;
+ sds ele;
/* Check if starting point is trivial, before doing log(N) lookup. */
if (reverse) {
@@ -2179,28 +2500,28 @@ void zrangeGenericCommand(redisClient *c, int reverse) {
}
while(rangelen--) {
- redisAssertWithInfo(c,zobj,ln != NULL);
- ele = ln->obj;
- addReplyBulk(c,ele);
+ serverAssertWithInfo(c,zobj,ln != NULL);
+ ele = ln->ele;
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
if (withscores)
addReplyDouble(c,ln->score);
ln = reverse ? ln->backward : ln->level[0].forward;
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
}
-void zrangeCommand(redisClient *c) {
+void zrangeCommand(client *c) {
zrangeGenericCommand(c,0);
}
-void zrevrangeCommand(redisClient *c) {
+void zrevrangeCommand(client *c) {
zrangeGenericCommand(c,1);
}
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */
-void genericZrangebyscoreCommand(redisClient *c, int reverse) {
+void genericZrangebyscoreCommand(client *c, int reverse) {
zrangespec range;
robj *key = c->argv[1];
robj *zobj;
@@ -2219,7 +2540,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
minidx = 2; maxidx = 3;
}
- if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
@@ -2235,8 +2556,13 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
pos++; remaining--;
withscores = 1;
} else if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL)
+ != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL)
+ != C_OK))
+ {
+ return;
+ }
pos += 3; remaining -= 3;
} else {
addReply(c,shared.syntaxerr);
@@ -2247,9 +2573,9 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
/* Ok, lookup the key and get the range */
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2271,7 +2597,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
/* Get score pointer for the first element. */
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
/* We don't know in advance how many matching elements there are in the
@@ -2300,7 +2626,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
/* We know the element exists, so ziplistGet should always succeed */
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
rangelen++;
if (vstr == NULL) {
@@ -2320,7 +2646,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
@@ -2362,7 +2688,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
rangelen++;
- addReplyBulk(c,ln->obj);
+ addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
if (withscores) {
addReplyDouble(c,ln->score);
@@ -2376,7 +2702,7 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
if (withscores) {
@@ -2386,31 +2712,31 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) {
setDeferredMultiBulkLength(c, replylen, rangelen);
}
-void zrangebyscoreCommand(redisClient *c) {
+void zrangebyscoreCommand(client *c) {
genericZrangebyscoreCommand(c,0);
}
-void zrevrangebyscoreCommand(redisClient *c) {
+void zrevrangebyscoreCommand(client *c) {
genericZrangebyscoreCommand(c,1);
}
-void zcountCommand(redisClient *c) {
+void zcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zrangespec range;
- int count = 0;
+ unsigned long count = 0;
/* Parse the range arguments */
- if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max is not a float");
return;
}
/* Lookup the sorted set */
if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
- checkType(c, zobj, REDIS_ZSET)) return;
+ checkType(c, zobj, OBJ_ZSET)) return;
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
double score;
@@ -2427,7 +2753,7 @@ void zcountCommand(redisClient *c) {
/* First element is in range */
sptr = ziplistNext(zl,eptr);
score = zzlGetScore(sptr);
- redisAssertWithInfo(c,zobj,zslValueLteMax(score,&range));
+ serverAssertWithInfo(c,zobj,zslValueLteMax(score,&range));
/* Iterate over elements in range */
while (eptr) {
@@ -2441,7 +2767,7 @@ void zcountCommand(redisClient *c) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *zn;
@@ -2452,7 +2778,7 @@ void zcountCommand(redisClient *c) {
/* Use rank of first element, if any, to determine preliminary count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count = (zsl->length - (rank - 1));
/* Find last element in range */
@@ -2460,38 +2786,38 @@ void zcountCommand(redisClient *c) {
/* Use rank of last element, if any, to determine the actual count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count -= (zsl->length - rank);
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
addReplyLongLong(c, count);
}
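
A worked example of the two-rank count above, assuming a 10-element set
whose range covers 1-based ranks 4 through 7:

    /* First match:  count = 10 - (4 - 1) = 7   (candidate ranks 4..10) */
    /* Last match:   count -= 10 - 7      = 3   leaving 4 elements,     */
    /*               i.e. exactly ranks 4, 5, 6 and 7.                  */
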
-void zlexcountCommand(redisClient *c) {
+void zlexcountCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
zlexrangespec range;
- int count = 0;
+ unsigned long count = 0;
/* Parse the range arguments */
- if (zslParseLexRange(c->argv[2],c->argv[3],&range) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[2],c->argv[3],&range) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
/* Lookup the sorted set */
if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL ||
- checkType(c, zobj, REDIS_ZSET))
+ checkType(c, zobj, OBJ_ZSET))
{
zslFreeLexRange(&range);
return;
}
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
@@ -2507,7 +2833,7 @@ void zlexcountCommand(redisClient *c) {
/* First element is in range */
sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range));
+ serverAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range));
/* Iterate over elements in range */
while (eptr) {
@@ -2519,7 +2845,7 @@ void zlexcountCommand(redisClient *c) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *zn;
@@ -2530,7 +2856,7 @@ void zlexcountCommand(redisClient *c) {
/* Use rank of first element, if any, to determine preliminary count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count = (zsl->length - (rank - 1));
/* Find last element in range */
@@ -2538,12 +2864,12 @@ void zlexcountCommand(redisClient *c) {
/* Use rank of last element, if any, to determine the actual count */
if (zn != NULL) {
- rank = zslGetRank(zsl, zn->score, zn->obj);
+ rank = zslGetRank(zsl, zn->score, zn->ele);
count -= (zsl->length - rank);
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
zslFreeLexRange(&range);
@@ -2551,7 +2877,7 @@ void zlexcountCommand(redisClient *c) {
}
/* This command implements ZRANGEBYLEX, ZREVRANGEBYLEX. */
-void genericZrangebylexCommand(redisClient *c, int reverse) {
+void genericZrangebylexCommand(client *c, int reverse) {
zlexrangespec range;
robj *key = c->argv[1];
robj *zobj;
@@ -2569,7 +2895,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
minidx = 2; maxidx = 3;
}
- if (zslParseLexRange(c->argv[minidx],c->argv[maxidx],&range) != REDIS_OK) {
+ if (zslParseLexRange(c->argv[minidx],c->argv[maxidx],&range) != C_OK) {
addReplyError(c,"min or max not valid string range item");
return;
}
@@ -2582,8 +2908,8 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
while (remaining) {
if (remaining >= 3 && !strcasecmp(c->argv[pos]->ptr,"limit")) {
- if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != REDIS_OK) ||
- (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != REDIS_OK)) return;
+ if ((getLongFromObjectOrReply(c, c->argv[pos+1], &offset, NULL) != C_OK) ||
+ (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != C_OK)) return;
pos += 3; remaining -= 3;
} else {
zslFreeLexRange(&range);
@@ -2595,13 +2921,13 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
/* Ok, lookup the key and get the range */
if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET))
+ checkType(c,zobj,OBJ_ZSET))
{
zslFreeLexRange(&range);
return;
}
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
unsigned char *zl = zobj->ptr;
unsigned char *eptr, *sptr;
unsigned char *vstr;
@@ -2623,7 +2949,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
}
/* Get score pointer for the first element. */
- redisAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
sptr = ziplistNext(zl,eptr);
/* We don't know in advance how many matching elements there are in the
@@ -2651,7 +2977,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
/* We know the element exists, so ziplistGet should always
* succeed. */
- redisAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
rangelen++;
if (vstr == NULL) {
@@ -2667,7 +2993,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
zzlNext(zl,&eptr,&sptr);
}
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
zset *zs = zobj->ptr;
zskiplist *zsl = zs->zsl;
zskiplistNode *ln;
@@ -2704,13 +3030,13 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
while (ln && limit--) {
/* Abort when the node is no longer in range. */
if (reverse) {
- if (!zslLexValueGteMin(ln->obj,&range)) break;
+ if (!zslLexValueGteMin(ln->ele,&range)) break;
} else {
- if (!zslLexValueLteMax(ln->obj,&range)) break;
+ if (!zslLexValueLteMax(ln->ele,&range)) break;
}
rangelen++;
- addReplyBulk(c,ln->obj);
+ addReplyBulkCBuffer(c,ln->ele,sdslen(ln->ele));
/* Move to next node */
if (reverse) {
@@ -2720,137 +3046,258 @@ void genericZrangebylexCommand(redisClient *c, int reverse) {
}
}
} else {
- redisPanic("Unknown sorted set encoding");
+ serverPanic("Unknown sorted set encoding");
}
zslFreeLexRange(&range);
setDeferredMultiBulkLength(c, replylen, rangelen);
}
-void zrangebylexCommand(redisClient *c) {
+void zrangebylexCommand(client *c) {
genericZrangebylexCommand(c,0);
}
-void zrevrangebylexCommand(redisClient *c) {
+void zrevrangebylexCommand(client *c) {
genericZrangebylexCommand(c,1);
}
-void zcardCommand(redisClient *c) {
+void zcardCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
if ((zobj = lookupKeyReadOrReply(c,key,shared.czero)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
+ checkType(c,zobj,OBJ_ZSET)) return;
addReplyLongLong(c,zsetLength(zobj));
}
-void zscoreCommand(redisClient *c) {
+void zscoreCommand(client *c) {
robj *key = c->argv[1];
robj *zobj;
double score;
if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
-
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- if (zzlFind(zobj->ptr,c->argv[2],&score) != NULL)
- addReplyDouble(c,score);
- else
- addReply(c,shared.nullbulk);
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- dictEntry *de;
+ checkType(c,zobj,OBJ_ZSET)) return;
- c->argv[2] = tryObjectEncoding(c->argv[2]);
- de = dictFind(zs->dict,c->argv[2]);
- if (de != NULL) {
- score = *(double*)dictGetVal(de);
- addReplyDouble(c,score);
- } else {
- addReply(c,shared.nullbulk);
- }
+ if (zsetScore(zobj,c->argv[2]->ptr,&score) == C_ERR) {
+ addReply(c,shared.nullbulk);
} else {
- redisPanic("Unknown sorted set encoding");
+ addReplyDouble(c,score);
}
}
-void zrankGenericCommand(redisClient *c, int reverse) {
+void zrankGenericCommand(client *c, int reverse) {
robj *key = c->argv[1];
robj *ele = c->argv[2];
robj *zobj;
- unsigned long llen;
- unsigned long rank;
+ long rank;
if ((zobj = lookupKeyReadOrReply(c,key,shared.nullbulk)) == NULL ||
- checkType(c,zobj,REDIS_ZSET)) return;
- llen = zsetLength(zobj);
+ checkType(c,zobj,OBJ_ZSET)) return;
- redisAssertWithInfo(c,ele,sdsEncodedObject(ele));
+ serverAssertWithInfo(c,ele,sdsEncodedObject(ele));
+ rank = zsetRank(zobj,ele->ptr,reverse);
+ if (rank >= 0) {
+ addReplyLongLong(c,rank);
+ } else {
+ addReply(c,shared.nullbulk);
+ }
+}
- if (zobj->encoding == REDIS_ENCODING_ZIPLIST) {
- unsigned char *zl = zobj->ptr;
- unsigned char *eptr, *sptr;
+void zrankCommand(client *c) {
+ zrankGenericCommand(c, 0);
+}
- eptr = ziplistIndex(zl,0);
- redisAssertWithInfo(c,zobj,eptr != NULL);
- sptr = ziplistNext(zl,eptr);
- redisAssertWithInfo(c,zobj,sptr != NULL);
+void zrevrankCommand(client *c) {
+ zrankGenericCommand(c, 1);
+}
- rank = 1;
- while(eptr != NULL) {
- if (ziplistCompare(eptr,ele->ptr,sdslen(ele->ptr)))
- break;
- rank++;
- zzlNext(zl,&eptr,&sptr);
- }
+void zscanCommand(client *c) {
+ robj *o;
+ unsigned long cursor;
- if (eptr != NULL) {
- if (reverse)
- addReplyLongLong(c,llen-rank);
+ if (parseScanCursorOrReply(c,c->argv[2],&cursor) == C_ERR) return;
+ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
+ checkType(c,o,OBJ_ZSET)) return;
+ scanGenericCommand(c,o,cursor);
+}
+
+/* This command implements the generic zpop operation, used by:
+ * ZPOPMIN, ZPOPMAX, BZPOPMIN and BZPOPMAX. This function is also used
+ * inside blocked.c in the unblocking stage of BZPOPMIN and BZPOPMAX.
+ *
+ * If 'emitkey' is true, the key name is also emitted, which is useful for
+ * the blocking behavior of BZPOP[MIN|MAX], since we can block on multiple keys.
+ *
+ * The synchronous version instead does not need to emit the key, but may
+ * use the 'count' argument to return multiple items if available. */
+void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey, robj *countarg) {
+ int idx;
+ robj *key = NULL;
+ robj *zobj = NULL;
+ sds ele;
+ double score;
+ long count = 1;
+
+ /* If a count argument was passed, parse it or return an error. */
+ if (countarg) {
+ if (getLongFromObjectOrReply(c,countarg,&count,NULL) != C_OK)
+ return;
+ if (count < 0) count = 1;
+ }
+
+ /* Check type and break on the first error, otherwise identify candidate. */
+ idx = 0;
+ while (idx < keyc) {
+ key = keyv[idx++];
+ zobj = lookupKeyWrite(c->db,key);
+ if (!zobj) continue;
+ if (checkType(c,zobj,OBJ_ZSET)) return;
+ break;
+ }
+
+ /* No candidate for zpopping, return empty. */
+ if (!zobj) {
+ addReply(c,shared.emptymultibulk);
+ return;
+ }
+
+ void *arraylen_ptr = addDeferredMultiBulkLength(c);
+ long arraylen = 0;
+
+ /* We emit the key only for the blocking variant. */
+ if (emitkey) addReplyBulk(c,key);
+
+ /* Remove the element. */
+ do {
+ if (zobj->encoding == OBJ_ENCODING_ZIPLIST) {
+ unsigned char *zl = zobj->ptr;
+ unsigned char *eptr, *sptr;
+ unsigned char *vstr;
+ unsigned int vlen;
+ long long vlong;
+
+ /* Get the first or last element in the sorted set. */
+ eptr = ziplistIndex(zl,where == ZSET_MAX ? -2 : 0);
+ serverAssertWithInfo(c,zobj,eptr != NULL);
+ serverAssertWithInfo(c,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+ if (vstr == NULL)
+ ele = sdsfromlonglong(vlong);
else
- addReplyLongLong(c,rank-1);
+ ele = sdsnewlen(vstr,vlen);
+
+ /* Get the score. */
+ sptr = ziplistNext(zl,eptr);
+ serverAssertWithInfo(c,zobj,sptr != NULL);
+ score = zzlGetScore(sptr);
+ } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
+ zset *zs = zobj->ptr;
+ zskiplist *zsl = zs->zsl;
+ zskiplistNode *zln;
+
+ /* Get the first or last element in the sorted set. */
+ zln = (where == ZSET_MAX ? zsl->tail :
+ zsl->header->level[0].forward);
+
+ /* There must be an element in the sorted set. */
+ serverAssertWithInfo(c,zobj,zln != NULL);
+ ele = sdsdup(zln->ele);
+ score = zln->score;
} else {
- addReply(c,shared.nullbulk);
+ serverPanic("Unknown sorted set encoding");
}
- } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) {
- zset *zs = zobj->ptr;
- zskiplist *zsl = zs->zsl;
- dictEntry *de;
- double score;
- ele = c->argv[2] = tryObjectEncoding(c->argv[2]);
- de = dictFind(zs->dict,ele);
- if (de != NULL) {
- score = *(double*)dictGetVal(de);
- rank = zslGetRank(zsl,score,ele);
- redisAssertWithInfo(c,ele,rank); /* Existing elements always have a rank. */
- if (reverse)
- addReplyLongLong(c,llen-rank);
- else
- addReplyLongLong(c,rank-1);
- } else {
- addReply(c,shared.nullbulk);
+ serverAssertWithInfo(c,zobj,zsetDel(zobj,ele));
+ server.dirty++;
+
+ if (arraylen == 0) { /* Do this only for the first iteration. */
+ char *events[2] = {"zpopmin","zpopmax"};
+ notifyKeyspaceEvent(NOTIFY_ZSET,events[where],key,c->db->id);
+ signalModifiedKey(c->db,key);
}
- } else {
- redisPanic("Unknown sorted set encoding");
- }
+
+ addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyDouble(c,score);
+ sdsfree(ele);
+ arraylen += 2;
+
+ /* Remove the key, if needed. */
+ if (zsetLength(zobj) == 0) {
+ dbDelete(c->db,key);
+ notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
+ break;
+ }
+ } while(--count);
+
+ setDeferredMultiBulkLength(c,arraylen_ptr,arraylen + (emitkey != 0));
}
-void zrankCommand(redisClient *c) {
- zrankGenericCommand(c, 0);
+/* ZPOPMIN key [<count>] */
+void zpopminCommand(client *c) {
+ if (c->argc > 3) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ genericZpopCommand(c,&c->argv[1],1,ZSET_MIN,0,
+ c->argc == 3 ? c->argv[2] : NULL);
}
-void zrevrankCommand(redisClient *c) {
- zrankGenericCommand(c, 1);
+/* ZPOPMAX key [<count>] */
+void zpopmaxCommand(client *c) {
+ if (c->argc > 3) {
+ addReply(c,shared.syntaxerr);
+ return;
+ }
+ genericZpopCommand(c,&c->argv[1],1,ZSET_MAX,0,
+ c->argc == 3 ? c->argv[2] : NULL);
}
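
The reply genericZpopCommand produces is a flat [member, score, member, score, ...]
array, prefixed with the key name only in the blocking variants. A minimal client
sketch against the bundled hiredis (the key name "zs" and the connection details
are illustrative, not part of this patch):

    #include <stdio.h>
    #include <hiredis/hiredis.h>

    int main(void) {
        redisContext *ctx = redisConnect("127.0.0.1", 6379);
        if (ctx == NULL || ctx->err) return 1;
        freeReplyObject(redisCommand(ctx, "ZADD zs 1 a 2 b 3 c"));
        /* Pop the two highest scored members: the reply is the flat
         * array [member, score, member, score]. */
        redisReply *r = redisCommand(ctx, "ZPOPMAX zs 2");
        for (size_t i = 0; i + 1 < r->elements; i += 2)
            printf("%s => %s\n", r->element[i]->str, r->element[i+1]->str);
        freeReplyObject(r);
        redisFree(ctx);
        return 0;
    }
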
-void zscanCommand(redisClient *c) {
+/* BZPOPMIN / BZPOPMAX actual implementation. */
+void blockingGenericZpopCommand(client *c, int where) {
robj *o;
- unsigned long cursor;
+ mstime_t timeout;
+ int j;
- if (parseScanCursorOrReply(c,c->argv[2],&cursor) == REDIS_ERR) return;
- if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptyscan)) == NULL ||
- checkType(c,o,REDIS_ZSET)) return;
- scanGenericCommand(c,o,cursor);
+ if (getTimeoutFromObjectOrReply(c,c->argv[c->argc-1],&timeout,UNIT_SECONDS)
+ != C_OK) return;
+
+ for (j = 1; j < c->argc-1; j++) {
+ o = lookupKeyWrite(c->db,c->argv[j]);
+ if (o != NULL) {
+ if (o->type != OBJ_ZSET) {
+ addReply(c,shared.wrongtypeerr);
+ return;
+ } else {
+ if (zsetLength(o) != 0) {
+ /* Non empty zset, this is like a normal ZPOP[MIN|MAX]. */
+ genericZpopCommand(c,&c->argv[j],1,where,1,NULL);
+ /* Replicate it as a ZPOP[MIN|MAX] instead of BZPOP[MIN|MAX]. */
+ rewriteClientCommandVector(c,2,
+ where == ZSET_MAX ? shared.zpopmax : shared.zpopmin,
+ c->argv[j]);
+ return;
+ }
+ }
+ }
+ }
+
+ /* If we are inside a MULTI/EXEC and the zset is empty the only thing
+ * we can do is to treat it as a timeout (even with timeout 0). */
+ if (c->flags & CLIENT_MULTI) {
+ addReply(c,shared.nullmultibulk);
+ return;
+ }
+
+ /* If the keys do not exist we must block */
+ blockForKeys(c,BLOCKED_ZSET,c->argv + 1,c->argc - 2,timeout,NULL,NULL);
+}
+
+/* BZPOPMIN key [key ...] timeout */
+void bzpopminCommand(client *c) {
+ blockingGenericZpopCommand(c,ZSET_MIN);
+}
+
+/* BZPOPMAX key [key ...] timeout */
+void bzpopmaxCommand(client *c) {
+ blockingGenericZpopCommand(c,ZSET_MAX);
}
diff --git a/src/util.c b/src/util.c
index d69721bf4..3fa6c9244 100644
--- a/src/util.c
+++ b/src/util.c
@@ -38,8 +38,10 @@
#include <sys/time.h>
#include <float.h>
#include <stdint.h>
+#include <errno.h>
#include "util.h"
+#include "sha1.h"
/* Glob-style pattern matching. */
int stringmatchlen(const char *pattern, int patternLen,
@@ -82,7 +84,7 @@ int stringmatchlen(const char *pattern, int patternLen,
}
match = 0;
while(1) {
- if (pattern[0] == '\\') {
+ if (pattern[0] == '\\' && patternLen >= 2) {
pattern++;
patternLen--;
if (pattern[0] == string[0])
@@ -169,11 +171,12 @@ int stringmatch(const char *pattern, const char *string, int nocase) {
}
/* Convert a string representing an amount of memory into the number of
- * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
+ * bytes, so for instance memtoll("1Gb") will return 1073741824, that is
* (1024*1024*1024).
*
* On parsing error, if *err is not NULL, it's set to 1, otherwise it's
- * set to 0 */
+ * set to 0. On error the function's return value is 0, regardless of
+ * whether 'err' is NULL or not. */
long long memtoll(const char *p, int *err) {
const char *u;
char buf[128];
@@ -182,6 +185,7 @@ long long memtoll(const char *p, int *err) {
unsigned int digits;
if (err) *err = 0;
+
/* Search the first non digit character. */
u = p;
if (*u == '-') u++;
@@ -202,16 +206,26 @@ long long memtoll(const char *p, int *err) {
mul = 1024L*1024*1024;
} else {
if (err) *err = 1;
- mul = 1;
+ return 0;
}
+
+ /* Copy the digits into a buffer; we'll use strtoll() to convert
+ * the digits (without the unit) into a number. */
digits = u-p;
if (digits >= sizeof(buf)) {
if (err) *err = 1;
- return LLONG_MAX;
+ return 0;
}
memcpy(buf,p,digits);
buf[digits] = '\0';
- val = strtoll(buf,NULL,10);
+
+ char *endptr;
+ errno = 0;
+ val = strtoll(buf,&endptr,10);
+ if ((val == 0 && errno == EINVAL) || *endptr != '\0') {
+ if (err) *err = 1;
+ return 0;
+ }
return val*mul;
}
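
A quick sketch of the new error convention (failed parses now return 0 rather
than a truncated or sentinel value):

    int err = 0;
    memtoll("1Gb", &err);  /* returns 1073741824, err == 0 */
    memtoll("10xx", &err); /* returns 0, err == 1: unknown unit */
    memtoll("foo", &err);  /* returns 0, err == 1: no digits at all */
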
@@ -237,6 +251,18 @@ uint32_t digits10(uint64_t v) {
return 12 + digits10(v / 1000000000000UL);
}
+/* Like digits10() but for signed values. */
+uint32_t sdigits10(int64_t v) {
+ if (v < 0) {
+ /* Abs value of LLONG_MIN requires special handling. */
+ uint64_t uv = (v != LLONG_MIN) ?
+ (uint64_t)-v : ((uint64_t) LLONG_MAX)+1;
+ return digits10(uv)+1; /* +1 for the minus. */
+ } else {
+ return digits10(v);
+ }
+}
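+
+For example:
+
+    sdigits10(7);         /* 1 */
+    sdigits10(-7);        /* 2: one digit plus the minus sign */
+    sdigits10(INT64_MIN); /* 20: 19 digits plus the sign */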
+
/* Convert a long long into a string. Returns the number of
* characters needed to represent the number.
* If the buffer is not big enough to store the string, 0 is returned.
@@ -248,7 +274,7 @@ uint32_t digits10(uint64_t v) {
*
* Modified in order to handle signed integers since the original code was
* designed for unsigned integers. */
-int ll2string(char* dst, size_t dstlen, long long svalue) {
+int ll2string(char *dst, size_t dstlen, long long svalue) {
static const char digits[201] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
@@ -304,13 +330,23 @@ int ll2string(char* dst, size_t dstlen, long long svalue) {
/* Convert a string into a long long. Returns 1 if the string could be parsed
* into a (non-overflowing) long long, 0 otherwise. The value will be set to
- * the parsed value when appropriate. */
+ * the parsed value when appropriate.
+ *
+ * Note that this function demands that the string strictly represents
+ * a long long: no spaces or other characters before or after the string
+ * representing the number are accepted, nor leading zeroes, with the
+ * single exception of the string "0" representing the zero number.
+ *
+ * Because of its strictness, it is safe to use this function to check if
+ * you can convert a string into a long long, and obtain back the string
+ * from the number without any loss in the string representation. */
int string2ll(const char *s, size_t slen, long long *value) {
const char *p = s;
size_t plen = 0;
int negative = 0;
unsigned long long v;
+ /* A zero length string is not a valid number. */
if (plen == slen)
return 0;
@@ -320,6 +356,8 @@ int string2ll(const char *s, size_t slen, long long *value) {
return 1;
}
+ /* Handle negative numbers: just set a flag and continue as if it
+ * were a positive number. The value is negated later. */
if (p[0] == '-') {
negative = 1;
p++; plen++;
@@ -333,13 +371,11 @@ int string2ll(const char *s, size_t slen, long long *value) {
if (p[0] >= '1' && p[0] <= '9') {
v = p[0]-'0';
p++; plen++;
- } else if (p[0] == '0' && slen == 1) {
- *value = 0;
- return 1;
} else {
return 0;
}
+ /* Parse all the other digits, checking for overflow at every step. */
while (plen < slen && p[0] >= '0' && p[0] <= '9') {
if (v > (ULLONG_MAX / 10)) /* Overflow. */
return 0;
@@ -356,6 +392,8 @@ int string2ll(const char *s, size_t slen, long long *value) {
if (plen < slen)
return 0;
+ /* Convert to negative if needed, and do the final overflow check when
+ * converting from unsigned long long to long long. */
if (negative) {
if (v > ((unsigned long long)(-(LLONG_MIN+1))+1)) /* Overflow. */
return 0;
@@ -384,8 +422,40 @@ int string2l(const char *s, size_t slen, long *lval) {
return 1;
}
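
To make the strictness rules above concrete (a return value of 1 means the
parse succeeded):

    long long v;
    string2ll("1024", 4, &v);  /* 1, v == 1024 */
    string2ll(" 1024", 5, &v); /* 0: leading space */
    string2ll("0123", 4, &v);  /* 0: leading zero */
    string2ll("9223372036854775808", 19, &v); /* 0: > LLONG_MAX */
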
+/* Convert a string into a double. Returns 1 if the string could be parsed
+ * into a (non-overflowing) double, 0 otherwise. The value will be set to
+ * the parsed value when appropriate.
+ *
+ * Note that this function demands that the string strictly represents
+ * a double: no spaces or other characters before or after the string
+ * representing the number are accepted. */
+int string2ld(const char *s, size_t slen, long double *dp) {
+ char buf[256];
+ long double value;
+ char *eptr;
+
+ if (slen >= sizeof(buf)) return 0;
+ memcpy(buf,s,slen);
+ buf[slen] = '\0';
+
+ errno = 0;
+ value = strtold(buf, &eptr);
+ if (isspace(buf[0]) || eptr[0] != '\0' ||
+ (errno == ERANGE &&
+ (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
+ errno == EINVAL ||
+ isnan(value))
+ return 0;
+
+ if (dp) *dp = value;
+ return 1;
+}
+
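The same strictness applies here; for example:

    long double ld;
    string2ld("3.0e2", 5, &ld);  /* 1, ld == 300 */
    string2ld("3.0e2 ", 6, &ld); /* 0: trailing space */
    string2ld("nan", 3, &ld);    /* 0: NaN is rejected explicitly */
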
/* Convert a double to a string representation. Returns the number of bytes
- * required. The representation should always be parsable by strtod(3). */
+ * required. The representation should always be parsable by strtod(3).
+ * This function does not support human-friendly formatting like ld2string
+ * does. It is intended mainly to be used inside t_zset.c when writing scores
+ * into a ziplist representing a sorted set. */
int d2string(char *buf, size_t len, double value) {
if (isnan(value)) {
len = snprintf(buf,len,"nan");
@@ -423,50 +493,111 @@ int d2string(char *buf, size_t len, double value) {
return len;
}
+/* Convert a long double into a string. If humanfriendly is non-zero
+ * it does not use exponential format and trims trailing zeroes at the end,
+ * however this results in loss of precision. Otherwise exp format is used
+ * and the output of snprintf() is not modified.
+ *
+ * The function returns the length of the string or zero if there was not
+ * enough buffer room to store it. */
+int ld2string(char *buf, size_t len, long double value, int humanfriendly) {
+ size_t l;
+
+ if (isinf(value)) {
+ /* Libc in odd systems (Hi Solaris!) will format infinite in a
+ * different way, so better to handle it in an explicit way. */
+ if (len < 5) return 0; /* No room. 5 is "-inf\0" */
+ if (value > 0) {
+ memcpy(buf,"inf",3);
+ l = 3;
+ } else {
+ memcpy(buf,"-inf",4);
+ l = 4;
+ }
+ } else if (humanfriendly) {
+ /* We use 17 digits precision since with 128 bit floats that precision
+ * after rounding is able to represent most small decimal numbers in a
+ * way that is "non surprising" for the user (that is, most small
+ * decimal numbers will be represented in a way that, when converted
+ * back into a string, matches exactly what the user typed.) */
+ l = snprintf(buf,len,"%.17Lf", value);
+ if (l+1 > len) return 0; /* No room. */
+ /* Now remove trailing zeroes after the '.' */
+ if (strchr(buf,'.') != NULL) {
+ char *p = buf+l-1;
+ while(*p == '0') {
+ p--;
+ l--;
+ }
+ if (*p == '.') l--;
+ }
+ } else {
+ l = snprintf(buf,len,"%.17Lg", value);
+ if (l+1 > len) return 0; /* No room. */
+ }
+ buf[l] = '\0';
+ return l;
+}
+
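A sketch of the two formatting modes (the exact digits can vary slightly with
the platform's long double precision):

    char buf[MAX_LONG_DOUBLE_CHARS];
    ld2string(buf, sizeof(buf), 3.14L, 1); /* humanfriendly: "3.14" */
    ld2string(buf, sizeof(buf), 3.0L, 1);  /* "3": the trailing '.' is trimmed too */
    ld2string(buf, sizeof(buf), 3.0L, 0);  /* exp format via %.17Lg: "3" */
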
+/* Get random bytes: attempt to get an initial seed from /dev/urandom and
+ * then use a one way hash function in counter mode to generate a random
+ * stream. However if /dev/urandom is not available, a weaker seed is used.
+ *
+ * This function is not thread safe, since the state is global. */
+void getRandomBytes(unsigned char *p, size_t len) {
+ /* Global state. */
+ static int seed_initialized = 0;
+ static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */
+ static uint64_t counter = 0; /* The counter we hash with the seed. */
+
+ if (!seed_initialized) {
+ /* Initialize a seed and use SHA1 in counter mode, where we hash
+ * the same seed with a progressive counter. For the goals of this
+ * function we just need non-colliding strings, there are no
+ * cryptographic security needs. */
+ FILE *fp = fopen("/dev/urandom","r");
+ if (fp == NULL || fread(seed,sizeof(seed),1,fp) != 1) {
+ /* Fall back to a weaker seed, and in this case reseed it
+ * again at every call. */
+ for (unsigned int j = 0; j < sizeof(seed); j++) {
+ struct timeval tv;
+ gettimeofday(&tv,NULL);
+ pid_t pid = getpid();
+ seed[j] = tv.tv_sec ^ tv.tv_usec ^ pid ^ (long)fp;
+ }
+ } else {
+ seed_initialized = 1;
+ }
+ if (fp) fclose(fp);
+ }
+
+ while(len) {
+ unsigned char digest[20];
+ SHA1_CTX ctx;
+ unsigned int copylen = len > 20 ? 20 : len;
+
+ SHA1Init(&ctx);
+ SHA1Update(&ctx, seed, sizeof(seed));
+ SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter));
+ SHA1Final(digest, &ctx);
+ counter++;
+
+ memcpy(p,digest,copylen);
+ len -= copylen;
+ p += copylen;
+ }
+}
+
/* Generate the Redis "Run ID", a SHA1-sized random number that identifies a
* given execution of Redis, so that if you are talking with an instance
* having run_id == A, and you reconnect and it has run_id == B, you can be
* sure that it is either a different instance or it was restarted. */
-void getRandomHexChars(char *p, unsigned int len) {
- FILE *fp = fopen("/dev/urandom","r");
+void getRandomHexChars(char *p, size_t len) {
char *charset = "0123456789abcdef";
- unsigned int j;
-
- if (fp == NULL || fread(p,len,1,fp) == 0) {
- /* If we can't read from /dev/urandom, do some reasonable effort
- * in order to create some entropy, since this function is used to
- * generate run_id and cluster instance IDs */
- char *x = p;
- unsigned int l = len;
- struct timeval tv;
- pid_t pid = getpid();
-
- /* Use time and PID to fill the initial array. */
- gettimeofday(&tv,NULL);
- if (l >= sizeof(tv.tv_usec)) {
- memcpy(x,&tv.tv_usec,sizeof(tv.tv_usec));
- l -= sizeof(tv.tv_usec);
- x += sizeof(tv.tv_usec);
- }
- if (l >= sizeof(tv.tv_sec)) {
- memcpy(x,&tv.tv_sec,sizeof(tv.tv_sec));
- l -= sizeof(tv.tv_sec);
- x += sizeof(tv.tv_sec);
- }
- if (l >= sizeof(pid)) {
- memcpy(x,&pid,sizeof(pid));
- l -= sizeof(pid);
- x += sizeof(pid);
- }
- /* Finally xor it with rand() output, that was already seeded with
- * time() at startup. */
- for (j = 0; j < len; j++)
- p[j] ^= rand();
- }
- /* Turn it into hex digits taking just 4 bits out of 8 for every byte. */
- for (j = 0; j < len; j++)
- p[j] = charset[p[j] & 0x0F];
- if (fp) fclose(fp);
+ size_t j;
+
+ getRandomBytes((unsigned char*)p,len);
+ for (j = 0; j < len; j++) p[j] = charset[p[j] & 0x0F];
}
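
Typical use, e.g. to build a run_id-like identifier (note that the function
does not NUL-terminate the buffer, so the caller must):

    char runid[41];
    getRandomHexChars(runid, 40);
    runid[40] = '\0';
    printf("%s\n", runid); /* 40 lowercase hex characters */
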
/* Given the filename, return the absolute path as an SDS string, or NULL
diff --git a/src/util.h b/src/util.h
index 666042c9b..91acde047 100644
--- a/src/util.h
+++ b/src/util.h
@@ -30,15 +30,25 @@
#ifndef __REDIS_UTIL_H
#define __REDIS_UTIL_H
+#include <stdint.h>
#include "sds.h"
+/* The maximum number of characters needed to represent a long double
+ * as a string (long double has a huge range).
+ * This should be the size of the buffer given to ld2string. */
+#define MAX_LONG_DOUBLE_CHARS 5*1024
+
int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase);
int stringmatch(const char *p, const char *s, int nocase);
long long memtoll(const char *p, int *err);
+uint32_t digits10(uint64_t v);
+uint32_t sdigits10(int64_t v);
int ll2string(char *s, size_t len, long long value);
int string2ll(const char *s, size_t slen, long long *value);
int string2l(const char *s, size_t slen, long *value);
+int string2ld(const char *s, size_t slen, long double *dp);
int d2string(char *buf, size_t len, double value);
+int ld2string(char *buf, size_t len, long double value, int humanfriendly);
sds getAbsolutePath(char *filename);
int pathIsBaseName(char *path);
diff --git a/src/version.h b/src/version.h
index 00cbae681..eb65e9bbd 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define REDIS_VERSION "2.9.999"
+#define REDIS_VERSION "999.999.999"
diff --git a/src/ziplist.c b/src/ziplist.c
index 7428d30e9..1579d1109 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -8,72 +8,150 @@
*
* ----------------------------------------------------------------------------
*
- * ZIPLIST OVERALL LAYOUT:
+ * ZIPLIST OVERALL LAYOUT
+ * ======================
+ *
* The general layout of the ziplist is as follows:
- * <zlbytes><zltail><zllen><entry><entry><zlend>
*
- * <zlbytes> is an unsigned integer to hold the number of bytes that the
- * ziplist occupies. This value needs to be stored to be able to resize the
- * entire structure without the need to traverse it first.
+ * <zlbytes> <zltail> <zllen> <entry> <entry> ... <entry> <zlend>
+ *
+ * NOTE: all fields are stored in little endian, unless specified otherwise.
+ *
+ * <uint32_t zlbytes> is an unsigned integer to hold the number of bytes that
+ * the ziplist occupies, including the four bytes of the zlbytes field itself.
+ * This value needs to be stored to be able to resize the entire structure
+ * without the need to traverse it first.
+ *
+ * <uint32_t zltail> is the offset to the last entry in the list. This allows
+ * a pop operation on the far side of the list without the need for full
+ * traversal.
*
- * <zltail> is the offset to the last entry in the list. This allows a pop
- * operation on the far side of the list without the need for full traversal.
+ * <uint16_t zllen> is the number of entries. When there are more than
+ * 2^16-2 entries, this value is set to 2^16-1 and we need to traverse the
+ * entire list to know how many items it holds.
*
- * <zllen> is the number of entries.When this value is larger than 2**16-2,
- * we need to traverse the entire list to know how many items it holds.
+ * <uint8_t zlend> is a special entry representing the end of the ziplist.
+ * It is encoded as a single byte equal to 255. No other normal entry starts
+ * with a byte set to the value of 255.
*
- * <zlend> is a single byte special value, equal to 255, which indicates the
- * end of the list.
+ * ZIPLIST ENTRIES
+ * ===============
*
- * ZIPLIST ENTRIES:
- * Every entry in the ziplist is prefixed by a header that contains two pieces
+ * Every entry in the ziplist is prefixed by metadata that contains two pieces
* of information. First, the length of the previous entry is stored to be
- * able to traverse the list from back to front. Second, the encoding with an
- * optional string length of the entry itself is stored.
+ * able to traverse the list from back to front. Second, the entry encoding is
+ * provided. It represents the entry type, integer or string, and in the case
+ * of strings it also represents the length of the string payload.
+ * So a complete entry is stored like this:
+ *
+ * <prevlen> <encoding> <entry-data>
+ *
+ * Sometimes the encoding represents the entry itself, like for small integers
+ * as we'll see later. In such a case the <entry-data> part is missing, and we
+ * could have just:
+ *
+ * <prevlen> <encoding>
*
- * The length of the previous entry is encoded in the following way:
+ * The length of the previous entry, <prevlen>, is encoded in the following way:
* If this length is smaller than 254 bytes, it will only consume a single
- * byte that takes the length as value. When the length is greater than or
- * equal to 254, it will consume 5 bytes. The first byte is set to 254 to
- * indicate a larger value is following. The remaining 4 bytes take the
- * length of the previous entry as value.
+ * byte representing the length as an unsigned 8 bit integer. When the length
+ * is greater than or equal to 254, it will consume 5 bytes. The first byte is
+ * set to 254 (FE) to indicate a larger value is following. The remaining 4
+ * bytes take the length of the previous entry as value.
*
- * The other header field of the entry itself depends on the contents of the
- * entry. When the entry is a string, the first 2 bits of this header will hold
- * the type of encoding used to store the length of the string, followed by the
- * actual length of the string. When the entry is an integer the first 2 bits
- * are both set to 1. The following 2 bits are used to specify what kind of
- * integer will be stored after this header. An overview of the different
- * types and encodings is as follows:
+ * So practically an entry is encoded in the following way:
+ *
+ * <prevlen from 0 to 253> <encoding> <entry>
+ *
+ * Or alternatively if the previous entry length is greater than 253 bytes
+ * the following encoding is used:
+ *
+ * 0xFE <4 bytes unsigned little endian prevlen> <encoding> <entry>
+ *
+ * The encoding field of the entry depends on the content of the
+ * entry. When the entry is a string, the first 2 bits of the encoding first
+ * byte will hold the type of encoding used to store the length of the string,
+ * followed by the actual length of the string. When the entry is an integer
+ * the first 2 bits are both set to 1. The following 2 bits are used to specify
+ * what kind of integer will be stored after this header. An overview of the
+ * different types and encodings is as follows. The first byte is always enough
+ * to determine the kind of entry.
*
* |00pppppp| - 1 byte
* String value with length less than or equal to 63 bytes (6 bits).
+ * "pppppp" represents the unsigned 6 bit length.
* |01pppppp|qqqqqqqq| - 2 bytes
* String value with length less than or equal to 16383 bytes (14 bits).
- * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
+ * IMPORTANT: The 14 bit number is stored in big endian.
+ * |10000000|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
* String value with length greater than or equal to 16384 bytes.
- * |11000000| - 1 byte
+ * Only the 4 bytes following the first byte represent the length,
+ * up to 2^32-1. The 6 lower bits of the first byte are not used and
+ * are set to zero.
+ * IMPORTANT: The 32 bit number is stored in big endian.
+ * |11000000| - 3 bytes
* Integer encoded as int16_t (2 bytes).
- * |11010000| - 1 byte
+ * |11010000| - 5 bytes
* Integer encoded as int32_t (4 bytes).
- * |11100000| - 1 byte
+ * |11100000| - 9 bytes
* Integer encoded as int64_t (8 bytes).
- * |11110000| - 1 byte
+ * |11110000| - 4 bytes
* Integer encoded as 24 bit signed (3 bytes).
- * |11111110| - 1 byte
+ * |11111110| - 2 bytes
* Integer encoded as 8 bit signed (1 byte).
 * |1111xxxx| - (with xxxx between 0001 and 1101) immediate 4 bit integer.
* Unsigned integer from 0 to 12. The encoded value is actually from
* 1 to 13 because 0000 and 1111 can not be used, so 1 should be
* subtracted from the encoded 4 bit value to obtain the right value.
- * |11111111| - End of ziplist.
+ * |11111111| - End of ziplist special entry.
+ *
+ * Like for the ziplist header, all the integers are represented in little
+ * endian byte order, even when this code is compiled on big endian systems.
+ *
+ * EXAMPLES OF ACTUAL ZIPLISTS
+ * ===========================
*
- * All the integers are represented in little endian byte order.
+ * The following is a ziplist containing the two elements representing
+ * the strings "2" and "5". It is composed of 15 bytes, which we visually
+ * split into sections:
+ *
+ * [0f 00 00 00] [0c 00 00 00] [02 00] [00 f3] [02 f6] [ff]
+ * | | | | | |
+ * zlbytes zltail entries "2" "5" end
+ *
+ * The first 4 bytes represent the number 15, that is the number of bytes
+ * the whole ziplist is composed of. The second 4 bytes are the offset
+ * at which the last ziplist entry is found, that is 12; in fact the
+ * last entry, "5", is at offset 12 inside the ziplist.
+ * The next 16 bit integer represents the number of elements inside the
+ * ziplist, its value is 2 since there are just two elements inside.
+ * Finally "00 f3" is the first entry representing the number 2. It is
+ * composed of the previous entry length, which is zero because this is
+ * our first entry, and the byte F3 which corresponds to the encoding
+ * |1111xxxx| with xxxx between 0001 and 1101. We need to remove the "F"
+ * higher order bits 1111, and subtract 1 from the "3", so the entry value
+ * is "2". The next entry has a prevlen of 02, since the first entry is
+ * composed of exactly two bytes. The entry itself, F6, is encoded exactly
+ * like the first entry, and 6-1 = 5, so the value of the entry is 5.
+ * Finally the special entry FF signals the end of the ziplist.
+ *
+ * Adding another element to the above ziplist with the value "Hello World"
+ * allows us to show how the ziplist encodes small strings. We'll just show
+ * the hex dump of the entry itself. Imagine the bytes as following the
+ * entry that stores "5" in the ziplist above:
+ *
+ * [02] [0b] [48 65 6c 6c 6f 20 57 6f 72 6c 64]
+ *
+ * The first byte, 02, is the length of the previous entry. The next
+ * byte represents the encoding in the pattern |00pppppp| that means
+ * that the entry is a string of length <pppppp>, so 0B means that
+ * an 11 byte string follows. From the third byte (48) to the last (64)
+ * there are just the ASCII characters for "Hello World".
*
* ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
- * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
+ * Copyright (c) 2009-2017, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -112,8 +190,13 @@
#include "endianconv.h"
#include "redisassert.h"
-#define ZIP_END 255
-#define ZIP_BIGLEN 254
+#define ZIP_END 255 /* Special "end of ziplist" entry. */
+#define ZIP_BIG_PREVLEN 254 /* Max number of bytes of the previous entry, for
+ the "prevlen" field prefixing each entry, to be
+ represented with just a single byte. Otherwise
+ it is represented as FE AA BB CC DD, where
+ AA BB CC DD are a 4 bytes unsigned integer
+ representing the previous entry len. */
/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0
@@ -126,41 +209,83 @@
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe
-/* 4 bit integer immediate encoding */
-#define ZIP_INT_IMM_MASK 0x0f
+
+/* 4 bit integer immediate encoding |1111xxxx| with xxxx between
+ * 0001 and 1101. */
+#define ZIP_INT_IMM_MASK 0x0f /* Mask to extract the 4 bits value. One
+ must be subtracted from it to obtain
+ the actual value. */
#define ZIP_INT_IMM_MIN 0xf1 /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd /* 11111101 */
-#define ZIP_INT_IMM_VAL(v) (v & ZIP_INT_IMM_MASK)
#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)
-/* Macro to determine type */
+/* Macro to determine if the entry is a string. String entries never start
+ * with "11" as most significant bits of the first byte. */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)
-/* Utility macros */
+/* Utility macros. */
+
+/* Return total bytes a ziplist is composed of. */
#define ZIPLIST_BYTES(zl) (*((uint32_t*)(zl)))
+
+/* Return the offset of the last item inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
+
+/* Return the length of a ziplist, or UINT16_MAX if the length cannot be
+ * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl) (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))
+
+/* The size of a ziplist header: two 32 bit integers for the total
+ * bytes count and last item offset. One 16 bit integer for the number
+ * of items field. */
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
+
+/* Size of the "end of ziplist" entry. Just one byte. */
#define ZIPLIST_END_SIZE (sizeof(uint8_t))
+
+/* Return the pointer to the first entry of a ziplist. */
#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
+
+/* Return the pointer to the last entry of a ziplist, using the
+ * last entry offset inside the ziplist header. */
#define ZIPLIST_ENTRY_TAIL(zl) ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))
+
+/* Return the pointer to the last byte of a ziplist, that is, the
+ * end-of-ziplist FF entry. */
#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
-/* We know a positive increment can only be 1 because entries can only be
- * pushed one at a time. */
+/* Increment the number of items field in the ziplist header. Note that this
+ * macro should never overflow the unsigned 16 bit integer, since entries are
+ * always pushed one at a time. When UINT16_MAX is reached we want the count
+ * to stay there to signal that a full scan is needed to get the number of
+ * items inside the ziplist. */
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
if (ZIPLIST_LENGTH(zl) < UINT16_MAX) \
ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+incr); \
}
+/* We use this structure to receive information about a ziplist entry.
+ * Note that this is not how the data is actually encoded: it is just what
+ * zipEntry() fills in for us so that an entry is easier to operate on. */
typedef struct zlentry {
- unsigned int prevrawlensize, prevrawlen;
- unsigned int lensize, len;
- unsigned int headersize;
- unsigned char encoding;
- unsigned char *p;
+ unsigned int prevrawlensize; /* Bytes used to encode the previous entry len. */
+ unsigned int prevrawlen; /* Previous entry len. */
+ unsigned int lensize; /* Bytes used to encode this entry type/len.
+ For example strings have a 1, 2 or 5 byte
+ header. Integers always use a single byte. */
+ unsigned int len; /* Bytes used to represent the actual entry.
+ For strings this is just the string length
+ while for integers it is 1, 2, 3, 4, 8 or
+ 0 (for 4 bit immediate) depending on the
+ number range. */
+ unsigned int headersize; /* prevrawlensize + lensize. */
+ unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on
+ the entry encoding. However for 4 bit
+ immediate integers this can assume a range
+ of values and must be range-checked. */
+ unsigned char *p; /* Pointer to the very start of the entry, that
+ is, this points to the prev-entry-len field. */
} zlentry;
#define ZIPLIST_ENTRY_ZERO(zle) { \
@@ -171,31 +296,40 @@ typedef struct zlentry {
}
/* Extract the encoding from the byte pointed by 'ptr' and set it into
- * 'encoding'. */
+ * 'encoding' field of the zlentry structure. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do { \
(encoding) = (ptr[0]); \
if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)
-void ziplistRepr(unsigned char *zl);
-
-/* Return bytes needed to store integer encoded by 'encoding' */
-static unsigned int zipIntSize(unsigned char encoding) {
+/* Return bytes needed to store integer encoded by 'encoding'. */
+unsigned int zipIntSize(unsigned char encoding) {
switch(encoding) {
case ZIP_INT_8B: return 1;
case ZIP_INT_16B: return 2;
case ZIP_INT_24B: return 3;
case ZIP_INT_32B: return 4;
case ZIP_INT_64B: return 8;
- default: return 0; /* 4 bit immediate */
}
- assert(NULL);
+ if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
+ return 0; /* 4 bit immediate */
+ panic("Invalid integer encoding 0x%02X", encoding);
return 0;
}
-/* Encode the length 'rawlen' writing it in 'p'. If p is NULL it just returns
- * the amount of bytes required to encode such a length. */
-static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
+/* Write the encoding header of the entry in 'p'. If p is NULL it just returns
+ * the amount of bytes required to encode such a length. Arguments:
+ *
+ * 'encoding' is the encoding we are using for the entry. It could be
+ * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
+ * for single-byte small immediate integers.
+ *
+ * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
+ * string that this entry represents.
+ *
+ * The function returns the number of bytes used by the encoding/length
+ * header stored in 'p'. */
+unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
unsigned char len = 1, buf[5];
if (ZIP_IS_STR(encoding)) {
@@ -224,15 +358,16 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
buf[0] = encoding;
}
- /* Store this length at p */
+ /* Store this length at p. */
memcpy(p,buf,len);
return len;
}
-/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the
- * entries encoding, the 'lensize' variable will hold the number of bytes
- * required to encode the entries length, and the 'len' variable will hold the
- * entries length. */
+/* Decode the entry encoding type and data length (string length for strings,
+ * number of bytes used for the integer for integer entries) encoded in 'ptr'.
+ * The 'encoding' variable will hold the entry encoding, the 'lensize'
+ * variable will hold the number of bytes required to encode the entry
+ * length, and the 'len' variable will hold the entry length. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do { \
ZIP_ENTRY_ENCODING((ptr), (encoding)); \
if ((encoding) < ZIP_STR_MASK) { \
@@ -242,14 +377,14 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
} else if ((encoding) == ZIP_STR_14B) { \
(lensize) = 2; \
(len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1]; \
- } else if (encoding == ZIP_STR_32B) { \
+ } else if ((encoding) == ZIP_STR_32B) { \
(lensize) = 5; \
(len) = ((ptr)[1] << 24) | \
((ptr)[2] << 16) | \
((ptr)[3] << 8) | \
((ptr)[4]); \
} else { \
- assert(NULL); \
+ panic("Invalid string encoding 0x%02X", (encoding)); \
} \
} else { \
(lensize) = 1; \
@@ -257,66 +392,83 @@ static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, un
} \
} while(0);
+/* Encode the length of the previous entry and write it to "p". This only
+ * uses the larger encoding (required in __ziplistCascadeUpdate). */
+int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
+ if (p != NULL) {
+ p[0] = ZIP_BIG_PREVLEN;
+ memcpy(p+1,&len,sizeof(len));
+ memrev32ifbe(p+1);
+ }
+ return 1+sizeof(len);
+}
+
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
-static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) {
+unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
- return (len < ZIP_BIGLEN) ? 1 : sizeof(len)+1;
+ return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
} else {
- if (len < ZIP_BIGLEN) {
+ if (len < ZIP_BIG_PREVLEN) {
p[0] = len;
return 1;
} else {
- p[0] = ZIP_BIGLEN;
- memcpy(p+1,&len,sizeof(len));
- memrev32ifbe(p+1);
- return 1+sizeof(len);
+ return zipStorePrevEntryLengthLarge(p,len);
}
}
}
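
The two prevlen encodings side by side, as a sketch:

    unsigned char buf[5];
    zipStorePrevEntryLength(buf, 12);    /* 1 byte:  0C */
    zipStorePrevEntryLength(buf, 1000);  /* 5 bytes: FE E8 03 00 00 (little endian) */
    zipStorePrevEntryLength(NULL, 1000); /* just returns 5, nothing is written */
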
-/* Encode the length of the previous entry and write it to "p". This only
- * uses the larger encoding (required in __ziplistCascadeUpdate). */
-static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
- if (p == NULL) return;
- p[0] = ZIP_BIGLEN;
- memcpy(p+1,&len,sizeof(len));
- memrev32ifbe(p+1);
-}
-
-/* Decode the number of bytes required to store the length of the previous
- * element, from the perspective of the entry pointed to by 'ptr'. */
+/* Return the number of bytes used to encode the length of the previous
+ * entry. The result is stored into the variable 'prevlensize'. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do { \
- if ((ptr)[0] < ZIP_BIGLEN) { \
+ if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
(prevlensize) = 1; \
} else { \
(prevlensize) = 5; \
} \
} while(0);
-/* Decode the length of the previous element, from the perspective of the entry
- * pointed to by 'ptr'. */
+/* Return the length of the previous element, and the number of bytes that
+ * are used in order to encode the previous element length.
+ * 'ptr' must point to the prevlen prefix of an entry (that encodes the
+ * length of the previous entry in order to navigate the elements backward).
+ * The length of the previous entry is stored in 'prevlen', the number of
+ * bytes needed to encode the previous entry length are stored in
+ * 'prevlensize'. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do { \
ZIP_DECODE_PREVLENSIZE(ptr, prevlensize); \
if ((prevlensize) == 1) { \
(prevlen) = (ptr)[0]; \
} else if ((prevlensize) == 5) { \
- assert(sizeof((prevlensize)) == 4); \
+ assert(sizeof((prevlen)) == 4); \
memcpy(&(prevlen), ((char*)(ptr)) + 1, 4); \
memrev32ifbe(&prevlen); \
} \
} while(0);
-/* Return the difference in number of bytes needed to store the length of the
- * previous element 'len', in the entry pointed to by 'p'. */
-static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
+/* Given a pointer 'p' to the prevlen info that prefixes an entry, this
+ * function returns the difference in number of bytes needed to encode
+ * the prevlen if the previous entry changes in size.
+ *
+ * So if A is the number of bytes used right now to encode the 'prevlen'
+ * field, and B is the number of bytes needed in order to encode the
+ * 'prevlen' if the previous element is updated to one of size 'len',
+ * then the function returns B - A.
+ *
+ * So the function returns a positive number if more space is needed,
+ * a negative number if less space is needed, or zero if the same space
+ * is needed. */
+int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
unsigned int prevlensize;
ZIP_DECODE_PREVLENSIZE(p, prevlensize);
- return zipPrevEncodeLength(NULL, len) - prevlensize;
+ return zipStorePrevEntryLength(NULL, len) - prevlensize;
}
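
A worked example of the B - A rule, under assumed sizes:

    /* Suppose the entry at p currently stores a prevlen of 100 (A = 1 byte).
     * If the previous entry is rewritten to 300 bytes, the prevlen needs
     * the 5 byte encoding (B = 5), so zipPrevLenByteDiff(p, 300) returns 4.
     * Shrinking back below 254 bytes would return -4 instead. */
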
/* Return the total number of bytes used by the entry pointed to by 'p'. */
-static unsigned int zipRawEntryLength(unsigned char *p) {
+unsigned int zipRawEntryLength(unsigned char *p) {
unsigned int prevlensize, encoding, lensize, len;
ZIP_DECODE_PREVLENSIZE(p, prevlensize);
ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);
@@ -325,7 +477,7 @@ static unsigned int zipRawEntryLength(unsigned char *p) {
/* Check if string pointed to by 'entry' can be encoded as an integer.
* Stores the integer value in 'v' and its encoding in 'encoding'. */
-static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
+int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
long long value;
if (entrylen >= 32 || entrylen == 0) return 0;
@@ -352,7 +504,7 @@ static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long
}
/* Store integer 'value' at 'p', encoded as 'encoding' */
-static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
+void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64;
@@ -382,7 +534,7 @@ static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encodi
}
/* Read integer encoded as 'encoding' from 'p' */
-static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
+int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64, ret = 0;
@@ -414,7 +566,7 @@ static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
}
/* Return a struct with all information about an entry. */
-static void zipEntry(unsigned char *p, zlentry *e) {
+void zipEntry(unsigned char *p, zlentry *e) {
ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
@@ -434,7 +586,7 @@ unsigned char *ziplistNew(void) {
}
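
To connect the layout walkthrough at the top of the file with the public API,
a sketch that builds exactly the 15 byte "2","5" ziplist from the worked
example (ziplistPush and friends are declared in ziplist.h):

    unsigned char *zl = ziplistNew();
    zl = ziplistPush(zl, (unsigned char*)"2", 1, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"5", 1, ZIPLIST_TAIL);
    /* Both values fit the 4 bit immediate integer encoding, so this
     * prints "15 bytes, 2 entries". */
    printf("%zu bytes, %u entries\n", ziplistBlobLen(zl), ziplistLen(zl));
    zfree(zl);
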
/* Resize the ziplist. */
-static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
+unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
zl = zrealloc(zl,len);
ZIPLIST_BYTES(zl) = intrev32ifbe(len);
zl[len-1] = ZIP_END;
@@ -449,8 +601,8 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
* causes a realloc and memmove). However, encoding the prevlen may require
* that this entry is grown as well. This effect may cascade throughout
* the ziplist when there are consecutive entries with a size close to
- * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every
- * consecutive entry.
+ * ZIP_BIG_PREVLEN, so we need to check that the prevlen can be encoded in
+ * every consecutive entry.
*
* Note that this effect can also happen in reverse, where the bytes required
* to encode the prevlen field can shrink. This effect is deliberately ignored,
@@ -461,7 +613,7 @@ static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
*
* The pointer "p" points to the first entry that does NOT need to be
* updated, i.e. consecutive fields MAY need an update. */
-static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
+unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
size_t offset, noffset, extra;
unsigned char *np;
@@ -470,7 +622,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
while (p[0] != ZIP_END) {
zipEntry(p, &cur);
rawlen = cur.headersize + cur.len;
- rawlensize = zipPrevEncodeLength(NULL,rawlen);
+ rawlensize = zipStorePrevEntryLength(NULL,rawlen);
/* Abort if there is no next entry. */
if (p[rawlen] == ZIP_END) break;
@@ -501,7 +653,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
memmove(np+rawlensize,
np+next.prevrawlensize,
curlen-noffset-next.prevrawlensize-1);
- zipPrevEncodeLength(np,rawlen);
+ zipStorePrevEntryLength(np,rawlen);
/* Advance the cursor */
p += rawlen;
@@ -510,9 +662,9 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
if (next.prevrawlensize > rawlensize) {
/* This would result in shrinking, which we want to avoid.
* So, set "rawlen" in the available bytes. */
- zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
+ zipStorePrevEntryLengthLarge(p+rawlen,rawlen);
} else {
- zipPrevEncodeLength(p+rawlen,rawlen);
+ zipStorePrevEntryLength(p+rawlen,rawlen);
}
/* Stop here, as the raw length of "next" has not changed. */
@@ -523,7 +675,7 @@ static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p
}
/* Delete "num" entries, starting at "p". Returns pointer to the ziplist. */
-static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
+unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
unsigned int i, totlen, deleted = 0;
size_t offset;
int nextdiff = 0;
@@ -535,7 +687,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
deleted++;
}
- totlen = p-first.p;
+ totlen = p-first.p; /* Bytes taken by the element(s) to delete. */
if (totlen > 0) {
if (p[0] != ZIP_END) {
/* Storing `prevrawlen` in this entry may increase or decrease the
@@ -543,8 +695,13 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
* There always is room to store this, because it was previously
* stored by an entry that is now being deleted. */
nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
+
+ /* Note that there is always space when p jumps backward: if
+ * the new previous entry is large, one of the deleted elements
+ * had a 5 byte prevlen header, so there are for sure at least
+ * 5 bytes free and we need just 4. */
p -= nextdiff;
- zipPrevEncodeLength(p,first.prevrawlen);
+ zipStorePrevEntryLength(p,first.prevrawlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
@@ -583,7 +740,7 @@ static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsig
}
/* Insert item at "p". */
-static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
+unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
unsigned int prevlensize, prevlen = 0;
size_t offset;
@@ -609,19 +766,24 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
/* 'encoding' is set to the appropriate integer encoding */
reqlen = zipIntSize(encoding);
} else {
- /* 'encoding' is untouched, however zipEncodeLength will use the
+ /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
* string length to figure out how to encode it. */
reqlen = slen;
}
/* We need space for both the length of the previous entry and
* the length of the payload. */
- reqlen += zipPrevEncodeLength(NULL,prevlen);
- reqlen += zipEncodeLength(NULL,encoding,slen);
+ reqlen += zipStorePrevEntryLength(NULL,prevlen);
+ reqlen += zipStoreEntryEncoding(NULL,encoding,slen);
/* When the insert position is not equal to the tail, we need to
* make sure that the next entry can hold this entry's length in
* its prevlen field. */
+ int forcelarge = 0;
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
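+    /* A shrink of the next entry's prevlen field (nextdiff == -4) combined
+     * with a new entry smaller than 4 bytes would make the realloc below
+     * shrink the ziplist before the memmove, discarding bytes that still
+     * need to be copied. Keep the next entry's 5 byte prevlen encoding
+     * as-is instead (see the 'forcelarge' branch below). */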
+ if (nextdiff == -4 && reqlen < 4) {
+ nextdiff = 0;
+ forcelarge = 1;
+ }
/* Store offset because a realloc may change the address of zl. */
offset = p-zl;
@@ -634,7 +796,10 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
/* Encode this entry's raw length in the next entry. */
- zipPrevEncodeLength(p+reqlen,reqlen);
+ if (forcelarge)
+ zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
+ else
+ zipStorePrevEntryLength(p+reqlen,reqlen);
/* Update offset for tail */
ZIPLIST_TAIL_OFFSET(zl) =
@@ -662,8 +827,8 @@ static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsig
}
/* Write the entry */
- p += zipPrevEncodeLength(p,prevlen);
- p += zipEncodeLength(p,encoding,slen);
+ p += zipStorePrevEntryLength(p,prevlen);
+ p += zipStoreEntryEncoding(p,encoding,slen);
if (ZIP_IS_STR(encoding)) {
memcpy(p,s,slen);
} else {
@@ -1029,7 +1194,7 @@ void ziplistRepr(unsigned char *zl) {
printf(
"{total bytes %d} "
- "{length %u}\n"
+ "{num entries %u}\n"
"{tail offset %u}\n",
intrev32ifbe(ZIPLIST_BYTES(zl)),
intrev16ifbe(ZIPLIST_LENGTH(zl)),
@@ -1038,16 +1203,15 @@ void ziplistRepr(unsigned char *zl) {
while(*p != ZIP_END) {
zipEntry(p, &entry);
printf(
- "{"
- "addr 0x%08lx, "
- "index %2d, "
- "offset %5ld, "
- "rl: %5u, "
- "hs %2u, "
- "pl: %5u, "
- "pls: %2u, "
- "payload %5u"
- "} ",
+ "{\n"
+ "\taddr 0x%08lx,\n"
+ "\tindex %2d,\n"
+ "\toffset %5ld,\n"
+ "\thdr+entry len: %5u,\n"
+ "\thdr len%2u,\n"
+ "\tprevrawlen: %5u,\n"
+ "\tprevrawlensize: %2u,\n"
+ "\tpayload %5u\n",
(long unsigned)p,
index,
(unsigned long) (p-zl),
@@ -1056,8 +1220,14 @@ void ziplistRepr(unsigned char *zl) {
entry.prevrawlen,
entry.prevrawlensize,
entry.len);
+ printf("\tbytes: ");
+ for (unsigned int i = 0; i < entry.headersize+entry.len; i++) {
+ printf("%02x|",p[i]);
+ }
+ printf("\n");
p += entry.headersize;
if (ZIP_IS_STR(entry.encoding)) {
+ printf("\t[str]");
if (entry.len > 40) {
if (fwrite(p,40,1,stdout) == 0) perror("fwrite");
printf("...");
@@ -1066,9 +1236,9 @@ void ziplistRepr(unsigned char *zl) {
fwrite(p,entry.len,1,stdout) == 0) perror("fwrite");
}
} else {
- printf("%lld", (long long) zipLoadInteger(p,entry.encoding));
+ printf("\t[int]%lld", (long long) zipLoadInteger(p,entry.encoding));
}
- printf("\n");
+ printf("\n}\n");
p += entry.len;
index++;
}
diff --git a/src/ziplist.h b/src/ziplist.h
index e92b5e783..964a47f6d 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -28,6 +28,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef _ZIPLIST_H
+#define _ZIPLIST_H
+
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
@@ -45,7 +48,10 @@ unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int sle
unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
unsigned int ziplistLen(unsigned char *zl);
size_t ziplistBlobLen(unsigned char *zl);
+void ziplistRepr(unsigned char *zl);
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);
#endif
+
+#endif /* _ZIPLIST_H */
diff --git a/src/zmalloc.c b/src/zmalloc.c
index 640ee19e2..308774d86 100644
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
/* This function provides us access to the original libc free(). This is useful
* for instance to free results obtained by backtrace_symbols(). We need
@@ -43,6 +44,7 @@ void zlibc_free(void *ptr) {
#include <pthread.h>
#include "config.h"
#include "zmalloc.h"
+#include "atomicvar.h"
#ifdef HAVE_MALLOC_SIZE
#define PREFIX_SIZE (0)
@@ -65,51 +67,23 @@ void zlibc_free(void *ptr) {
#define calloc(count,size) je_calloc(count,size)
#define realloc(ptr,size) je_realloc(ptr,size)
#define free(ptr) je_free(ptr)
-#endif
-
-#if defined(__ATOMIC_RELAXED)
-#define update_zmalloc_stat_add(__n) __atomic_add_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
-#define update_zmalloc_stat_sub(__n) __atomic_sub_fetch(&used_memory, (__n), __ATOMIC_RELAXED)
-#elif defined(HAVE_ATOMIC)
-#define update_zmalloc_stat_add(__n) __sync_add_and_fetch(&used_memory, (__n))
-#define update_zmalloc_stat_sub(__n) __sync_sub_and_fetch(&used_memory, (__n))
-#else
-#define update_zmalloc_stat_add(__n) do { \
- pthread_mutex_lock(&used_memory_mutex); \
- used_memory += (__n); \
- pthread_mutex_unlock(&used_memory_mutex); \
-} while(0)
-
-#define update_zmalloc_stat_sub(__n) do { \
- pthread_mutex_lock(&used_memory_mutex); \
- used_memory -= (__n); \
- pthread_mutex_unlock(&used_memory_mutex); \
-} while(0)
-
+#define mallocx(size,flags) je_mallocx(size,flags)
+#define dallocx(ptr,flags) je_dallocx(ptr,flags)
#endif
#define update_zmalloc_stat_alloc(__n) do { \
size_t _n = (__n); \
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
- if (zmalloc_thread_safe) { \
- update_zmalloc_stat_add(_n); \
- } else { \
- used_memory += _n; \
- } \
+ atomicIncr(used_memory,__n); \
} while(0)
#define update_zmalloc_stat_free(__n) do { \
size_t _n = (__n); \
if (_n&(sizeof(long)-1)) _n += sizeof(long)-(_n&(sizeof(long)-1)); \
- if (zmalloc_thread_safe) { \
- update_zmalloc_stat_sub(_n); \
- } else { \
- used_memory -= _n; \
- } \
+ atomicDecr(used_memory,__n); \
} while(0)
static size_t used_memory = 0;
-static int zmalloc_thread_safe = 0;
pthread_mutex_t used_memory_mutex = PTHREAD_MUTEX_INITIALIZER;
static void zmalloc_default_oom(size_t size) {
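
The atomicIncr/atomicDecr macros used by the stats macros above (and atomicGet, used by zmalloc_used_memory() further down) come from the newly included atomicvar.h. A plausible sketch of what they expand to, assuming GCC/Clang __atomic builtins (the real header also provides __sync and pthread-mutex fallbacks for older compilers); relaxed ordering suffices because used_memory is only a statistic:

    /* Illustrative only, not the full atomicvar.h header. */
    #define atomicIncr(var, count) __atomic_add_fetch(&var, (count), __ATOMIC_RELAXED)
    #define atomicDecr(var, count) __atomic_sub_fetch(&var, (count), __ATOMIC_RELAXED)
    #define atomicGet(var, dstvar) do { \
        dstvar = __atomic_load_n(&var, __ATOMIC_RELAXED); \
    } while(0)
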
@@ -135,6 +109,24 @@ void *zmalloc(size_t size) {
#endif
}
+/* Allocation and free functions that bypass the thread cache
+ * and go straight to the allocator arena bins.
+ * Currently implemented only for jemalloc. Used for online defragmentation. */
+#ifdef HAVE_DEFRAG
+void *zmalloc_no_tcache(size_t size) {
+ void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);
+ if (!ptr) zmalloc_oom_handler(size);
+ update_zmalloc_stat_alloc(zmalloc_size(ptr));
+ return ptr;
+}
+
+void zfree_no_tcache(void *ptr) {
+ if (ptr == NULL) return;
+ update_zmalloc_stat_free(zmalloc_size(ptr));
+ dallocx(ptr, MALLOCX_TCACHE_NONE);
+}
+#endif
+
void *zcalloc(size_t size) {
void *ptr = calloc(1, size+PREFIX_SIZE);
@@ -173,7 +165,7 @@ void *zrealloc(void *ptr, size_t size) {
*((size_t*)newptr) = size;
update_zmalloc_stat_free(oldsize);
- update_zmalloc_stat_alloc(size);
+ update_zmalloc_stat_alloc(size+PREFIX_SIZE);
return (char*)newptr+PREFIX_SIZE;
#endif
}
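
A hedged usage sketch for the new no-tcache pair (the helper name is hypothetical, not the actual defrag code): allocating and freeing with MALLOCX_TCACHE_NONE keeps both the new block and the freed hole out of the per-thread cache, so jemalloc's arena can actually repack memory rather than handing the same block straight back:

    #include <string.h>

    #ifdef HAVE_DEFRAG
    /* Hypothetical helper: relocate a value into a fresh arena allocation
     * so the allocator can choose a less fragmented run. */
    void *defrag_move(void *old, size_t len) {
        void *fresh = zmalloc_no_tcache(len);
        memcpy(fresh, old, len);
        zfree_no_tcache(old);   /* hole returns to the arena, not the tcache */
        return fresh;
    }
    #endif
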
@@ -190,6 +182,9 @@ size_t zmalloc_size(void *ptr) {
if (size&(sizeof(long)-1)) size += sizeof(long)-(size&(sizeof(long)-1));
return size+PREFIX_SIZE;
}
+size_t zmalloc_usable(void *ptr) {
+ return zmalloc_size(ptr)-PREFIX_SIZE;
+}
#endif
void zfree(void *ptr) {
@@ -220,27 +215,10 @@ char *zstrdup(const char *s) {
size_t zmalloc_used_memory(void) {
size_t um;
-
- if (zmalloc_thread_safe) {
-#if defined(__ATOMIC_RELAXED) || defined(HAVE_ATOMIC)
- um = update_zmalloc_stat_add(0);
-#else
- pthread_mutex_lock(&used_memory_mutex);
- um = used_memory;
- pthread_mutex_unlock(&used_memory_mutex);
-#endif
- }
- else {
- um = used_memory;
- }
-
+ atomicGet(used_memory,um);
return um;
}
-void zmalloc_enable_thread_safeness(void) {
- zmalloc_thread_safe = 1;
-}
-
void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
zmalloc_oom_handler = oom_handler;
}
@@ -323,23 +301,61 @@ size_t zmalloc_get_rss(void) {
}
#endif
-/* Fragmentation = RSS / allocated-bytes */
-float zmalloc_get_fragmentation_ratio(size_t rss) {
- return (float)rss/zmalloc_used_memory();
+#if defined(USE_JEMALLOC)
+int zmalloc_get_allocator_info(size_t *allocated,
+ size_t *active,
+ size_t *resident) {
+ uint64_t epoch = 1;
+ size_t sz;
+ *allocated = *resident = *active = 0;
+ /* Update the statistics cached by mallctl. */
+ sz = sizeof(epoch);
+ je_mallctl("epoch", &epoch, &sz, &epoch, sz);
+ sz = sizeof(size_t);
+ /* Unlike RSS, this does not include RSS from shared libraries and other
+ * non-heap mappings. */
+ je_mallctl("stats.resident", resident, &sz, NULL, 0);
+ /* Unlike resident, this doesn't include the pages jemalloc reserves
+ * for re-use (purge will clean that). */
+ je_mallctl("stats.active", active, &sz, NULL, 0);
+ /* Unlike zmalloc_used_memory, this matches the stats.resident by taking
+ * into account all allocations done by this process (not only zmalloc). */
+ je_mallctl("stats.allocated", allocated, &sz, NULL, 0);
+ return 1;
}
+#else
+int zmalloc_get_allocator_info(size_t *allocated,
+ size_t *active,
+ size_t *resident) {
+ *allocated = *resident = *active = 0;
+ return 1;
+}
+#endif
/* Get the sum of the specified field (converted from kb to bytes) in
 * /proc/self/smaps. The field must be specified with trailing ":" as it
 * appears in the smaps output.
*
- * Example: zmalloc_get_smap_bytes_by_field("Rss:");
+ * If a pid is specified, the information is extracted for that pid,
+ * otherwise if pid is -1 the information reported is about the
+ * current process.
+ *
+ * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1);
*/
#if defined(HAVE_PROC_SMAPS)
-size_t zmalloc_get_smap_bytes_by_field(char *field) {
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
char line[1024];
size_t bytes = 0;
- FILE *fp = fopen("/proc/self/smaps","r");
int flen = strlen(field);
+ FILE *fp;
+
+ if (pid == -1) {
+ fp = fopen("/proc/self/smaps","r");
+ } else {
+ char filename[128];
+ snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid);
+ fp = fopen(filename,"r");
+ }
if (!fp) return 0;
while(fgets(line,sizeof(line),fp) != NULL) {
@@ -355,18 +371,19 @@ size_t zmalloc_get_smap_bytes_by_field(char *field) {
return bytes;
}
#else
-size_t zmalloc_get_smap_bytes_by_field(char *field) {
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
((void) field);
+ ((void) pid);
return 0;
}
#endif
-size_t zmalloc_get_private_dirty(void) {
- return zmalloc_get_smap_bytes_by_field("Private_Dirty:");
+size_t zmalloc_get_private_dirty(long pid) {
+ return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
}
/* Returns the size of physical memory (RAM) in bytes.
- * It looks ugly, but this is the cleanest way to achive cross platform results.
+ * It looks ugly, but this is the cleanest way to achieve cross platform results.
* Cleaned up from:
*
* http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
@@ -405,7 +422,7 @@ size_t zmalloc_get_memory_size(void) {
mib[0] = CTL_HW;
#if defined(HW_REALMEM)
mib[1] = HW_REALMEM; /* FreeBSD. ----------------- */
-#elif defined(HW_PYSMEM)
+#elif defined(HW_PHYSMEM)
mib[1] = HW_PHYSMEM; /* Others. ------------------ */
#endif
unsigned int size = 0; /* 32-bit */
@@ -413,8 +430,9 @@ size_t zmalloc_get_memory_size(void) {
if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
return (size_t)size;
return 0L; /* Failed? */
-#endif /* sysctl and sysconf variants */
-
+#else
+ return 0L; /* Unknown method to get the data. */
+#endif
#else
return 0L; /* Unknown OS. */
#endif
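
A small consumer sketch for the new allocator-info API, assuming only the function above: where the removed zmalloc_get_fragmentation_ratio() divided RSS by zmalloc-tracked bytes, the jemalloc counters allow ratios over a consistent accounting base (roughly the shape of the allocator fields INFO can report):

    #include <stdio.h>
    #include <stddef.h>

    void print_allocator_frag(void) {
        size_t allocated, active, resident;
        zmalloc_get_allocator_info(&allocated, &active, &resident);
        if (allocated == 0) return;   /* non-jemalloc builds report zeros */
        printf("allocator frag: %.2f (active/allocated), rss-like: %.2f\n",
               (double)active / allocated, (double)resident / allocated);
    }
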
diff --git a/src/zmalloc.h b/src/zmalloc.h
index a47ea6ccf..9c9229907 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -63,6 +63,18 @@
#ifndef ZMALLOC_LIB
#define ZMALLOC_LIB "libc"
+#ifdef __GLIBC__
+#include <malloc.h>
+#define HAVE_MALLOC_SIZE 1
+#define zmalloc_size(p) malloc_usable_size(p)
+#endif
+#endif
+
+/* We can enable the Redis defrag capabilities only if we are using Jemalloc
+ * and the version in use is our special version, modified for Redis to be
+ * able to return per-allocation fragmentation hints. */
+#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT)
+#define HAVE_DEFRAG
#endif
void *zmalloc(size_t size);
@@ -71,17 +83,24 @@ void *zrealloc(void *ptr, size_t size);
void zfree(void *ptr);
char *zstrdup(const char *s);
size_t zmalloc_used_memory(void);
-void zmalloc_enable_thread_safeness(void);
void zmalloc_set_oom_handler(void (*oom_handler)(size_t));
-float zmalloc_get_fragmentation_ratio(size_t rss);
size_t zmalloc_get_rss(void);
-size_t zmalloc_get_private_dirty(void);
-size_t zmalloc_get_smap_bytes_by_field(char *field);
+int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident);
+size_t zmalloc_get_private_dirty(long pid);
+size_t zmalloc_get_smap_bytes_by_field(char *field, long pid);
size_t zmalloc_get_memory_size(void);
void zlibc_free(void *ptr);
+#ifdef HAVE_DEFRAG
+void zfree_no_tcache(void *ptr);
+void *zmalloc_no_tcache(size_t size);
+#endif
+
#ifndef HAVE_MALLOC_SIZE
size_t zmalloc_size(void *ptr);
+size_t zmalloc_usable(void *ptr);
+#else
+#define zmalloc_usable(p) zmalloc_size(p)
#endif
#endif /* __ZMALLOC_H */
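
To make the zmalloc_size()/zmalloc_usable() distinction concrete, here is a minimal stand-in (a sketch, not the real implementation) for the path where the allocator lacks malloc_usable_size() and zmalloc keeps its own size prefix; the real code additionally rounds the stats up to word size:

    #include <stdlib.h>

    #define DEMO_PREFIX sizeof(size_t)

    static void *demo_zmalloc(size_t size) {
        char *raw = malloc(size + DEMO_PREFIX);
        if (raw == NULL) return NULL;
        *((size_t *)raw) = size;       /* remember the requested size */
        return raw + DEMO_PREFIX;      /* caller sees only the user region */
    }

    /* zmalloc_size() counts the prefix too (it is real memory we consumed);
     * zmalloc_usable() subtracts it to report what the caller may use. */
    static size_t demo_zmalloc_usable(void *user) {
        return *((size_t *)((char *)user - DEMO_PREFIX));
    }
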
diff --git a/tests/assets/default.conf b/tests/assets/default.conf
index 81f8470bc..d7b8a75c6 100644
--- a/tests/assets/default.conf
+++ b/tests/assets/default.conf
@@ -1,5 +1,6 @@
# Redis configuration for testing.
+always-show-logo yes
notify-keyspace-events KEA
daemonize no
pidfile /var/run/redis.pid
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
index 0647914dc..f9a0e180d 100644
--- a/tests/cluster/cluster.tcl
+++ b/tests/cluster/cluster.tcl
@@ -42,6 +42,16 @@ proc get_myself id {
return {}
}
+# Get a specific node by ID by parsing the CLUSTER NODES output
+# of instance number 'instance_id'.
+proc get_node_by_id {instance_id node_id} {
+ set nodes [get_cluster_nodes $instance_id]
+ foreach n $nodes {
+ if {[dict get $n id] eq $node_id} {return $n}
+ }
+ return {}
+}
+
# Return the value of the specified CLUSTER INFO field.
proc CI {n field} {
get_info_field [R $n cluster info] $field
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index 69a160c4f..93603ddc9 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -17,10 +17,12 @@ proc main {} {
}
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
puts $::errorInfo
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
index 8811762c6..68fba135e 100644
--- a/tests/cluster/tests/04-resharding.tcl
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -13,6 +13,24 @@ test "Cluster is up" {
assert_cluster_state ok
}
+test "Enable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly yes
+ # We use "appendfsync no" because it's fast but also guarantees that
+ # write(2) is performed before replying to client.
+ R $id config set appendfsync no
+ }
+
+ foreach_redis_id id {
+ wait_for_condition 1000 500 {
+ [RI $id aof_rewrite_in_progress] == 0 &&
+ [RI $id aof_enabled] == 1
+ } else {
+ fail "Failed to enable AOF on instance #$id"
+ }
+ }
+}
+
# Return non-zero if the specified PID corresponds to a process still in execution,
# otherwise 0 is returned.
proc process_is_running {pid} {
@@ -41,6 +59,7 @@ array set content {}
set tribpid {}
test "Cluster consistency during live resharding" {
+ set ele 0
for {set j 0} {$j < $numops} {incr j} {
# Trigger the resharding once we execute half the ops.
if {$tribpid ne {} &&
@@ -54,12 +73,12 @@ test "Cluster consistency during live resharding" {
flush stdout
set target [dict get [get_myself [randomInt 5]] id]
set tribpid [lindex [exec \
- ../../../src/redis-trib.rb reshard \
- --from all \
- --to $target \
- --slots 100 \
- --yes \
+ ../../../src/redis-cli --cluster reshard \
127.0.0.1:[get_instance_attrib redis 0 port] \
+ --cluster-from all \
+ --cluster-to $target \
+ --cluster-slots 100 \
+ --cluster-yes \
| [info nameofexecutable] \
../tests/helpers/onlydots.tcl \
&] 0]
@@ -68,7 +87,7 @@ test "Cluster consistency during live resharding" {
# Write random data to random list.
set listid [randomInt $numkeys]
set key "key:$listid"
- set ele [randomValue]
+ incr ele
# We write both with Lua scripts and with plain commands.
# This way we are able to stress the Lua -> Redis command invocation
# as well, which has checks to prevent Lua from writing into the wrong
@@ -97,6 +116,57 @@ test "Cluster consistency during live resharding" {
test "Verify $numkeys keys for consistency with logical content" {
# Check that the Redis Cluster content matches our logical content.
foreach {key value} [array get content] {
- assert {[$cluster lrange $key 0 -1] eq $value}
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Crash and restart all the instances" {
+ foreach_redis_id id {
+ kill_instance redis $id
+ restart_instance redis $id
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Verify $numkeys keys after the crash & restart" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ if {[$cluster lrange $key 0 -1] ne $value} {
+ fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
+ }
+ }
+}
+
+test "Disable AOF in all the instances" {
+ foreach_redis_id id {
+ R $id config set appendonly no
+ }
+}
+
+test "Verify slaves consistency" {
+ set verified_masters 0
+ foreach_redis_id id {
+ set role [R $id role]
+ lassign $role myrole myoffset slaves
+ if {$myrole eq {slave}} continue
+ set masterport [get_instance_attrib redis $id port]
+ set masterdigest [R $id debug digest]
+ foreach_redis_id sid {
+ set srole [R $sid role]
+ if {[lindex $srole 0] eq {master}} continue
+ if {[lindex $srole 2] != $masterport} continue
+ wait_for_condition 1000 500 {
+ [R $sid debug digest] eq $masterdigest
+ } else {
+ fail "Master and slave data digest are different"
+ }
+ incr verified_masters
+ }
}
+ assert {$verified_masters >= 5}
}
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
index 6efedce5d..bcb0fa1ea 100644
--- a/tests/cluster/tests/05-slave-selection.tcl
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -92,3 +92,80 @@ test "Node #10 should eventually replicate node #5" {
fail "#10 didn't became slave of #5"
}
}
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 3 masters and 15 slaves, so that we have 5
+# slaves for each master.
+test "Create a 3 nodes cluster" {
+ create_cluster 3 15
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "The first master has actually 5 slaves" {
+ assert {[llength [lindex [R 0 role] 2]] == 5}
+}
+
+test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
+ set port0 [get_instance_attrib redis 0 port]
+ assert {[lindex [R 3 role] 2] == $port0}
+ assert {[lindex [R 6 role] 2] == $port0}
+ assert {[lindex [R 9 role] 2] == $port0}
+ assert {[lindex [R 12 role] 2] == $port0}
+ assert {[lindex [R 15 role] 2] == $port0}
+}
+
+test {Instance #3, #6, #9, #12 and #15 synced with the master} {
+ wait_for_condition 1000 50 {
+ [RI 3 master_link_status] eq {up} &&
+ [RI 6 master_link_status] eq {up} &&
+ [RI 9 master_link_status] eq {up} &&
+ [RI 12 master_link_status] eq {up} &&
+ [RI 15 master_link_status] eq {up}
+ } else {
+ fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
+ }
+}
+
+proc master_detected {instances} {
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ return true
+ }
+ }
+
+ return false
+}
+
+test "New Master down consecutively" {
+ set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
+
+ set loops [expr {[dict size $instances]-1}]
+ for {set i 0} {$i < $loops} {incr i} {
+ set master_id -1
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ set master_id $instance
+ break;
+ }
+ }
+
+ if {$master_id eq -1} {
+ fail "no master detected, #loop $i"
+ }
+
+ set instances [dict remove $instances $master_id]
+
+ kill_instance redis $master_id
+ wait_for_condition 1000 50 {
+ [master_detected $instances]
+ } else {
+ failover "No failover detected when master $master_id fails"
+ }
+
+ assert_cluster_state ok
+ }
+}
diff --git a/tests/cluster/tests/07-replica-migration.tcl b/tests/cluster/tests/07-replica-migration.tcl
index 2ec0742b5..68231cd28 100644
--- a/tests/cluster/tests/07-replica-migration.tcl
+++ b/tests/cluster/tests/07-replica-migration.tcl
@@ -45,3 +45,59 @@ foreach_redis_id id {
}
}
}
+
+# Now test the migration to a master which used to be a slave, after
+# a failover.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 masters and 10 slaves, so that we have 2
+# slaves for each master.
+test "Create a 5 nodes cluster" {
+ create_cluster 5 10
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Kill slave #7 of master #2. Only slave left is #12 now" {
+ kill_instance redis 7
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing master node #2, #12 should failover" {
+ kill_instance redis 2
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance 12 is now a master without slaves" {
+ assert {[RI 12 role] eq {master}}
+}
+
+# The remaining instance is now without slaves. Some other slave
+# should migrate to it.
+
+test "Master #12 should get at least one migrated replica" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 12 role] 2]] >= 1
+ } else {
+ fail "Master #12 has no replicas"
+ }
+}
diff --git a/tests/cluster/tests/10-manual-failover.tcl b/tests/cluster/tests/10-manual-failover.tcl
new file mode 100644
index 000000000..5441b79f3
--- /dev/null
+++ b/tests/cluster/tests/10-manual-failover.tcl
@@ -0,0 +1,192 @@
+# Check the manual failover
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+set numkeys 50000
+set numops 10000
+set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]]
+catch {unset content}
+array set content {}
+
+test "Send CLUSTER FAILOVER to #5, during load" {
+ for {set j 0} {$j < $numops} {incr j} {
+ # Write random data to random list.
+ set listid [randomInt $numkeys]
+ set key "key:$listid"
+ set ele [randomValue]
+ # We write both with Lua scripts and with plain commands.
+ # This way we are able to stress the Lua -> Redis command invocation
+ # as well, which has checks to prevent Lua from writing into the wrong
+ # hash slots.
+ if {$listid % 2} {
+ $cluster rpush $key $ele
+ } else {
+ $cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
+ }
+ lappend content($key) $ele
+
+ if {($j % 1000) == 0} {
+ puts -nonewline W; flush stdout
+ }
+
+ if {$j == $numops/2} {R 5 cluster failover}
+ }
+}
+
+test "Wait for failover" {
+ wait_for_condition 1000 50 {
+ [CI 1 cluster_current_epoch] > $current_epoch
+ } else {
+ fail "No failover detected"
+ }
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 1
+}
+
+test "Instance #5 is now a master" {
+ assert {[RI 5 role] eq {master}}
+}
+
+test "Verify $numkeys keys for consistency with logical content" {
+ # Check that the Redis Cluster content matches our logical content.
+ foreach {key value} [array get content] {
+ assert {[$cluster lrange $key 0 -1] eq $value}
+ }
+}
+
+test "Instance #0 gets converted into a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave}
+ } else {
+ fail "Old master was not converted into slave"
+ }
+}
+
+## Check that manual failover does not happen if we can't talk with the master.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover
+}
+
+test "Instance #5 is still a slave after some time (no failover)" {
+ after 5000
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
+
+## Check with "force" failover happens anyway.
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "Make instance #0 unreachable without killing it" {
+ R 0 deferred 1
+ R 0 DEBUG SLEEP 10
+}
+
+test "Send CLUSTER FAILOVER to instance #5" {
+ R 5 cluster failover force
+}
+
+test "Instance #5 is a master after some time" {
+ wait_for_condition 1000 50 {
+ [RI 5 role] eq {master}
+ } else {
+ fail "Instance #5 is not a master after some time regardless of FORCE"
+ }
+}
+
+test "Wait for instance #0 to return back alive" {
+ R 0 deferred 0
+ assert {[R 0 read] eq {OK}}
+}
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
new file mode 100644
index 000000000..f567c6962
--- /dev/null
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -0,0 +1,59 @@
+# Manual takeover test
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Killing majority of master nodes" {
+ kill_instance redis 0
+ kill_instance redis 1
+ kill_instance redis 2
+}
+
+test "Cluster should eventually be down" {
+ assert_cluster_state fail
+}
+
+test "Use takeover to bring slaves back" {
+ R 5 cluster failover takeover
+ R 6 cluster failover takeover
+ R 7 cluster failover takeover
+}
+
+test "Cluster should eventually be up again" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 4
+}
+
+test "Instance #5, #6, #7 are now masters" {
+ assert {[RI 5 role] eq {master}}
+ assert {[RI 6 role] eq {master}}
+ assert {[RI 7 role] eq {master}}
+}
+
+test "Restarting the previously killed master nodes" {
+ restart_instance redis 0
+ restart_instance redis 1
+ restart_instance redis 2
+}
+
+test "Instance #0, #1, #2 gets converted into a slaves" {
+ wait_for_condition 1000 50 {
+ [RI 0 role] eq {slave} && [RI 1 role] eq {slave} && [RI 2 role] eq {slave}
+ } else {
+ fail "Old masters not converted into slaves"
+ }
+}
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
new file mode 100644
index 000000000..3d8b7b04b
--- /dev/null
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -0,0 +1,64 @@
+# Replica migration test #2.
+#
+# Check that a master that can be targeted by replica migration
+# re-acquires replicas after it gets slots assigned again, in a cluster
+# where the other masters have slaves.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 masters and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+ create_cluster 5 15
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+ foreach_redis_id id {
+ if {$id < 5} {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R $id role] 2]] >= 2
+ } else {
+ fail "Master #$id does not have 2 slaves as expected"
+ }
+ }
+ }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+ set output [exec \
+ ../../../src/redis-cli --cluster rebalance \
+ 127.0.0.1:[get_instance_attrib redis 0 port] \
+ --cluster-weight ${master0_id}=0 >@ stdout ]
+}
+
+test "Master #0 should lose its replicas" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 0 role] 2]] == 0
+ } else {
+ fail "Master #0 still has replicas"
+ }
+}
+
+test "Resharding back some slot to master #0" {
+ # Wait for the cluster config to propagate before attempting a
+ # new resharding.
+ after 10000
+ set output [exec \
+ ../../../src/redis-cli --cluster rebalance \
+ 127.0.0.1:[get_instance_attrib redis 0 port] \
+ --cluster-weight ${master0_id}=.01 \
+ --cluster-use-empty-masters >@ stdout]
+}
+
+test "Master #0 should re-acquire one or more replicas" {
+ wait_for_condition 1000 50 {
+ [llength [lindex [R 0 role] 2]] >= 1
+ } else {
+ fail "Master #0 has no has replicas"
+ }
+}
diff --git a/tests/cluster/tests/13-no-failover-option.tcl b/tests/cluster/tests/13-no-failover-option.tcl
new file mode 100644
index 000000000..befa598d1
--- /dev/null
+++ b/tests/cluster/tests/13-no-failover-option.tcl
@@ -0,0 +1,61 @@
+# Check that the no-failover option works
+
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 5 nodes cluster" {
+ create_cluster 5 5
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+ cluster_write_test 0
+}
+
+test "Instance #5 is a slave" {
+ assert {[RI 5 role] eq {slave}}
+
+ # Configure it to never failover the master
+ R 5 CONFIG SET cluster-slave-no-failover yes
+}
+
+test "Instance #5 synced with the master" {
+ wait_for_condition 1000 50 {
+ [RI 5 master_link_status] eq {up}
+ } else {
+ fail "Instance #5 master link status is not up"
+ }
+}
+
+test "The nofailover flag is propagated" {
+ set slave5_id [dict get [get_myself 5] id]
+
+ foreach_redis_id id {
+ wait_for_condition 1000 50 {
+ [has_flag [get_node_by_id $id $slave5_id] nofailover]
+ } else {
+ fail "Instance $id can't see the nofailover flag of slave"
+ }
+ }
+}
+
+set current_epoch [CI 1 cluster_current_epoch]
+
+test "Killing one master node" {
+ kill_instance redis 0
+}
+
+test "Cluster should be still down after some time" {
+ after 10000
+ assert_cluster_state fail
+}
+
+test "Instance #5 is still a slave" {
+ assert {[RI 5 role] eq {slave}}
+}
+
+test "Restarting the previously killed master node" {
+ restart_instance redis 0
+}
diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl
index 65fc806e1..466ab8f25 100644
--- a/tests/cluster/tests/includes/init-tests.tcl
+++ b/tests/cluster/tests/includes/init-tests.tcl
@@ -27,10 +27,17 @@ test "Cluster nodes are reachable" {
test "Cluster nodes hard reset" {
foreach_redis_id id {
+ if {$::valgrind} {
+ set node_timeout 10000
+ } else {
+ set node_timeout 3000
+ }
catch {R $id flushall} ; # May fail for readonly slaves.
+ R $id MULTI
R $id cluster reset hard
R $id cluster set-config-epoch [expr {$id+1}]
- R $id config set cluster-node-timeout 3000
+ R $id EXEC
+ R $id config set cluster-node-timeout $node_timeout
R $id config set cluster-slave-validity-factor 10
R $id config rewrite
}
diff --git a/tests/helpers/bg_block_op.tcl b/tests/helpers/bg_block_op.tcl
new file mode 100644
index 000000000..238d3874f
--- /dev/null
+++ b/tests/helpers/bg_block_op.tcl
@@ -0,0 +1,52 @@
+source tests/support/redis.tcl
+source tests/support/util.tcl
+
+# This function sometimes writes, sometimes blocking-reads from lists/sorted
+# sets. Multiple processes like this execute at the same time, so that
+# we have some chance to trap a corner condition if there is
+# a regression. For this to happen it is important that we narrow the key
+# space to just a few elements, and balance the operations so that it is
+# unlikely that lists and zsets just get more data without ever causing
+# blocking.
+proc bg_block_op {host port db ops} {
+ set r [redis $host $port]
+ $r select $db
+
+ for {set j 0} {$j < $ops} {incr j} {
+
+ # List side
+ set k list_[randomInt 10]
+ set k2 list_[randomInt 10]
+ set v [randomValue]
+
+ randpath {
+ randpath {
+ $r rpush $k $v
+ } {
+ $r lpush $k $v
+ }
+ } {
+ $r blpop $k 2
+ } {
+ $r blpop $k $k2 2
+ }
+
+ # Zset side
+ set k zset_[randomInt 10]
+ set k2 zset_[randomInt 10]
+ set v1 [randomValue]
+ set v2 [randomValue]
+
+ randpath {
+ $r zadd $k [randomInt 10000] $v1
+ } {
+ $r zadd $k [randomInt 10000] $v1 [randomInt 10000] $v2
+ } {
+ $r bzpopmin $k 2
+ } {
+ $r bzpopmax $k 2
+ }
+ }
+}
+
+bg_block_op [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3]
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 426508f33..357b34818 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -16,8 +16,10 @@ source ../support/server.tcl
source ../support/test.tcl
set ::verbose 0
+set ::valgrind 0
set ::pause_on_error 0
set ::simulate_error 0
+set ::failed 0
set ::sentinel_instances {}
set ::redis_instances {}
set ::sentinel_base_port 20000
@@ -32,6 +34,25 @@ if {[catch {cd tmp}]} {
exit 1
}
+# Execute an instance of the server of the specified 'type', using
+# the provided configuration file. Returns the PID of the process.
+proc exec_instance {type cfgfile} {
+ if {$type eq "redis"} {
+ set prgname redis-server
+ } elseif {$type eq "sentinel"} {
+ set prgname redis-sentinel
+ } else {
+ error "Unknown instance type."
+ }
+
+ if {$::valgrind} {
+ set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
+ } else {
+ set pid [exec ../../../src/${prgname} $cfgfile &]
+ }
+ return $pid
+}
+
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
for {set j 0} {$j < $count} {incr j} {
@@ -58,14 +79,7 @@ proc spawn_instance {type base_port count {conf {}}} {
close $cfg
# Finally exec it and remember the pid for later cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } elseif {$type eq "sentinel"} {
- set prgname redis-sentinel
- } else {
- error "Unknown instance type."
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
lappend ::pids $pid
# Check availability
@@ -85,8 +99,25 @@ proc spawn_instance {type base_port count {conf {}}} {
}
}
+proc log_crashes {} {
+ set start_pattern {*REDIS BUG REPORT START*}
+ set logs [glob */log.txt]
+ foreach log $logs {
+ set fd [open $log]
+ set found 0
+ while {[gets $fd line] >= 0} {
+ if {[string match $start_pattern $line]} {
+ puts "\n*** Crash report found in $log ***"
+ set found 1
+ }
+ if {$found} {puts $line}
+ }
+ close $fd
+ }
+}
+
proc cleanup {} {
puts "Cleaning up..."
+ log_crashes
foreach pid $::pids {
catch {exec kill -9 $pid}
}
@@ -96,8 +127,10 @@ proc cleanup {} {
}
proc abort_sentinel_test msg {
+ incr ::failed
puts "WARNING: Aborting the test."
puts ">>>>>>>> $msg"
+ if {$::pause_on_error} pause_on_error
cleanup
exit 1
}
@@ -113,12 +146,15 @@ proc parse_options {} {
set ::pause_on_error 1
} elseif {$opt eq "--fail"} {
set ::simulate_error 1
+ } elseif {$opt eq {--valgrind}} {
+ set ::valgrind 1
} elseif {$opt eq "--help"} {
puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
puts "\nOptions:"
puts "--single <pattern> Only runs tests specified by pattern."
puts "--pause-on-error Pause for manual inspection on error."
puts "--fail Simulate a test failure."
+ puts "--valgrind Run with valgrind."
puts "--help Shows this help."
exit 0
} else {
@@ -215,6 +251,7 @@ proc test {descr code} {
flush stdout
if {[catch {set retval [uplevel 1 $code]} error]} {
+ incr ::failed
if {[string match "assertion:*" $error]} {
set msg [string range $error 10 end]
puts [colorstr red $msg]
@@ -230,6 +267,38 @@ proc test {descr code} {
}
}
+# Check memory leaks when running on OSX using the "leaks" utility.
+proc check_leaks instance_types {
+ if {[string match {*Darwin*} [exec uname -a]]} {
+ puts -nonewline "Testing for memory leaks..."; flush stdout
+ foreach type $instance_types {
+ foreach_instance_id [set ::${type}_instances] id {
+ if {[instance_is_killed $type $id]} continue
+ set pid [get_instance_attrib $type $id pid]
+ set output {0 leaks}
+ catch {exec leaks $pid} output
+ if {[string match {*process does not exist*} $output] ||
+ [string match {*cannot examine*} $output]} {
+ # In a few tests we kill the server process.
+ set output "0 leaks"
+ } else {
+ puts -nonewline "$type/$pid "
+ flush stdout
+ }
+ if {![string match {*0 leaks*} $output]} {
+ puts [colorstr red "=== MEMORY LEAK DETECTED ==="]
+ puts "Instance type $type, ID $id:"
+ puts $output
+ puts "==="
+ incr ::failed
+ }
+ }
+ }
+ puts ""
+ }
+}
+
+# Execute all the units inside the 'tests' directory.
proc run_tests {} {
set tests [lsort [glob ../tests/*]]
foreach test $tests {
@@ -239,6 +308,18 @@ proc run_tests {} {
if {[file isdirectory $test]} continue
puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
source $test
+ check_leaks {redis sentinel}
+ }
+}
+
+# Print a final message and exit with 0 or 1 depending on whether any test failed.
+proc end_tests {} {
+ if {$::failed == 0} {
+ puts "GOOD! No errors."
+ exit 0
+ } else {
+ puts "WARNING $::failed test(s) failed."
+ exit 1
}
}
@@ -360,15 +441,31 @@ proc get_instance_id_by_port {type port} {
# The instance can be restarted with restart-instance.
proc kill_instance {type id} {
set pid [get_instance_attrib $type $id pid]
+ set port [get_instance_attrib $type $id port]
+
if {$pid == -1} {
error "You tried to kill $type $id twice."
}
+
exec kill -9 $pid
set_instance_attrib $type $id pid -1
set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
# Remove the PID from the list of pids to kill at exit.
set ::pids [lsearch -all -inline -not -exact $::pids $pid]
+
+ # Wait for the port it was using to be available again, so that it's
+ # not a problem to start a new server ASAP with the same port.
+ set retry 10
+ while {[incr retry -1]} {
+ set port_is_free [catch {set s [socket 127.0.0.1 $port]}]
+ if {$port_is_free} break
+ catch {close $s}
+ after 1000
+ }
+ if {$retry == 0} {
+ error "Port $port does not return available after killing instance."
+ }
}
# Return true if the instance of the specified type/id is killed.
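
The retry loop in kill_instance above probes the port by attempting a TCP connect: a refused connection means nothing is listening there any more. The same probe expressed in C, as a sketch (the helper name is made up):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* Returns 1 when nothing accepts connections on 127.0.0.1:port. */
    static int port_is_free(int port) {
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        if (fd == -1) return 0;
        struct sockaddr_in sa = {0};
        sa.sin_family = AF_INET;
        sa.sin_port = htons((unsigned short)port);
        inet_pton(AF_INET, "127.0.0.1", &sa.sin_addr);
        int rc = connect(fd, (struct sockaddr *)&sa, sizeof(sa));
        close(fd);
        return rc == -1;   /* connect failed => port is free */
    }
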
@@ -385,12 +482,7 @@ proc restart_instance {type id} {
# Execute the instance with its old setup and append the new pid
# file for cleanup.
- if {$type eq "redis"} {
- set prgname redis-server
- } else {
- set prgname redis-sentinel
- }
- set pid [exec ../../../src/${prgname} $cfgfile &]
+ set pid [exec_instance $type $cfgfile]
set_instance_attrib $type $id pid $pid
lappend ::pids $pid
@@ -403,5 +495,17 @@ proc restart_instance {type id} {
set link [redis 127.0.0.1 $port]
$link reconnect 1
set_instance_attrib $type $id link $link
+
+ # Make sure the instance is not loading the dataset when this
+ # function returns.
+ while 1 {
+ catch {$link ping} retval
+ if {[string match {*LOADING*} $retval]} {
+ after 100
+ continue
+ } else {
+ break
+ }
+ }
}
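
The PING loop at the end of restart_instance waits out the -LOADING error a server returns while it reads the dataset back from disk. The equivalent probe with the bundled hiredis client, as a sketch (assuming the hiredis headers are on the include path):

    #include <string.h>
    #include "hiredis.h"

    /* Returns 1 once the server answers PING without a -LOADING error. */
    static int server_ready(const char *host, int port) {
        redisContext *c = redisConnect(host, port);
        if (c == NULL || c->err) { if (c) redisFree(c); return 0; }
        redisReply *reply = redisCommand(c, "PING");
        int ready = reply != NULL &&
                    !(reply->type == REDIS_REPLY_ERROR &&
                      strstr(reply->str, "LOADING") != NULL);
        if (reply) freeReplyObject(reply);
        redisFree(c);
        return ready;
    }
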
diff --git a/tests/integration/aof-race.tcl b/tests/integration/aof-race.tcl
index 207f20739..fb8d71083 100644
--- a/tests/integration/aof-race.tcl
+++ b/tests/integration/aof-race.tcl
@@ -1,4 +1,4 @@
-set defaults { appendonly {yes} appendfilename {appendonly.aof} }
+set defaults { appendonly {yes} appendfilename {appendonly.aof} aof-use-rdb-preamble {no} }
set server_path [tmpdir server.aof]
set aof_path "$server_path/appendonly.aof"
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index 01b928bb5..e397faeeb 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -88,7 +88,7 @@ tags {"aof"} {
set pattern "*Bad file format reading the append only file*"
set retry 10
while {$retry} {
- set result [exec tail -n1 < [dict get $srv stdout]]
+ set result [exec tail -1 < [dict get $srv stdout]]
if {[string match $pattern $result]} {
break
}
@@ -113,7 +113,7 @@ tags {"aof"} {
set pattern "*Unexpected end of file reading the append only file*"
set retry 10
while {$retry} {
- set result [exec tail -n1 < [dict get $srv stdout]]
+ set result [exec tail -1 < [dict get $srv stdout]]
if {[string match $pattern $result]} {
break
}
@@ -137,7 +137,7 @@ tags {"aof"} {
set pattern "*Unexpected end of file reading the append only file*"
set retry 10
while {$retry} {
- set result [exec tail -n1 < [dict get $srv stdout]]
+ set result [exec tail -1 < [dict get $srv stdout]]
if {[string match $pattern $result]} {
break
}
@@ -204,7 +204,7 @@ tags {"aof"} {
}
}
- ## Test that SPOP with <count> (that modifies the client's argc/argv) is correctly free'd
+ ## Uses the alsoPropagate() API.
create_aof {
append_to_aof [formatCommand sadd set foo]
append_to_aof [formatCommand sadd set bar]
diff --git a/tests/integration/block-repl.tcl b/tests/integration/block-repl.tcl
new file mode 100644
index 000000000..3b720ffdf
--- /dev/null
+++ b/tests/integration/block-repl.tcl
@@ -0,0 +1,58 @@
+# Test replication of blocking lists and zset operations.
+# Unlike stream operations such operations are "pop" style, so they consume
+# the list or sorted set, and must be replicated correctly.
+
+proc start_bg_block_op {host port db ops} {
+ set tclsh [info nameofexecutable]
+ exec $tclsh tests/helpers/bg_block_op.tcl $host $port $db $ops &
+}
+
+proc stop_bg_block_op {handle} {
+ catch {exec /bin/kill -9 $handle}
+}
+
+start_server {tags {"repl"}} {
+ start_server {} {
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+ set slave [srv 0 client]
+
+ set load_handle0 [start_bg_block_op $master_host $master_port 9 100000]
+ set load_handle1 [start_bg_block_op $master_host $master_port 9 100000]
+ set load_handle2 [start_bg_block_op $master_host $master_port 9 100000]
+
+ test {First server should have role slave after SLAVEOF} {
+ $slave slaveof $master_host $master_port
+ after 1000
+ s 0 role
+ } {slave}
+
+ test {Test replication with blocking lists and sorted sets operations} {
+ after 25000
+ stop_bg_block_op $load_handle0
+ stop_bg_block_op $load_handle1
+ stop_bg_block_op $load_handle2
+ set retry 10
+ while {$retry && ([$master debug digest] ne [$slave debug digest])}\
+ {
+ after 1000
+ incr retry -1
+ }
+
+ if {[$master debug digest] ne [$slave debug digest]} {
+ set csv1 [csvdump r]
+ set csv2 [csvdump {r -1}]
+ set fd [open /tmp/repldump1.txt w]
+ puts -nonewline $fd $csv1
+ close $fd
+ set fd [open /tmp/repldump2.txt w]
+ puts -nonewline $fd $csv2
+ close $fd
+ puts "Master - Slave inconsistency"
+ puts "Run diff -u against /tmp/repldump*.txt for more info"
+ }
+ assert_equal [r debug digest] [r -1 debug digest]
+ }
+ }
+}
diff --git a/tests/integration/logging.tcl b/tests/integration/logging.tcl
new file mode 100644
index 000000000..c1f4854d4
--- /dev/null
+++ b/tests/integration/logging.tcl
@@ -0,0 +1,24 @@
+set server_path [tmpdir server.log]
+set system_name [string tolower [exec uname -s]]
+
+if {$system_name eq {linux} || $system_name eq {darwin}} {
+ start_server [list overrides [list dir $server_path]] {
+ test "Server is able to generate a stack trace on selected systems" {
+ r config set watchdog-period 200
+ r debug sleep 1
+ set pattern "*debugCommand*"
+ set retry 10
+ while {$retry} {
+ set result [exec tail -100 < [srv 0 stdout]]
+ if {[string match $pattern $result]} {
+ break
+ }
+ incr retry -1
+ after 1000
+ }
+ if {$retry == 0} {
+ error "assertion:expected stack trace not found into log file"
+ }
+ }
+ }
+}
diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl
new file mode 100644
index 000000000..ba610a3b8
--- /dev/null
+++ b/tests/integration/psync2-reg.tcl
@@ -0,0 +1,78 @@
+# Issue 3899 regression test.
+# We create a chain of three instances: master -> slave -> slave2
+# and continuously break the link while traffic is generated by
+# redis-benchmark. At the end we check that the data is the same
+# everywhere.
+
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0 ; # Do not exit at end of the test
+
+ set duration 20 ; # Total test seconds
+
+ for {set j 0} {$j < 3} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ # Setup the replication and backlog parameters
+ test "PSYNC2 #3899 regression: setup" {
+ $R(1) slaveof $R_host(0) $R_port(0)
+ $R(2) slaveof $R_host(0) $R_port(0)
+ $R(0) set foo bar
+ wait_for_condition 50 1000 {
+ [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1
+ } else {
+ fail "Slaves not replicating from master"
+ }
+ $R(0) config set repl-backlog-size 10mb
+ $R(1) config set repl-backlog-size 10mb
+ }
+
+ set cycle_start_time [clock milliseconds]
+ set bench_pid [exec src/redis-benchmark -p $R_port(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
+ while 1 {
+ set elapsed [expr {[clock milliseconds]-$cycle_start_time}]
+ if {$elapsed > $duration*1000} break
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill first slave" {
+ $R(1) client kill type master
+ }
+ }
+ if {rand() < .05} {
+ test "PSYNC2 #3899 regression: kill chained slave" {
+ $R(2) client kill type master
+ }
+ }
+ after 100
+ }
+ exec kill -9 $bench_pid
+
+ if {$debug_msg} {
+ for {set j 0} {$j < 100} {incr j} {
+ if {
+ [$R(0) debug digest] == [$R(1) debug digest] &&
+ [$R(1) debug digest] == [$R(2) debug digest]
+ } break
+ puts [$R(0) debug digest]
+ puts [$R(1) debug digest]
+ puts [$R(2) debug digest]
+ after 1000
+ }
+ }
+
+ test "PSYNC2 #3899 regression: verify consistency" {
+ wait_for_condition 50 1000 {
+ ([$R(0) debug digest] eq [$R(1) debug digest]) &&
+ ([$R(1) debug digest] eq [$R(2) debug digest])
+ } else {
+ fail "The three instances have different data sets"
+ }
+ }
+}}}
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
new file mode 100644
index 000000000..b76f36363
--- /dev/null
+++ b/tests/integration/psync2.tcl
@@ -0,0 +1,249 @@
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
+start_server {} {
+start_server {} {
+ set master_id 0 ; # Current master
+ set start_time [clock seconds] ; # Test start time
+ set counter_value 0 ; # Current value of the Redis counter "x"
+
+ # Config
+ set debug_msg 0 ; # Enable additional debug messages
+
+ set no_exit 0 ; # Do not exit at end of the test
+
+ set duration 20 ; # Total test seconds
+
+ set genload 1 ; # Load master with writes at every cycle
+
+ set genload_time 5000 ; # Writes duration time in ms
+
+ set disconnect 1 ; # Break replication link between random
+ # master and slave instances while the
+ # master is loaded with writes.
+
+ set disconnect_period 1000 ; # Disconnect repl link every N ms.
+
+ for {set j 0} {$j < 5} {incr j} {
+ set R($j) [srv [expr 0-$j] client]
+ set R_host($j) [srv [expr 0-$j] host]
+ set R_port($j) [srv [expr 0-$j] port]
+ if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
+ }
+
+ set cycle 1
+ while {([clock seconds]-$start_time) < $duration} {
+ test "PSYNC2: --- CYCLE $cycle ---" {}
+ incr cycle
+
+ # Create a random replication layout.
+ # Start with switching master (this simulates a failover).
+
+ # 1) Select the new master.
+ set master_id [randomInt 5]
+ set used [list $master_id]
+ test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
+ $R($master_id) slaveof no one
+ if {$counter_value == 0} {
+ $R($master_id) set x $counter_value
+ }
+ }
+
+ # 2) Attach all the slaves to a random instance
+ while {[llength $used] != 5} {
+ while 1 {
+ set slave_id [randomInt 5]
+ if {[lsearch -exact $used $slave_id] == -1} break
+ }
+ set rand [randomInt [llength $used]]
+ set mid [lindex $used $rand]
+ set master_host $R_host($mid)
+ set master_port $R_port($mid)
+
+ test "PSYNC2: Set #$slave_id to replicate from #$mid" {
+ $R($slave_id) slaveof $master_host $master_port
+ }
+ lappend used $slave_id
+ }
+
+ # 3) Increment the counter and wait for all the instances
+ # to converge.
+ test "PSYNC2: cluster is consistent after failover" {
+ $R($master_id) incr x; incr counter_value
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # 4) Generate load while breaking the connection of random
+ # slave-master pairs.
+ test "PSYNC2: generate load while killing replication links" {
+ set t [clock milliseconds]
+ set next_break [expr {$t+$disconnect_period}]
+ while {[clock milliseconds]-$t < $genload_time} {
+ if {$genload} {
+ $R($master_id) incr x; incr counter_value
+ }
+ if {[clock milliseconds] == $next_break} {
+ set next_break \
+ [expr {[clock milliseconds]+$disconnect_period}]
+ set slave_id [randomInt 5]
+ if {$disconnect} {
+ $R($slave_id) client kill type master
+ if {$debug_msg} {
+ puts "+++ Breaking link for slave #$slave_id"
+ }
+ }
+ }
+ }
+ }
+
+ # 5) Increment the counter and wait for all the instances
+ set x [$R($master_id) get x]
+ test "PSYNC2: cluster is consistent after load (x = $x)" {
+ for {set j 0} {$j < 5} {incr j} {
+ wait_for_condition 50 1000 {
+ [$R($j) get x] == $counter_value
+ } else {
+ fail "Instance #$j x variable is inconsistent"
+ }
+ }
+ }
+
+ # Put down the old master so that it cannot generate more
+ # replication stream. This way, in the next master switch, the time at
+ # which we move slaves away is not important: each will have the full
+ # history (otherwise PINGs would make certain slaves have more history,
+ # and sometimes a full resync would be needed).
+ $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail.
+
+ if {$debug_msg} {
+ for {set j 0} {$j < 5} {incr j} {
+ puts "$j: sync_full: [status $R($j) sync_full]"
+ puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
+ puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
+ puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
+ puts "---"
+ }
+ }
+
+ test "PSYNC2: total sum of full synchronizations is exactly 4" {
+ set sum 0
+ for {set j 0} {$j < 5} {incr j} {
+ incr sum [status $R($j) sync_full]
+ }
+ assert {$sum == 4}
+ }
+
+ # In any case, limit the maximum number of cycles. This is useful when the
+ # test is skipped via --only option of the test suite. In that case
+ # we don't want to see many seconds of this test being just skipped.
+ if {$cycle > 50} break
+ }
+
+ test "PSYNC2: Bring the master back again for next test" {
+ $R($master_id) slaveof no one
+ set master_host $R_host($master_id)
+ set master_port $R_port($master_id)
+ for {set j 0} {$j < 5} {incr j} {
+ if {$j == $master_id} continue
+ $R($j) slaveof $master_host $master_port
+ }
+
+ # Wait for slaves to sync
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4
+ } else {
+ fail "Slave not reconnecting"
+ }
+ }
+
+ test "PSYNC2: Partial resync after restart using RDB aux fields" {
+ # Pick a random slave
+ set slave_id [expr {($master_id+1)%5}]
+ set sync_count [status $R($master_id) sync_full]
+ catch {
+ $R($slave_id) config rewrite
+ $R($slave_id) debug restart
+ }
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4
+ } else {
+ fail "Slave not reconnecting"
+ }
+ set new_sync_count [status $R($master_id) sync_full]
+ assert {$sync_count == $new_sync_count}
+ }
+
+ test "PSYNC2: Slave RDB restart with EVALSHA in backlog issue #4483" {
+ # Pick a random slave
+ set slave_id [expr {($master_id+1)%5}]
+ set sync_count [status $R($master_id) sync_full]
+
+ # Make sure to replicate the first EVAL while the slave is online
+ # so that it's part of the scripts the master believes it's safe
+ # to propagate as EVALSHA.
+ $R($master_id) EVAL {return redis.call("incr","__mycounter")} 0
+ $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
+
+ # Wait for the two to sync
+ wait_for_condition 50 1000 {
+ [$R($master_id) debug digest] == [$R($slave_id) debug digest]
+ } else {
+ fail "Slave not reconnecting"
+ }
+
+ # Prevent the slave from receiving master updates, and at
+ # the same time send a new script several times to the
+ # master, so that we'll end up with EVALSHA in the backlog.
+ $R($slave_id) slaveof 127.0.0.1 0
+
+ $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
+ $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
+ $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
+
+ catch {
+ $R($slave_id) config rewrite
+ $R($slave_id) debug restart
+ }
+
+ # Reconfigure the slave correctly again, when it's back online.
+ set retry 50
+ while {$retry} {
+ if {[catch {
+ $R($slave_id) slaveof $master_host $master_port
+ }]} {
+ after 1000
+ } else {
+ break
+ }
+ incr retry -1
+ }
+
+ # The master should be back at 4 slaves eventually
+ wait_for_condition 50 1000 {
+ [status $R($master_id) connected_slaves] == 4
+ } else {
+ fail "Slave not reconnecting"
+ }
+ set new_sync_count [status $R($master_id) sync_full]
+ assert {$sync_count == $new_sync_count}
+
+ # However if the slave started with the full state of the
+ # scripting engine, we should now have the same digest.
+ wait_for_condition 50 1000 {
+ [$R($master_id) debug digest] == [$R($slave_id) debug digest]
+ } else {
+ fail "Debug digest mismatch between master and slave in post-restart handshake"
+ }
+ }
+
+ if {$no_exit} {
+ while 1 { puts -nonewline .; flush stdout; after 1000}
+ }
+
+}}}}}
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index 71876a6ed..58a098edc 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -7,19 +7,19 @@ start_server [list overrides [list "dir" $server_path "dbfilename" "encodings.rd
test "RDB encoding loading test" {
r select 0
csvdump r
- } {"compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-"hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000",
-"hash_zipped","hash","a","1","b","2","c","3",
-"list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000",
-"list_zipped","list","1","2","3","a","b","c","100000","6000000000",
-"number","string","10"
-"set","set","1","100000","2","3","6000000000","a","b","c",
-"set_zipped_1","set","1","2","3","4",
-"set_zipped_2","set","100000","200000","300000","400000",
-"set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000",
-"string","string","Hello World"
-"zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000",
-"zset_zipped","zset","a","1","b","2","c","3",
+ } {"0","compressible","string","aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+"0","hash","hash","a","1","aa","10","aaa","100","b","2","bb","20","bbb","200","c","3","cc","30","ccc","300","ddd","400","eee","5000000000",
+"0","hash_zipped","hash","a","1","b","2","c","3",
+"0","list","list","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000","1","2","3","a","b","c","100000","6000000000",
+"0","list_zipped","list","1","2","3","a","b","c","100000","6000000000",
+"0","number","string","10"
+"0","set","set","1","100000","2","3","6000000000","a","b","c",
+"0","set_zipped_1","set","1","2","3","4",
+"0","set_zipped_2","set","100000","200000","300000","400000",
+"0","set_zipped_3","set","1000000000","2000000000","3000000000","4000000000","5000000000","6000000000",
+"0","string","string","Hello World"
+"0","zset","zset","a","1","b","2","c","3","aa","10","bb","20","cc","30","aaa","100","bbb","200","ccc","300","aaaa","1000","cccc","123456789","bbbb","5000000000",
+"0","zset_zipped","zset","a","1","b","2","c","3",
}
}
@@ -39,6 +39,25 @@ start_server [list overrides [list "dir" $server_path]] {
} {0000000000000000000000000000000000000000}
}
+start_server [list overrides [list "dir" $server_path]] {
+ test {Test RDB stream encoding} {
+ for {set j 0} {$j < 1000} {incr j} {
+ if {rand() < 0.9} {
+ r xadd stream * foo $j
+ } else {
+ r xadd stream * bar $j
+ }
+ }
+ r xgroup create stream mygroup 0
+ r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
+ set digest [r debug digest]
+ r debug reload
+ set newdigest [r debug digest]
+ assert {$digest eq $newdigest}
+ r del stream
+ }
+}
+
# Helper function to start a server and kill it, just to check the error
# logged.
set defaults {}
@@ -66,7 +85,7 @@ if {!$isroot} {
test {Server should not start if RDB file can't be open} {
wait_for_condition 50 100 {
[string match {*Fatal error loading*} \
- [exec tail -n1 < [dict get $srv stdout]]]
+ [exec tail -1 < [dict get $srv stdout]]]
} else {
fail "Server started even if RDB was unreadable!"
}
@@ -89,8 +108,8 @@ close $fd
start_server_and_kill_it [list "dir" $server_path] {
test {Server should not start if RDB is corrupted} {
wait_for_condition 50 100 {
- [string match {*RDB checksum*} \
- [exec tail -n1 < [dict get $srv stdout]]]
+ [string match {*CRC error*} \
+ [exec tail -10 < [dict get $srv stdout]]]
} else {
fail "Server started even if RDB was corrupted!"
}
diff --git a/tests/integration/replication-2.tcl b/tests/integration/replication-2.tcl
index 9446e5cd9..2ff19c3c4 100644
--- a/tests/integration/replication-2.tcl
+++ b/tests/integration/replication-2.tcl
@@ -2,9 +2,12 @@ start_server {tags {"repl"}} {
start_server {} {
test {First server should have role slave after SLAVEOF} {
r -1 slaveof [srv 0 host] [srv 0 port]
- after 1000
- s -1 role
- } {slave}
+ wait_for_condition 50 100 {
+ [s -1 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
test {If min-slaves-to-write is honored, write is accepted} {
r config set min-slaves-to-write 1
diff --git a/tests/integration/replication-3.tcl b/tests/integration/replication-3.tcl
index 0fcbad45b..580be7602 100644
--- a/tests/integration/replication-3.tcl
+++ b/tests/integration/replication-3.tcl
@@ -30,6 +30,18 @@ start_server {tags {"repl"}} {
}
assert_equal [r debug digest] [r -1 debug digest]
}
+
+ test {Slave is able to evict keys created in writable slaves} {
+ r -1 select 5
+ assert {[r -1 dbsize] == 0}
+ r -1 config set slave-read-only no
+ r -1 set key1 1 ex 5
+ r -1 set key2 2 ex 5
+ r -1 set key3 3 ex 5
+ assert {[r -1 dbsize] == 3}
+ after 6000
+ r -1 dbsize
+ } {0}
}
}
@@ -88,7 +100,6 @@ start_server {tags {"repl"}} {
close $fd
puts "Master - Slave inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
-
}
set old_digest [r debug digest]
@@ -97,5 +108,27 @@ start_server {tags {"repl"}} {
set new_digest [r debug digest]
assert {$old_digest eq $new_digest}
}
+
+ test {SLAVE can reload "lua" AUX RDB fields of duplicated scripts} {
+ # Force a Slave full resynchronization
+ r debug change-repl-id
+ r -1 client kill type master
+
+ # Check that after a full resync the slave can still load
+ # correctly the RDB file: such file will contain "lua" AUX
+ # sections with scripts already in the memory of the master.
+
+ wait_for_condition 50 100 {
+ [s -1 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+
+ wait_for_condition 50 100 {
+ [r debug digest] eq [r -1 debug digest]
+ } else {
+ fail "DEBUG DIGEST mismatch after full SYNC with many scripts"
+ }
+ }
}
}
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index 6db9ffe2b..1c559b706 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -132,5 +132,24 @@ start_server {tags {"repl"}} {
}
assert {[$master dbsize] > 0}
}
+
+ test {Replication of SPOP command -- alsoPropagate() API} {
+ $master del myset
+ set size [expr 1+[randomInt 100]]
+ set content {}
+ for {set j 0} {$j < $size} {incr j} {
+ lappend content [randomValue]
+ }
+ $master sadd myset {*}$content
+
+ set count [randomInt 100]
+ set result [$master spop myset $count]
+
+ wait_for_condition 50 100 {
+ [$master debug digest] eq [$slave debug digest]
+ } else {
+ fail "SPOP replication inconsistency"
+ }
+ }
}
}
diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl
index f131dafe3..10052f7c1 100644
--- a/tests/integration/replication-psync.tcl
+++ b/tests/integration/replication-psync.tcl
@@ -11,9 +11,13 @@ proc stop_bg_complex_data {handle} {
# partial resync attempts, all this while flooding the master with
# write queries.
#
-# You can specifiy backlog size, ttl, delay before reconnection, test duration
+# You can specify backlog size, ttl, delay before reconnection, test duration
# in seconds, and an additional condition to verify at the end.
-proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
+#
+# If reconnect is > 0, the test actually tries to break the connection and
+# reconnect with the master, otherwise just the initial synchronization is
+# checked for consistency.
+proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} {
start_server {tags {"repl"}} {
start_server {} {
@@ -24,6 +28,8 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
$master config set repl-backlog-size $backlog_size
$master config set repl-backlog-ttl $backlog_ttl
+ $master config set repl-diskless-sync $diskless
+ $master config set repl-diskless-sync-delay 1
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
@@ -41,29 +47,31 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
# Check that the background clients are actually writing.
test {Detect write load to master} {
- wait_for_condition 50 100 {
+ wait_for_condition 50 1000 {
[$master dbsize] > 100
} else {
fail "Can't detect write load from background clients."
}
}
- test "Test replication partial resync: $descr" {
+ test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" {
# Now while the clients are writing data, break the master-slave
# link multiple times.
- for {set j 0} {$j < $duration*10} {incr j} {
- after 100
- # catch {puts "MASTER [$master dbsize] keys, SLAVE [$slave dbsize] keys"}
+ if ($reconnect) {
+ for {set j 0} {$j < $duration*10} {incr j} {
+ after 100
+ # catch {puts "MASTER [$master dbsize] keys, SLAVE [$slave dbsize] keys"}
- if {($j % 20) == 0} {
- catch {
- if {$delay} {
- $slave multi
- $slave client kill $master_host:$master_port
- $slave debug sleep $delay
- $slave exec
- } else {
- $slave client kill $master_host:$master_port
+ if {($j % 20) == 0} {
+ catch {
+ if {$delay} {
+ $slave multi
+ $slave client kill $master_host:$master_port
+ $slave debug sleep $delay
+ $slave exec
+ } else {
+ $slave client kill $master_host:$master_port
+ }
}
}
}
@@ -98,18 +106,23 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond} {
}
}
-test_psync {ok psync} 6 1000000 3600 0 {
- assert {[s -1 sync_partial_ok] > 0}
-}
+foreach diskless {no yes} {
+ test_psync {no reconnection, just sync} 6 1000000 3600 0 {
+ } $diskless 0
-test_psync {no backlog} 6 100 3600 0.5 {
- assert {[s -1 sync_partial_err] > 0}
-}
+ test_psync {ok psync} 6 100000000 3600 0 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $diskless 1
-test_psync {ok after delay} 3 100000000 3600 3 {
- assert {[s -1 sync_partial_ok] > 0}
-}
+ test_psync {no backlog} 6 100 3600 0.5 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $diskless 1
+
+ test_psync {ok after delay} 3 100000000 3600 3 {
+ assert {[s -1 sync_partial_ok] > 0}
+ } $diskless 1
-test_psync {backlog expired} 3 100000000 1 3 {
- assert {[s -1 sync_partial_err] > 0}
+ test_psync {backlog expired} 3 100000000 1 3 {
+ assert {[s -1 sync_partial_err] > 0}
+ } $diskless 1
}
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index 71a7ec60a..e811cf0ee 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -1,10 +1,70 @@
+proc log_file_matches {log pattern} {
+ set fp [open $log r]
+ set content [read $fp]
+ close $fp
+ string match $pattern $content
+}
+
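A minimal usage sketch for the helper above; the path here is hypothetical, while the tests below pass the server stdout file obtained via [srv 0 stdout]:

    # Returns 1 if the glob pattern occurs anywhere in the log, 0 otherwise.
    set slave_log "/tmp/redis-slave-stdout.log"   ;# hypothetical path
    if {[log_file_matches $slave_log "*Timeout connecting to the MASTER*"]} {
        puts "handshake timeout was logged"
    }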
+start_server {tags {"repl"}} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set slave_log [srv 0 stdout]
+ start_server {} {
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ # Configure the master in order to hang waiting for the BGSAVE
+ # operation, so that the slave remains in the handshake state.
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 1000
+
+ # Use a short replication timeout on the slave, so that if there
+ # are no bugs the timeout is triggered in a reasonable amount
+ # of time.
+ $slave config set repl-timeout 5
+
+ # Start the replication process...
+ $slave slaveof $master_host $master_port
+
+ test {Slave enters handshake} {
+ wait_for_condition 50 1000 {
+ [string match *handshake* [$slave role]]
+ } else {
+ fail "Slave does not enter handshake state"
+ }
+ }
+
+ # But make the master unable to send
+ # the periodic newlines to refresh the connection. The slave
+ # should detect the timeout.
+ $master debug sleep 10
+
+ test {Slave is able to detect timeout during handshake} {
+ wait_for_condition 50 1000 {
+ [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
+ } else {
+ fail "Slave is not able to detect timeout"
+ }
+ }
+ }
+}
+
start_server {tags {"repl"}} {
+ set A [srv 0 client]
+ set A_host [srv 0 host]
+ set A_port [srv 0 port]
start_server {} {
- test {First server should have role slave after SLAVEOF} {
- r -1 slaveof [srv 0 host] [srv 0 port]
+ set B [srv 0 client]
+ set B_host [srv 0 host]
+ set B_port [srv 0 port]
+
+ test {Set instance A as slave of B} {
+ $A slaveof $B_host $B_port
wait_for_condition 50 100 {
- [s -1 role] eq {slave} &&
- [string match {*master_link_status:up*} [r -1 info replication]]
+ [lindex [$A role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$A info replication]]
} else {
fail "Can't turn the instance into a slave"
}
@@ -15,9 +75,9 @@ start_server {tags {"repl"}} {
$rd brpoplpush a b 5
r lpush a foo
wait_for_condition 50 100 {
- [r debug digest] eq [r -1 debug digest]
+ [$A debug digest] eq [$B debug digest]
} else {
- fail "Master and slave have different digest: [r debug digest] VS [r -1 debug digest]"
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
}
}
@@ -28,7 +88,36 @@ start_server {tags {"repl"}} {
r lpush c 3
$rd brpoplpush c d 5
after 1000
- assert_equal [r debug digest] [r -1 debug digest]
+ assert_equal [$A debug digest] [$B debug digest]
+ }
+
+ test {BLPOP followed by role change, issue #2473} {
+ set rd [redis_deferring_client]
+ $rd blpop foo 0 ; # Block while B is a master
+
+ # Turn B into master of A
+ $A slaveof no one
+ $B slaveof $A_host $A_port
+ wait_for_condition 50 100 {
+ [lindex [$B role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$B info replication]]
+ } else {
+ fail "Can't turn the instance into a slave"
+ }
+
+ # Push elements into the "foo" list of the new slave.
+ # If the client is still attached to the instance, we'll get
+ # a desync between the two instances.
+ $A rpush foo a b c
+ after 100
+
+ wait_for_condition 50 100 {
+ [$A debug digest] eq [$B debug digest] &&
+ [$A lrange foo 0 -1] eq {a b c} &&
+ [$B lrange foo 0 -1] eq {a b c}
+ } else {
+ fail "Master and slave have different digest: [$A debug digest] VS [$B debug digest]"
+ }
}
}
}
@@ -113,7 +202,7 @@ foreach dl {no yes} {
start_server {} {
lappend slaves [srv 0 client]
test "Connect multiple slaves at the same time (issue #141), diskless=$dl" {
- # Send SALVEOF commands to slaves
+ # Send SLAVEOF commands to slaves
[lindex $slaves 0] slaveof $master_host $master_port
[lindex $slaves 1] slaveof $master_host $master_port
[lindex $slaves 2] slaveof $master_host $master_port
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
index f33029959..9a2fcfb49 100644
--- a/tests/sentinel/run.tcl
+++ b/tests/sentinel/run.tcl
@@ -13,6 +13,7 @@ proc main {} {
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup
+ end_tests
}
if {[catch main e]} {
diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl
index 1a60d814b..5214fdce1 100644
--- a/tests/sentinel/tests/05-manual.tcl
+++ b/tests/sentinel/tests/05-manual.tcl
@@ -6,7 +6,8 @@ test "Manual failover works" {
set old_port [RI $master_id tcp_port]
set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
assert {[lindex $addr 1] == $old_port}
- S 0 SENTINEL FAILOVER mymaster
+ catch {S 0 SENTINEL FAILOVER mymaster} reply
+ assert {$reply eq "OK"}
foreach_sentinel_id id {
wait_for_condition 1000 50 {
[lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
diff --git a/tests/sentinel/tests/06-ckquorum.tcl b/tests/sentinel/tests/06-ckquorum.tcl
new file mode 100644
index 000000000..31e5fa2f8
--- /dev/null
+++ b/tests/sentinel/tests/06-ckquorum.tcl
@@ -0,0 +1,34 @@
+# Test for the SENTINEL CKQUORUM command
+
+source "../tests/includes/init-tests.tcl"
+set num_sentinels [llength $::sentinel_instances]
+
+test "CKQUORUM reports OK and the right amount of Sentinels" {
+ foreach_sentinel_id id {
+ assert_match "*OK $num_sentinels usable*" [S $id SENTINEL CKQUORUM mymaster]
+ }
+}
+
+test "CKQUORUM detects quorum cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum [expr {$num_sentinels+1}]
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+}
+
+test "CKQUORUM detects failover authorization cannot be reached" {
+ set orig_quorum [expr {$num_sentinels/2+1}]
+ S 0 SENTINEL SET mymaster quorum 1
+ kill_instance sentinel 1
+ kill_instance sentinel 2
+ kill_instance sentinel 3
+ after 5000
+ catch {[S 0 SENTINEL CKQUORUM mymaster]} err
+ assert_match "*NOQUORUM*" $err
+ S 0 SENTINEL SET mymaster quorum $orig_quorum
+ restart_instance sentinel 1
+ restart_instance sentinel 2
+ restart_instance sentinel 3
+}
+
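A worked sketch of the quorum arithmetic used above, assuming the usual init-tests setup of 5 Sentinels:

    # A strict majority via Tcl integer division:
    #   orig_quorum = 5/2 + 1 = 3
    # Setting quorum to num_sentinels+1 = 6 can never be satisfied,
    # so SENTINEL CKQUORUM mymaster replies with a NOQUORUM error.
    set num_sentinels 5
    set orig_quorum [expr {$num_sentinels/2+1}]   ;# -> 3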
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
new file mode 100644
index 000000000..fb2993b6f
--- /dev/null
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -0,0 +1,78 @@
+# Test conditions where an instance is considered to be down
+
+source "../tests/includes/init-tests.tcl"
+
+proc ensure_master_up {} {
+ wait_for_condition 1000 50 {
+ [dict get [S 4 sentinel master mymaster] flags] eq "master"
+ } else {
+ fail "Master flags are not just 'master'"
+ }
+}
+
+proc ensure_master_down {} {
+ wait_for_condition 1000 50 {
+ [string match *down* \
+ [dict get [S 4 sentinel master mymaster] flags]]
+ } else {
+ fail "Master is not flagged SDOWN"
+ }
+}
+
+test "Crash the majority of Sentinels to prevent failovers for this unit" {
+ for {set id 0} {$id < $quorum} {incr id} {
+ kill_instance sentinel $id
+ }
+}
+
+test "SDOWN is triggered by non-responding but not crashed instance" {
+ lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+ ensure_master_up
+ exec ../../../src/redis-cli -h $host -p $port debug sleep 10 > /dev/null &
+ ensure_master_down
+ ensure_master_up
+}
+
+test "SDOWN is triggered by crashed instance" {
+ lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
+ ensure_master_up
+ kill_instance redis 0
+ ensure_master_down
+ restart_instance redis 0
+ ensure_master_up
+}
+
+test "SDOWN is triggered by masters advertising as slaves" {
+ ensure_master_up
+ R 0 slaveof 127.0.0.1 34567
+ ensure_master_down
+ R 0 slaveof no one
+ ensure_master_up
+}
+
+test "SDOWN is triggered by misconfigured instance repling with errors" {
+ ensure_master_up
+ set orig_dir [lindex [R 0 config get dir] 1]
+ set orig_save [lindex [R 0 config get save] 1]
+ # Set dir to / and filename to "tmp" to make sure it will fail.
+ R 0 config set dir /
+ R 0 config set dbfilename tmp
+ R 0 config set save "1000000 1000000"
+ R 0 bgsave
+ ensure_master_down
+ R 0 config set save $orig_save
+ R 0 config set dir $orig_dir
+ R 0 config set dbfilename dump.rdb
+ R 0 bgsave
+ ensure_master_up
+}
+
+# We use this test setup to also test command renaming, as a side
+# effect of the master going down when we send PONG instead of PING.
+test "SDOWN is triggered if we rename PING to PONG" {
+ ensure_master_up
+ S 4 SENTINEL SET mymaster rename-command PING PONG
+ ensure_master_down
+ S 4 SENTINEL SET mymaster rename-command PING PING
+ ensure_master_up
+}
diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index d4e7d2e5d..1576053b4 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -58,7 +58,8 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
set idx 0; # Index of the node that will respond.
set errmsg {}
foreach start_node $::redis_cluster::startup_nodes($id) {
- lassign [split $start_node :] start_host start_port
+ set ip_port [lindex [split $start_node @] 0]
+ lassign [split $ip_port :] start_host start_port
if {[catch {
set r {}
set r [redis $start_host $start_port]
@@ -68,7 +69,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
if {$r ne {}} {catch {$r close}}
incr idx
if {[string length $errmsg] < 200} {
- append errmsg " $start_node: $e"
+ append errmsg " $ip_port: $e"
}
continue ; # Try next.
} else {
@@ -98,6 +99,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
set args [split $line " "]
lassign $args nodeid addr flags slaveof pingsent pongrecv configepoch linkstate
set slots [lrange $args 8 end]
+ set addr [lindex [split $addr @] 0]
if {$addr eq {:0}} {
set addr $start_host:$start_port
}
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 67ee24528..5578f1fd6 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -54,10 +54,15 @@ proc kill_server config {
# kill server and wait for the process to be totally exited
catch {exec kill $pid}
+ if {$::valgrind} {
+ set max_wait 60000
+ } else {
+ set max_wait 10000
+ }
while {[is_alive $config]} {
incr wait 10
- if {$wait >= 5000} {
+ if {$wait >= $max_wait} {
puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid}
} elseif {$wait % 1000 == 0} {
@@ -207,7 +212,9 @@ proc start_server {options {code undefined}} {
set stderr [format "%s/%s" [dict get $config "dir"] "stderr"]
if {$::valgrind} {
- set pid [exec valgrind --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
+ set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
+ } elseif ($::stack_logging) {
+ set pid [exec /usr/bin/env MallocStackLogging=1 MallocLogFile=/tmp/malloc_log.txt src/redis-server $config_file > $stdout 2> $stderr &]
} else {
set pid [exec src/redis-server $config_file > $stdout 2> $stderr &]
}
@@ -269,9 +276,15 @@ proc start_server {options {code undefined}} {
error_and_quit $config_file $line
}
+ if {$::wait_server} {
+ set msg "server started PID: [dict get $srv "pid"]. press any key to continue..."
+ puts $msg
+ read stdin 1
+ }
+
while 1 {
# check that the server actually started and is ready for connections
- if {[exec grep "ready to accept" | wc -l < $stdout] > 0} {
+ if {[exec grep -i "Ready to accept" | wc -l < $stdout] > 0} {
break
}
after 10
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index 31371c567..6f02f2f12 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -1,6 +1,8 @@
set ::num_tests 0
set ::num_passed 0
set ::num_failed 0
+set ::num_skipped 0
+set ::num_aborted 0
set ::tests_failed {}
proc fail {msg} {
@@ -37,13 +39,7 @@ proc assert_error {pattern code} {
}
proc assert_encoding {enc key} {
- # Swapped out values don't have an encoding, so make sure that
- # the value is swapped in before checking the encoding.
set dbg [r debug object $key]
- while {[string match "* swapped at:*" $dbg]} {
- r debug swapin $key
- set dbg [r debug object $key]
- }
assert_match "* encoding:$enc *" $dbg
}
@@ -74,10 +70,26 @@ proc test {name code {okpattern undefined}} {
# abort if tagged with a tag to deny
foreach tag $::denytags {
if {[lsearch $::tags $tag] >= 0} {
+ incr ::num_aborted
+ send_data_packet $::test_server_fd ignore $name
return
}
}
+ # abort if test name in skiptests
+ if {[lsearch $::skiptests $name] >= 0} {
+ incr ::num_skipped
+ send_data_packet $::test_server_fd skip $name
+ return
+ }
+
+ # abort if only_tests was given and this test name is not in it
+ if {[llength $::only_tests] > 0 && [lsearch $::only_tests $name] < 0} {
+ incr ::num_skipped
+ send_data_packet $::test_server_fd skip $name
+ return
+ }
+
# check if tagged with at least 1 tag to allow when there *is* a list
# of tags to allow, because default policy is to run everything
if {[llength $::allowtags] > 0} {
@@ -88,6 +100,8 @@ proc test {name code {okpattern undefined}} {
}
}
if {$matched < 1} {
+ incr ::num_aborted
+ send_data_packet $::test_server_fd ignore $name
return
}
}
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 4b9caced8..181c865fc 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -262,46 +262,50 @@ proc formatCommand {args} {
proc csvdump r {
set o {}
- foreach k [lsort [{*}$r keys *]] {
- set type [{*}$r type $k]
- append o [csvstring $k] , [csvstring $type] ,
- switch $type {
- string {
- append o [csvstring [{*}$r get $k]] "\n"
- }
- list {
- foreach e [{*}$r lrange $k 0 -1] {
- append o [csvstring $e] ,
+ for {set db 0} {$db < 16} {incr db} {
+ {*}$r select $db
+ foreach k [lsort [{*}$r keys *]] {
+ set type [{*}$r type $k]
+ append o [csvstring $db] , [csvstring $k] , [csvstring $type] ,
+ switch $type {
+ string {
+ append o [csvstring [{*}$r get $k]] "\n"
}
- append o "\n"
- }
- set {
- foreach e [lsort [{*}$r smembers $k]] {
- append o [csvstring $e] ,
+ list {
+ foreach e [{*}$r lrange $k 0 -1] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- append o "\n"
- }
- zset {
- foreach e [{*}$r zrange $k 0 -1 withscores] {
- append o [csvstring $e] ,
+ set {
+ foreach e [lsort [{*}$r smembers $k]] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- append o "\n"
- }
- hash {
- set fields [{*}$r hgetall $k]
- set newfields {}
- foreach {k v} $fields {
- lappend newfields [list $k $v]
+ zset {
+ foreach e [{*}$r zrange $k 0 -1 withscores] {
+ append o [csvstring $e] ,
+ }
+ append o "\n"
}
- set fields [lsort -index 0 $newfields]
- foreach kv $fields {
- append o [csvstring [lindex $kv 0]] ,
- append o [csvstring [lindex $kv 1]] ,
+ hash {
+ set fields [{*}$r hgetall $k]
+ set newfields {}
+ foreach {k v} $fields {
+ lappend newfields [list $k $v]
+ }
+ set fields [lsort -index 0 $newfields]
+ foreach kv $fields {
+ append o [csvstring [lindex $kv 0]] ,
+ append o [csvstring [lindex $kv 1]] ,
+ }
+ append o "\n"
}
- append o "\n"
}
}
}
+ {*}$r select 9
return $o
}
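The reworked csvdump walks all 16 databases, prefixes each row with the database index, and selects DB 9 (the suite's default) back when done. Roughly, and assuming csvstring simply quotes its argument, a database holding one string and one two-element list would dump as:

    "0","mykey","string","hello"
    "0","mylist","list","a","b",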
@@ -371,3 +375,19 @@ proc start_write_load {host port seconds} {
proc stop_write_load {handle} {
catch {exec /bin/kill -9 $handle}
}
+
+proc K { x y } { set x }
+
+# Shuffle a list. From the Tcl wiki; originally by Steve Cohen, who improved
+# other versions. The code should be in the public domain.
+proc lshuffle {list} {
+ set n [llength $list]
+ while {$n>0} {
+ set j [expr {int(rand()*$n)}]
+ lappend slist [lindex $list $j]
+ incr n -1
+ set temp [lindex $list $n]
+ set list [lreplace [K $list [set list {}]] $j $j $temp]
+ }
+ return $slist
+}
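A usage sketch for lshuffle; the K combinator above hands lreplace an unshared copy of the list, so each in-place swap avoids copying the whole list:

    # Shuffle ten elements; the output order is random, e.g. {7 0 3 ...}.
    puts [lshuffle {0 1 2 3 4 5 6 7 8 9}]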
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 212c95b4f..1a986e2f7 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -16,20 +16,25 @@ set ::all_tests {
unit/dump
unit/auth
unit/protocol
- unit/basic
+ unit/keyspace
unit/scan
+ unit/type/string
+ unit/type/incr
unit/type/list
unit/type/list-2
unit/type/list-3
unit/type/set
unit/type/zset
unit/type/hash
+ unit/type/stream
+ unit/type/stream-cgroups
unit/sort
unit/expire
unit/other
unit/multi
unit/quit
unit/aofrw
+ integration/block-repl
integration/replication
integration/replication-2
integration/replication-3
@@ -38,16 +43,25 @@ set ::all_tests {
integration/aof
integration/rdb
integration/convert-zipmap-hash-on-load
+ integration/logging
+ integration/psync2
+ integration/psync2-reg
unit/pubsub
unit/slowlog
unit/scripting
unit/maxmemory
unit/introspection
+ unit/introspection-2
unit/limits
unit/obuf-limits
unit/bitops
+ unit/bitfield
+ unit/geo
unit/memefficiency
unit/hyperloglog
+ unit/lazyfree
+ unit/wait
+ unit/pendingquerybuf
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
@@ -56,10 +70,15 @@ set ::host 127.0.0.1
set ::port 21111
set ::traceleaks 0
set ::valgrind 0
+set ::stack_logging 0
set ::verbose 0
set ::quiet 0
set ::denytags {}
+set ::skiptests {}
set ::allowtags {}
+set ::only_tests {}
+set ::single_tests {}
+set ::skip_till ""
set ::external 0; # If "1", we are running against an external instance.
set ::file ""; # If set, runs only the tests in this comma separated list
set ::curfile ""; # Hold the filename of the current suite
@@ -68,6 +87,10 @@ set ::force_failure 0
set ::timeout 600; # 10 minutes without progresses will quit the test.
set ::last_progress [clock seconds]
set ::active_servers {} ; # Pids of active Redis instances.
+set ::dont_clean 0
+set ::wait_server 0
+set ::stop_on_failure 0
+set ::loop 0
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@@ -161,6 +184,9 @@ proc s {args} {
}
proc cleanup {} {
+ if {$::dont_clean} {
+ return
+ }
if {!$::quiet} {puts -nonewline "Cleanup: may take some time... "}
flush stdout
catch {exec rm -rf {*}[glob tests/tmp/redis.conf.*]}
@@ -210,6 +236,7 @@ proc test_server_cron {} {
if {$elapsed > $::timeout} {
set err "\[[colorstr red TIMEOUT]\]: clients state report follows."
puts $err
+ lappend ::failed_tests $err
show_clients_state
kill_clients
force_kill_all_servers
@@ -234,6 +261,8 @@ proc accept_test_clients {fd addr port} {
# testing: just used to signal that a given test started.
# ok: a test was executed with success.
# err: a test was executed with an error.
+# skip: a test was skipped by skipfile or individual test options.
+# ignore: a test was skipped by a group tag.
# exception: there was a runtime exception while executing the test.
# done: the specified test file was fully processed, this test client is
# ready to accept a new task.
@@ -262,11 +291,24 @@ proc read_from_test_client fd {
puts "\[[colorstr green $status]\]: $data"
}
set ::active_clients_task($fd) "(OK) $data"
+ } elseif {$status eq {skip}} {
+ if {!$::quiet} {
+ puts "\[[colorstr yellow $status]\]: $data"
+ }
+ } elseif {$status eq {ignore}} {
+ if {!$::quiet} {
+ puts "\[[colorstr cyan $status]\]: $data"
+ }
} elseif {$status eq {err}} {
set err "\[[colorstr red $status]\]: $data"
puts $err
lappend ::failed_tests $err
set ::active_clients_task($fd) "(ERR) $data"
+ if {$::stop_on_failure} {
+ puts -nonewline "(Test stopped, press enter to continue)"
+ flush stdout
+ gets stdin
+ }
} elseif {$status eq {exception}} {
puts "\[[colorstr red $status]\]: $data"
kill_clients
@@ -329,6 +371,9 @@ proc signal_idle_client fd {
send_data_packet $fd run [lindex $::all_tests $::next_test]
lappend ::active_clients $fd
incr ::next_test
+ if {$::loop && $::next_test == [llength $::all_tests]} {
+ set ::next_test 0
+ }
} else {
lappend ::idle_clients $fd
if {[llength $::active_clients] == 0} {
@@ -388,13 +433,22 @@ proc send_data_packet {fd status data} {
proc print_help_screen {} {
puts [join {
"--valgrind Run the test over valgrind."
+ "--stack-logging Enable OSX leaks/malloc stack logging."
"--accurate Run slow randomized tests for more iterations."
"--quiet Don't show individual tests."
- "--single <unit> Just execute the specified unit (see next option)."
+ "--single <unit> Just execute the specified unit (see next option). this option can be repeated."
"--list-tests List all the available test units."
+ "--only <test> Just execute the specified test by test name. this option can be repeated."
+ "--skip-till <unit> Skip all units until (and including) the specified one."
"--clients <num> Number of test clients (default 16)."
"--timeout <sec> Test timeout in seconds (default 10 min)."
"--force-failure Force the execution of a test that always fails."
+ "--config <k> <v> Extra config file argument."
+ "--skipfile <file> Name of a file containing test names that should be skipped (one per line)."
+ "--dont-clean Don't delete redis log files after the run."
+ "--stop Blocks once the first test fails."
+ "--loop Execute the specified set of tests forever."
+ "--wait-server Wait after server is started (so that you can attach a debugger)."
"--help Print this help screen."
} "\n"]
}
@@ -412,8 +466,23 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
}
}
incr j
+ } elseif {$opt eq {--config}} {
+ set arg2 [lindex $argv [expr $j+2]]
+ lappend ::global_overrides $arg
+ lappend ::global_overrides $arg2
+ incr j 2
+ } elseif {$opt eq {--skipfile}} {
+ incr j
+ set fp [open $arg r]
+ set file_data [read $fp]
+ close $fp
+ set ::skiptests [split $file_data "\n"]
} elseif {$opt eq {--valgrind}} {
set ::valgrind 1
+ } elseif {$opt eq {--stack-logging}} {
+ if {[string match {*Darwin*} [exec uname -a]]} {
+ set ::stack_logging 1
+ }
} elseif {$opt eq {--quiet}} {
set ::quiet 1
} elseif {$opt eq {--host}} {
@@ -428,13 +497,21 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--force-failure}} {
set ::force_failure 1
} elseif {$opt eq {--single}} {
- set ::all_tests $arg
+ lappend ::single_tests $arg
+ incr j
+ } elseif {$opt eq {--only}} {
+ lappend ::only_tests $arg
+ incr j
+ } elseif {$opt eq {--skip-till}} {
+ set ::skip_till $arg
incr j
} elseif {$opt eq {--list-tests}} {
foreach t $::all_tests {
puts $t
}
exit 0
+ } elseif {$opt eq {--verbose}} {
+ set ::verbose 1
} elseif {$opt eq {--client}} {
set ::client 1
set ::test_server_port $arg
@@ -442,6 +519,14 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--clients}} {
set ::numclients $arg
incr j
+ } elseif {$opt eq {--dont-clean}} {
+ set ::dont_clean 1
+ } elseif {$opt eq {--wait-server}} {
+ set ::wait_server 1
+ } elseif {$opt eq {--stop}} {
+ set ::stop_on_failure 1
+ } elseif {$opt eq {--loop}} {
+ set ::loop 1
} elseif {$opt eq {--timeout}} {
set ::timeout $arg
incr j
@@ -454,6 +539,30 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
}
}
+# If the --skip-till option was given, we populate the list of single tests
+# to run with everything *after* the specified unit.
+if {$::skip_till != ""} {
+ set skipping 1
+ foreach t $::all_tests {
+ if {$skipping == 0} {
+ lappend ::single_tests $t
+ }
+ if {$t == $::skip_till} {
+ set skipping 0
+ }
+ }
+ if {$skipping} {
+ puts "test $::skip_till not found"
+ exit 0
+ }
+}
+
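For instance, assuming a three-unit test list, the loop above keeps only what comes strictly after the matched unit:

    # ::all_tests    {unit/dump unit/auth unit/protocol}
    # --skip-till    unit/auth
    # ::single_tests {unit/protocol}   ;# installed as ::all_tests below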
+# Override the list of tests with the specific tests we want to run
+# in case a filter was given, i.e. the --single or --skip-till options.
+if {[llength $::single_tests] > 0} {
+ set ::all_tests $::single_tests
+}
+
proc attach_to_replication_stream {} {
set s [socket [srv 0 "host"] [srv 0 "port"]]
fconfigure $s -translation binary
@@ -461,8 +570,11 @@ proc attach_to_replication_stream {} {
flush $s
# Get the count
- set count [gets $s]
- set prefix [string range $count 0 0]
+ while 1 {
+ set count [gets $s]
+ set prefix [string range $count 0 0]
+ if {$prefix ne {}} break; # Newlines are allowed as PINGs.
+ }
if {$prefix ne {$}} {
error "attach_to_replication_stream error. Received '$count' as count."
}
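A sketch of the framing the loop above tolerates; before the bulk header that precedes the SYNC payload, a master may emit bare newlines as keepalives, so empty reads are skipped instead of being treated as a protocol error:

    # What may arrive on the socket after sending SYNC (sketch):
    #   \n           <- keepalive newline, skipped by the loop
    #   $1234\r\n    <- bulk count; $prefix is "$" and the loop exits
    #   <1234 bytes of RDB payload>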
diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl
index 4fdbdc6c6..1a686a2fa 100644
--- a/tests/unit/aofrw.tcl
+++ b/tests/unit/aofrw.tcl
@@ -4,64 +4,67 @@ start_server {tags {"aofrw"}} {
r config set auto-aof-rewrite-percentage 0 ; # Disable auto-rewrite.
waitForBgrewriteaof r
- test {AOF rewrite during write load} {
- # Start a write load for 10 seconds
- set master [srv 0 client]
- set master_host [srv 0 host]
- set master_port [srv 0 port]
- set load_handle0 [start_write_load $master_host $master_port 10]
- set load_handle1 [start_write_load $master_host $master_port 10]
- set load_handle2 [start_write_load $master_host $master_port 10]
- set load_handle3 [start_write_load $master_host $master_port 10]
- set load_handle4 [start_write_load $master_host $master_port 10]
-
- # Make sure the instance is really receiving data
- wait_for_condition 50 100 {
- [r dbsize] > 0
- } else {
- fail "No write load detected."
- }
+ foreach rdbpre {yes no} {
+ r config set aof-use-rdb-preamble $rdbpre
+ test "AOF rewrite during write load: RDB preamble=$rdbpre" {
+ # Start a write load for 10 seconds
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set load_handle0 [start_write_load $master_host $master_port 10]
+ set load_handle1 [start_write_load $master_host $master_port 10]
+ set load_handle2 [start_write_load $master_host $master_port 10]
+ set load_handle3 [start_write_load $master_host $master_port 10]
+ set load_handle4 [start_write_load $master_host $master_port 10]
+
+ # Make sure the instance is really receiving data
+ wait_for_condition 50 100 {
+ [r dbsize] > 0
+ } else {
+ fail "No write load detected."
+ }
- # After 3 seconds, start a rewrite, while the write load is still
- # active.
- after 3000
- r bgrewriteaof
- waitForBgrewriteaof r
+ # After 3 seconds, start a rewrite, while the write load is still
+ # active.
+ after 3000
+ r bgrewriteaof
+ waitForBgrewriteaof r
+
+ # Let it run a bit more so that we'll append some data to the new
+ # AOF.
+ after 1000
+
+ # Stop the processes generating the load if they are still active
+ stop_write_load $load_handle0
+ stop_write_load $load_handle1
+ stop_write_load $load_handle2
+ stop_write_load $load_handle3
+ stop_write_load $load_handle4
+
+ # Make sure that we remain the only connected client.
+ # This step is needed to make sure there are no pending writes
+ # that will be processed between the two "debug digest" calls.
+ wait_for_condition 50 100 {
+ [llength [split [string trim [r client list]] "\n"]] == 1
+ } else {
+ puts [r client list]
+ fail "Clients generating loads are not disconnecting"
+ }
- # Let it run a bit more so that we'll append some data to the new
- # AOF.
- after 1000
+ # Get the data set digest
+ set d1 [r debug digest]
- # Stop the processes generating the load if they are still active
- stop_write_load $load_handle0
- stop_write_load $load_handle1
- stop_write_load $load_handle2
- stop_write_load $load_handle3
- stop_write_load $load_handle4
+ # Load the AOF
+ r debug loadaof
+ set d2 [r debug digest]
- # Make sure that we remain the only connected client.
- # This step is needed to make sure there are no pending writes
- # that will be processed between the two "debug digest" calls.
- wait_for_condition 50 100 {
- [llength [split [string trim [r client list]] "\n"]] == 1
- } else {
- puts [r client list]
- fail "Clients generating loads are not disconnecting"
+ # Make sure they are the same
+ assert {$d1 eq $d2}
}
-
- # Get the data set digest
- set d1 [r debug digest]
-
- # Load the AOF
- r debug loadaof
- set d2 [r debug digest]
-
- # Make sure they are the same
- assert {$d1 eq $d2}
}
}
-start_server {tags {"aofrw"}} {
+start_server {tags {"aofrw"} overrides {aof-use-rdb-preamble no}} {
test {Turning off AOF kills the background writing child if any} {
r config set appendonly yes
waitForBgrewriteaof r
@@ -70,7 +73,7 @@ start_server {tags {"aofrw"}} {
r config set appendonly no
r exec
wait_for_condition 50 100 {
- [string match {*Killing*AOF*child*} [exec tail -n5 < [srv 0 stdout]]]
+ [string match {*Killing*AOF*child*} [exec tail -5 < [srv 0 stdout]]]
} else {
fail "Can't find 'Killing AOF child' into recent logs"
}
diff --git a/tests/unit/bitfield.tcl b/tests/unit/bitfield.tcl
new file mode 100644
index 000000000..d76452b1b
--- /dev/null
+++ b/tests/unit/bitfield.tcl
@@ -0,0 +1,201 @@
+start_server {tags {"bitops"}} {
+ test {BITFIELD signed SET and GET basics} {
+ r del bits
+ set results {}
+ lappend results [r bitfield bits set i8 0 -100]
+ lappend results [r bitfield bits set i8 0 101]
+ lappend results [r bitfield bits get i8 0]
+ set results
+ } {0 -100 101}
+
+ test {BITFIELD unsigned SET and GET basics} {
+ r del bits
+ set results {}
+ lappend results [r bitfield bits set u8 0 255]
+ lappend results [r bitfield bits set u8 0 100]
+ lappend results [r bitfield bits get u8 0]
+ set results
+ } {0 255 100}
+
+ test {BITFIELD #<idx> form} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 65
+ r bitfield bits set u8 #1 66
+ r bitfield bits set u8 #2 67
+ r get bits
+ } {ABC}
+
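The #<idx> form exercised above scales the index by the type width, so for u8 the three SETs land on consecutive bytes; equivalently, as a sketch:

    # #<idx> with u8 means bit offset idx*8:
    #   set u8 #0 65  ==  set u8 0 65    ;# byte 0 = "A"
    #   set u8 #1 66  ==  set u8 8 66    ;# byte 1 = "B"
    #   set u8 #2 67  ==  set u8 16 67   ;# byte 2 = "C"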
+ test {BITFIELD basic INCRBY form} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 10
+ lappend results [r bitfield bits incrby u8 #0 100]
+ lappend results [r bitfield bits incrby u8 #0 100]
+ set results
+ } {110 210}
+
+ test {BITFIELD chaining of multiple commands} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 10
+ lappend results [r bitfield bits incrby u8 #0 100 incrby u8 #0 100]
+ set results
+ } {{110 210}}
+
+ test {BITFIELD unsigned overflow wrap} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow wrap incrby u8 #0 257]
+ lappend results [r bitfield bits get u8 #0]
+ lappend results [r bitfield bits overflow wrap incrby u8 #0 255]
+ lappend results [r bitfield bits get u8 #0]
+ } {101 101 100 100}
+
+ test {BITFIELD unsigned overflow sat} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow sat incrby u8 #0 257]
+ lappend results [r bitfield bits get u8 #0]
+ lappend results [r bitfield bits overflow sat incrby u8 #0 -255]
+ lappend results [r bitfield bits get u8 #0]
+ } {255 255 0 0}
+
+ test {BITFIELD signed overflow wrap} {
+ r del bits
+ set results {}
+ r bitfield bits set i8 #0 100
+ lappend results [r bitfield bits overflow wrap incrby i8 #0 257]
+ lappend results [r bitfield bits get i8 #0]
+ lappend results [r bitfield bits overflow wrap incrby i8 #0 255]
+ lappend results [r bitfield bits get i8 #0]
+ } {101 101 100 100}
+
+ test {BITFIELD signed overflow sat} {
+ r del bits
+ set results {}
+ r bitfield bits set u8 #0 100
+ lappend results [r bitfield bits overflow sat incrby i8 #0 257]
+ lappend results [r bitfield bits get i8 #0]
+ lappend results [r bitfield bits overflow sat incrby i8 #0 -255]
+ lappend results [r bitfield bits get i8 #0]
+ } {127 127 -128 -128}
+
+ test {BITFIELD overflow detection fuzzing} {
+ for {set j 0} {$j < 1000} {incr j} {
+ set bits [expr {[randomInt 64]+1}]
+ set sign [randomInt 2]
+ set range [expr {2**$bits}]
+ if {$bits == 64} {set sign 1} ; # u64 is not supported by BITFIELD.
+ if {$sign} {
+ set min [expr {-($range/2)}]
+ set type "i$bits"
+ } else {
+ set min 0
+ set type "u$bits"
+ }
+ set max [expr {$min+$range-1}]
+
+ # Compare Tcl vs Redis
+ set range2 [expr {$range*2}]
+ set value [expr {($min*2)+[randomInt $range2]}]
+ set increment [expr {($min*2)+[randomInt $range2]}]
+ if {$value > 9223372036854775807} {
+ set value 9223372036854775807
+ }
+ if {$value < -9223372036854775808} {
+ set value -9223372036854775808
+ }
+ if {$increment > 9223372036854775807} {
+ set increment 9223372036854775807
+ }
+ if {$increment < -9223372036854775808} {
+ set increment -9223372036854775808
+ }
+
+ set overflow 0
+ if {$value > $max || $value < $min} {set overflow 1}
+ if {($value + $increment) > $max} {set overflow 1}
+ if {($value + $increment) < $min} {set overflow 1}
+
+ r del bits
+ set res1 [r bitfield bits overflow fail set $type 0 $value]
+ set res2 [r bitfield bits overflow fail incrby $type 0 $increment]
+
+ if {$overflow && [lindex $res1 0] ne {} &&
+ [lindex $res2 0] ne {}} {
+ fail "OW not detected where needed: $type $value+$increment"
+ }
+ if {!$overflow && ([lindex $res1 0] eq {} ||
+ [lindex $res2 0] eq {})} {
+ fail "OW detected where NOT needed: $type $value+$increment"
+ }
+ }
+ }
+
+ test {BITFIELD overflow wrap fuzzing} {
+ for {set j 0} {$j < 1000} {incr j} {
+ set bits [expr {[randomInt 64]+1}]
+ set sign [randomInt 2]
+ set range [expr {2**$bits}]
+ if {$bits == 64} {set sign 1} ; # u64 is not supported by BITFIELD.
+ if {$sign} {
+ set min [expr {-($range/2)}]
+ set type "i$bits"
+ } else {
+ set min 0
+ set type "u$bits"
+ }
+ set max [expr {$min+$range-1}]
+
+ # Compare Tcl vs Redis
+ set range2 [expr {$range*2}]
+ set value [expr {($min*2)+[randomInt $range2]}]
+ set increment [expr {($min*2)+[randomInt $range2]}]
+ if {$value > 9223372036854775807} {
+ set value 9223372036854775807
+ }
+ if {$value < -9223372036854775808} {
+ set value -9223372036854775808
+ }
+ if {$increment > 9223372036854775807} {
+ set increment 9223372036854775807
+ }
+ if {$increment < -9223372036854775808} {
+ set increment -9223372036854775808
+ }
+
+ r del bits
+ r bitfield bits overflow wrap set $type 0 $value
+ r bitfield bits overflow wrap incrby $type 0 $increment
+ set res [lindex [r bitfield bits get $type 0] 0]
+
+ set expected 0
+ if {$sign} {incr expected [expr {$max+1}]}
+ incr expected $value
+ incr expected $increment
+ set expected [expr {$expected % $range}]
+ if {$sign} {incr expected $min}
+
+ if {$res != $expected} {
+ fail "WRAP error: $type $value+$increment = $res, should be $expected"
+ }
+ }
+ }
+
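A worked instance of the expected-value arithmetic above, for type i8 (range 256, min -128, max 127) with value 100 and increment 257; it reproduces the result of the earlier "BITFIELD signed overflow wrap" test:

    # expected = 0
    # signed  -> expected += max+1       => 128
    # expected += value (100)            => 228
    # expected += increment (257)        => 485
    # expected %= range (256)            => 229
    # signed  -> expected += min (-128)  => 101, the wrapped i8 result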
+ test {BITFIELD regression for #3221} {
+ r set bits 1
+ r bitfield bits get u1 0
+ } {0}
+
+ test {BITFIELD regression for #3564} {
+ for {set j 0} {$j < 10} {incr j} {
+ r del mystring
+ set res [r BITFIELD mystring SET i8 0 10 SET i8 64 10 INCRBY i8 10 99900]
+ assert {$res eq {0 0 60}}
+ }
+ r del mystring
+ }
+}
diff --git a/tests/unit/bitops.tcl b/tests/unit/bitops.tcl
index 9751850ad..926f38295 100644
--- a/tests/unit/bitops.tcl
+++ b/tests/unit/bitops.tcl
@@ -1,4 +1,4 @@
-# Compare Redis commadns against Tcl implementations of the same commands.
+# Compare Redis commands against Tcl implementations of the same commands.
proc count_bits s {
binary scan $s b* bits
string length [regsub -all {0} $bits {}]
@@ -43,6 +43,16 @@ start_server {tags {"bitops"}} {
r bitcount no-key
} 0
+ test {BITCOUNT returns 0 with out of range indexes} {
+ r set str "xxxx"
+ r bitcount str 4 10
+ } 0
+
+ test {BITCOUNT returns 0 with negative indexes where start > end} {
+ r set str "xxxx"
+ r bitcount str -6 -7
+ } 0
+
catch {unset num}
foreach vec [list "" "\xaa" "\x00\x00\xff" "foobar" "123"] {
incr num
@@ -88,7 +98,7 @@ start_server {tags {"bitops"}} {
} {ERR*syntax*}
test {BITCOUNT regression test for github issue #582} {
- r del str
+ r del foo
r setbit foo 0 1
if {[catch {r bitcount foo 0 4294967296} e]} {
assert_match {*ERR*out of range*} $e
diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl
index 5af53db8d..91a4df09e 100644
--- a/tests/unit/dump.tcl
+++ b/tests/unit/dump.tcl
@@ -25,6 +25,39 @@ start_server {tags {"dump"}} {
assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501}
r get foo
} {bar}
+
+ test {RESTORE can set an absolute expire} {
+ r set foo bar
+ set encoded [r dump foo]
+ r del foo
+ set now [clock milliseconds]
+ r restore foo [expr $now+3000] $encoded absttl
+ set ttl [r pttl foo]
+ assert {$ttl >= 2998 && $ttl <= 3000}
+ r get foo
+ } {bar}
+
+ test {RESTORE can set LRU} {
+ r set foo bar
+ set encoded [r dump foo]
+ r del foo
+ r config set maxmemory-policy allkeys-lru
+ r restore foo 0 $encoded idletime 1000
+ set idle [r object idletime foo]
+ assert {$idle >= 1000 && $idle <= 1002}
+ r get foo
+ } {bar}
+
+ test {RESTORE can set LFU} {
+ r set foo bar
+ set encoded [r dump foo]
+ r del foo
+ r config set maxmemory-policy allkeys-lfu
+ r restore foo 0 $encoded freq 100
+ set freq [r object freq foo]
+ assert {$freq == 100}
+ r get foo
+ } {bar}
 test {RESTORE returns an error if the key already exists} {
r set foo bar
@@ -217,4 +250,119 @@ start_server {tags {"dump"}} {
assert_match {IOERR*} $e
}
}
+
+ test {MIGRATE can migrate multiple keys at once} {
+ set first [srv 0 client]
+ r set key1 "v1"
+ r set key2 "v2"
+ r set key3 "v3"
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ assert {[$first exists key1] == 1}
+ assert {[$second exists key1] == 0}
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys key1 key2 key3]
+ assert {$ret eq {OK}}
+ assert {[$first exists key1] == 0}
+ assert {[$first exists key2] == 0}
+ assert {[$first exists key3] == 0}
+ assert {[$second get key1] eq {v1}}
+ assert {[$second get key2] eq {v2}}
+ assert {[$second get key3] eq {v3}}
+ }
+ }
+
+ test {MIGRATE with multiple keys must have empty key arg} {
+ catch {r MIGRATE 127.0.0.1 6379 NotEmpty 9 5000 keys a b c} e
+ set e
+ } {*empty string*}
+
+ test {MIGRATE with multiple keys migrates just the existing ones} {
+ set first [srv 0 client]
+ r set key1 "v1"
+ r set key2 "v2"
+ r set key3 "v3"
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys nokey-1 nokey-2 nokey-2]
+ assert {$ret eq {NOKEY}}
+
+ assert {[$first exists key1] == 1}
+ assert {[$second exists key1] == 0}
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys nokey-1 key1 nokey-2 key2 nokey-3 key3]
+ assert {$ret eq {OK}}
+ assert {[$first exists key1] == 0}
+ assert {[$first exists key2] == 0}
+ assert {[$first exists key3] == 0}
+ assert {[$second get key1] eq {v1}}
+ assert {[$second get key2] eq {v2}}
+ assert {[$second get key3] eq {v3}}
+ }
+ }
+
+ test {MIGRATE with multiple keys: stress command rewriting} {
+ set first [srv 0 client]
+ r flushdb
+ r mset a 1 b 2 c 3 d 4 c 5 e 6 f 7 g 8 h 9 i 10 l 11 m 12 n 13 o 14 p 15 q 16
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ set ret [r -1 migrate $second_host $second_port "" 9 5000 keys a b c d e f g h i l m n o p q]
+
+ assert {[$first dbsize] == 0}
+ assert {[$second dbsize] == 15}
+ }
+ }
+
+ test {MIGRATE with multiple keys: delete just the acknowledged keys} {
+ set first [srv 0 client]
+ r flushdb
+ r mset a 1 b 2 c 3 d 4 c 5 e 6 f 7 g 8 h 9 i 10 l 11 m 12 n 13 o 14 p 15 q 16
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+
+ $second mset c _ d _; # Two busy keys and no REPLACE used
+
+ catch {r -1 migrate $second_host $second_port "" 9 5000 keys a b c d e f g h i l m n o p q} e
+
+ assert {[$first dbsize] == 2}
+ assert {[$second dbsize] == 15}
+ assert {[$first exists c] == 1}
+ assert {[$first exists d] == 1}
+ }
+ }
+
+ test {MIGRATE AUTH: correct and wrong password cases} {
+ set first [srv 0 client]
+ r del list
+ r lpush list a b c d
+ start_server {tags {"repl"}} {
+ set second [srv 0 client]
+ set second_host [srv 0 host]
+ set second_port [srv 0 port]
+ $second config set requirepass foobar
+ $second auth foobar
+
+ assert {[$first exists list] == 1}
+ assert {[$second exists list] == 0}
+ set ret [r -1 migrate $second_host $second_port list 9 5000 AUTH foobar]
+ assert {$ret eq {OK}}
+ assert {[$second exists list] == 1}
+ assert {[$second lrange list 0 -1] eq {d c b a}}
+
+ r -1 lpush list a b c d
+ $second config set requirepass foobar2
+ catch {r -1 migrate $second_host $second_port list 9 5000 AUTH foobar} err
+ assert_match {*invalid password*} $err
+ }
+ }
}
diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl
index ff3dacb33..de24eabed 100644
--- a/tests/unit/expire.tcl
+++ b/tests/unit/expire.tcl
@@ -121,7 +121,7 @@ start_server {tags {"expire"}} {
list $a $b
} {somevalue {}}
- test {TTL returns tiem to live in seconds} {
+ test {TTL returns time to live in seconds} {
r del x
r setex x 10 somevalue
set ttl [r ttl x]
@@ -198,4 +198,25 @@ start_server {tags {"expire"}} {
r set foo b
lsort [r keys *]
} {a e foo s t}
+
+ test {EXPIRE with empty string as TTL should report an error} {
+ r set foo bar
+ catch {r expire foo ""} e
+ set e
+ } {*not an integer*}
+
+ test {SET - use EX/PX option, TTL should not be reset after loadaof} {
+ r config set appendonly yes
+ r set foo bar EX 100
+ after 2000
+ r debug loadaof
+ set ttl [r ttl foo]
+ assert {$ttl <= 98 && $ttl > 90}
+
+ r set foo bar PX 100000
+ after 2000
+ r debug loadaof
+ set ttl [r ttl foo]
+ assert {$ttl <= 98 && $ttl > 90}
+ }
}
diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl
new file mode 100644
index 000000000..604697be4
--- /dev/null
+++ b/tests/unit/geo.tcl
@@ -0,0 +1,311 @@
+# Helper functions to simulate search-in-radius in the Tcl side in order to
+# verify the Redis implementation with a fuzzy test.
+proc geo_degrad deg {expr {$deg*atan(1)*8/360}}
+
+proc geo_distance {lon1d lat1d lon2d lat2d} {
+ set lon1r [geo_degrad $lon1d]
+ set lat1r [geo_degrad $lat1d]
+ set lon2r [geo_degrad $lon2d]
+ set lat2r [geo_degrad $lat2d]
+ set v [expr {sin(($lon2r - $lon1r) / 2)}]
+ set u [expr {sin(($lat2r - $lat1r) / 2)}]
+ expr {2.0 * 6372797.560856 * \
+ asin(sqrt($u * $u + cos($lat1r) * cos($lat2r) * $v * $v))}
+}
+
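geo_distance implements the haversine great-circle formula with an Earth radius of 6372797.560856 meters. A quick sanity check mirroring the GEODIST assertions later in this file (a sketch):

    # Palermo (13.361389, 38.115556) to Catania (15.087269, 37.502669)
    # should come out at roughly 166 km:
    set m [geo_distance 13.361389 38.115556 15.087269 37.502669]
    puts [format "%.1f km" [expr {$m/1000.0}]]   ;# ~166.3 km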
+proc geo_random_point {lonvar latvar} {
+ upvar 1 $lonvar lon
+ upvar 1 $latvar lat
+ # Note that the actual latitude limit should be -85 to +85; we restrict
+ # the test to -70 to +70 since in this range the algorithm is more
+ # precise, while outside it an element may occasionally be missing.
+ set lon [expr {-180 + rand()*360}]
+ set lat [expr {-70 + rand()*140}]
+}
+
+# Return elements non common to both the lists.
+# This code is from http://wiki.tcl.tk/15489
+proc compare_lists {List1 List2} {
+ set DiffList {}
+ foreach Item $List1 {
+ if {[lsearch -exact $List2 $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ foreach Item $List2 {
+ if {[lsearch -exact $List1 $Item] == -1} {
+ if {[lsearch -exact $DiffList $Item] == -1} {
+ lappend DiffList $Item
+ }
+ }
+ }
+ return $DiffList
+}
+
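compare_lists returns the symmetric difference of its two arguments; the randomized test below uses it to report places present in only one of the two result sets:

    # Elements appearing in exactly one of the two lists:
    puts [compare_lists {a b c} {b c d}]   ;# -> a d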
+# The following list represents sets of random seed, search position
+# and radius that caused bugs in the past. It is used by the randomized
+# test later as a starting point. When the regression vectors are scanned
+# the code reverts to using random data.
+#
+# The format is: seed km lon lat
+set regression_vectors {
+ {1482225976969 7083 81.634948934258375 30.561509253718668}
+ {1482340074151 5416 -70.863281847379767 -46.347003465679947}
+ {1499014685896 6064 -89.818768962202014 -40.463868561416803}
+ {1412 156 149.29737817929004 15.95807862745508}
+ {441574 143 59.235461856813856 66.269555127373678}
+ {160645 187 -101.88575239939883 49.061997951502917}
+ {750269 154 -90.187939661642517 66.615930412251487}
+ {342880 145 163.03472387745728 64.012747720821181}
+ {729955 143 137.86663517256579 63.986745399416776}
+ {939895 151 59.149620271823181 65.204186651485145}
+ {1412 156 149.29737817929004 15.95807862745508}
+ {564862 149 84.062063109158544 -65.685403922426232}
+}
+set rv_idx 0
+
+start_server {tags {"geo"}} {
+ test {GEOADD create} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {1}
+
+ test {GEOADD update} {
+ r geoadd nyc -73.9454966 40.747533 "lic market"
+ } {0}
+
+ test {GEOADD invalid coordinates} {
+ catch {
+ r geoadd nyc -73.9454966 40.747533 "lic market" \
+ foo bar "luck market"
+ } err
+ set err
+ } {*valid*}
+
+ test {GEOADD multi add} {
+ r geoadd nyc -73.9733487 40.7648057 "central park n/q/r" -73.9903085 40.7362513 "union square" -74.0131604 40.7126674 "wtc one" -73.7858139 40.6428986 "jfk" -73.9375699 40.7498929 "q4" -73.9564142 40.7480973 4545
+ } {6}
+
+ test {Check geoset values} {
+ r zrange nyc 0 -1 withscores
+ } {{wtc one} 1791873972053020 {union square} 1791875485187452 {central park n/q/r} 1791875761332224 4545 1791875796750882 {lic market} 1791875804419201 q4 1791875830079666 jfk 1791895905559723}
+
+ test {GEORADIUS simple (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km asc
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS withdist (sorted)} {
+ r georadius nyc -73.9798091 40.7598464 3 km withdist asc
+ } {{{central park n/q/r} 0.7750} {4545 2.3651} {{union square} 2.7697}}
+
+ test {GEORADIUS with COUNT} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 3
+ } {{central park n/q/r} 4545 {union square}}
+
+ test {GEORADIUS with COUNT but missing integer argument} {
+ catch {r georadius nyc -73.9798091 40.7598464 10 km COUNT} e
+ set e
+ } {ERR*syntax*}
+
+ test {GEORADIUS with COUNT DESC} {
+ r georadius nyc -73.9798091 40.7598464 10 km COUNT 2 DESC
+ } {{wtc one} q4}
+
+ test {GEORADIUS HUGE, issue #2767} {
+ r geoadd users -47.271613776683807 -54.534504198047678 user_000000
+ llength [r GEORADIUS users 0 0 50000 km WITHCOORD]
+ } {1}
+
+ test {GEORADIUSBYMEMBER simple (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km
+ } {{wtc one} {union square} {central park n/q/r} 4545 {lic market}}
+
+ test {GEORADIUSBYMEMBER withdist (sorted)} {
+ r georadiusbymember nyc "wtc one" 7 km withdist
+ } {{{wtc one} 0.0000} {{union square} 3.2544} {{central park n/q/r} 6.7000} {4545 6.1975} {{lic market} 6.8969}}
+
+ test {GEOHASH is able to return geohash strings} {
+ # Example from Wikipedia.
+ r del points
+ r geoadd points -5.6 42.6 test
+ lindex [r geohash points test] 0
+ } {ezs42e44yx0}
+
+ test {GEOPOS simple} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lassign [lindex [r geopos points a b] 0] x1 y1
+ lassign [lindex [r geopos points a b] 1] x2 y2
+ assert {abs($x1 - 10) < 0.001}
+ assert {abs($y1 - 20) < 0.001}
+ assert {abs($x2 - 30) < 0.001}
+ assert {abs($y2 - 40) < 0.001}
+ }
+
+ test {GEOPOS missing element} {
+ r del points
+ r geoadd points 10 20 a 30 40 b
+ lindex [r geopos points a x b] 1
+ } {}
+
+ test {GEODIST simple & unit} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Catania]
+ assert {$m > 166274 && $m < 166275}
+ set km [r geodist points Palermo Catania km]
+ assert {$km > 166.2 && $km < 166.3}
+ }
+
+ test {GEODIST missing elements} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ set m [r geodist points Palermo Agrigento]
+ assert {$m eq {}}
+ set m [r geodist points Ragusa Agrigento]
+ assert {$m eq {}}
+ set m [r geodist empty_key Palermo Catania]
+ assert {$m eq {}}
+ }
+
+ test {GEORADIUS STORE option: syntax error} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ catch {r georadius points 13.361389 38.115556 50 km store} e
+ set e
+ } {*ERR*syntax*}
+
+ test {GEORANGE STORE option: incompatible options} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withdist} e
+ assert_match {*ERR*} $e
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withhash} e
+ assert_match {*ERR*} $e
+ catch {r georadius points 13.361389 38.115556 50 km store points2 withcoords} e
+ assert_match {*ERR*} $e
+ }
+
+ test {GEORANGE STORE option: plain usage} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km store points2
+ assert_equal [r zrange points 0 -1] [r zrange points2 0 -1]
+ }
+
+ test {GEORANGE STOREDIST option: plain usage} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km storedist points2
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 1] < 1}
+ assert {[lindex $res 3] > 166}
+ assert {[lindex $res 3] < 167}
+ }
+
+ test {GEORANGE STOREDIST option: COUNT ASC and DESC} {
+ r del points
+ r geoadd points 13.361389 38.115556 "Palermo" \
+ 15.087269 37.502669 "Catania"
+ r georadius points 13.361389 38.115556 500 km storedist points2 asc count 1
+ assert {[r zcard points2] == 1}
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 0] eq "Palermo"}
+
+ r georadius points 13.361389 38.115556 500 km storedist points2 desc count 1
+ assert {[r zcard points2] == 1}
+ set res [r zrange points2 0 -1 withscores]
+ assert {[lindex $res 0] eq "Catania"}
+ }
+
+ test {GEOADD + GEORANGE randomized test} {
+ set attempt 30
+ while {[incr attempt -1]} {
+ set rv [lindex $regression_vectors $rv_idx]
+ incr rv_idx
+
+ unset -nocomplain debuginfo
+ set srand_seed [clock milliseconds]
+ if {$rv ne {}} {set srand_seed [lindex $rv 0]}
+ lappend debuginfo "srand_seed is $srand_seed"
+ expr {srand($srand_seed)} ; # If you need a reproducible run
+ r del mypoints
+
+ if {[randomInt 10] == 0} {
+ # From time to time use very big radiuses
+ set radius_km [expr {[randomInt 50000]+10}]
+ } else {
+ # Normally use smaller radii, ~10-200 km, to stress
+ # test the code the most in edge cases.
+ set radius_km [expr {[randomInt 200]+10}]
+ }
+ if {$rv ne {}} {set radius_km [lindex $rv 1]}
+ set radius_m [expr {$radius_km*1000}]
+ geo_random_point search_lon search_lat
+ if {$rv ne {}} {
+ set search_lon [lindex $rv 2]
+ set search_lat [lindex $rv 3]
+ }
+ lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km"
+ set tcl_result {}
+ set argv {}
+ for {set j 0} {$j < 20000} {incr j} {
+ geo_random_point lon lat
+ lappend argv $lon $lat "place:$j"
+ set distance [geo_distance $lon $lat $search_lon $search_lat]
+ if {$distance < $radius_m} {
+ lappend tcl_result "place:$j"
+ }
+ lappend debuginfo "place:$j $lon $lat [expr {$distance/1000}] km"
+ }
+ r geoadd mypoints {*}$argv
+ set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]]
+ set res2 [lsort $tcl_result]
+ set test_result OK
+
+ if {$res != $res2} {
+ set rounding_errors 0
+ set diff [compare_lists $res $res2]
+ foreach place $diff {
+ set mydist [geo_distance {*}[lindex [r geopos mypoints $place] 0] $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
+ # Make sure this is a real error and not a rounding issue.
+ if {[llength $diff] == $rounding_errors} {
+ set res $res2; # Error silenced
+ }
+ }
+
+ if {$res != $res2} {
+ set diff [compare_lists $res $res2]
+ puts "*** Possible problem in GEO radius query ***"
+ puts "Redis: $res"
+ puts "Tcl : $res2"
+ puts "Diff : $diff"
+ puts [join $debuginfo "\n"]
+ foreach place $diff {
+ if {[lsearch -exact $res2 $place] != -1} {
+ set where "(only in Tcl)"
+ } else {
+ set where "(only in Redis)"
+ }
+ lassign [lindex [r geopos mypoints $place] 0] lon lat
+ set mydist [geo_distance $lon $lat $search_lon $search_lat]
+ set mydist [expr $mydist/1000]
+ puts "$place -> [r geopos mypoints $place] $mydist $where"
+ if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
+ }
+ set test_result FAIL
+ }
+ unset -nocomplain debuginfo
+ if {$test_result ne {OK}} break
+ }
+ set test_result
+ } {OK}
+}
diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl
index 3f5142076..7d36b7a35 100644
--- a/tests/unit/hyperloglog.tcl
+++ b/tests/unit/hyperloglog.tcl
@@ -136,10 +136,9 @@ start_server {tags {"hll"}} {
r pfcount hll
} {5}
- test {PFCOUNT multiple-keys merge returns cardinality of union} {
+ test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1 hll2 hll3
for {set x 1} {$x < 10000} {incr x} {
- # Force dense representation of hll2
r pfadd hll1 "foo-$x"
r pfadd hll2 "bar-$x"
r pfadd hll3 "zap-$x"
@@ -151,6 +150,22 @@ start_server {tags {"hll"}} {
}
}
+ test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
+ r del hll1 hll2 hll3
+ set elements {}
+ for {set x 1} {$x < 10000} {incr x} {
+ for {set j 1} {$j <= 3} {incr j} {
+ set rint [randomInt 20000]
+ r pfadd hll$j $rint
+ lappend elements $rint
+ }
+ }
+ set realcard [llength [lsort -unique $elements]]
+ set card [r pfcount hll1 hll2 hll3]
+ set err [expr {abs($card-$realcard)}]
+ assert {$err < (double($card)/100)*5}
+ }
+
test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
r del hll
r pfadd hll 1 2 3
diff --git a/tests/unit/introspection-2.tcl b/tests/unit/introspection-2.tcl
new file mode 100644
index 000000000..350a8a016
--- /dev/null
+++ b/tests/unit/introspection-2.tcl
@@ -0,0 +1,23 @@
+start_server {tags {"introspection"}} {
+ test {TTL and TYPE do not alter the last access time of a key} {
+ r set foo bar
+ after 3000
+ r ttl foo
+ r type foo
+ assert {[r object idletime foo] >= 2}
+ }
+
+ test {TOUCH alters the last access time of a key} {
+ r set foo bar
+ after 3000
+ r touch foo
+ assert {[r object idletime foo] < 2}
+ }
+
+ test {TOUCH returns the number of existing keys specified} {
+ r flushdb
+ r set key1 1
+ r set key2 2
+ r touch key0 key1 key2 key3
+ } 2
+}
diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl
index 342bb939a..f6477d9c5 100644
--- a/tests/unit/introspection.tcl
+++ b/tests/unit/introspection.tcl
@@ -6,16 +6,17 @@ start_server {tags {"introspection"}} {
test {MONITOR can log executed commands} {
set rd [redis_deferring_client]
$rd monitor
+ assert_match {*OK*} [$rd read]
r set foo bar
r get foo
- list [$rd read] [$rd read] [$rd read]
- } {*OK*"set" "foo"*"get" "foo"*}
+ list [$rd read] [$rd read]
+ } {*"set" "foo"*"get" "foo"*}
test {MONITOR can log commands issued by the scripting engine} {
set rd [redis_deferring_client]
$rd monitor
- r eval {redis.call('set',KEYS[1],ARGV[1])} 1 foo bar
$rd read ;# Discard the OK
+ r eval {redis.call('set',KEYS[1],ARGV[1])} 1 foo bar
assert_match {*eval*} [$rd read]
assert_match {*lua*"set"*"foo"*"bar"*} [$rd read]
}
diff --git a/tests/unit/keyspace.tcl b/tests/unit/keyspace.tcl
new file mode 100644
index 000000000..d4e7bf51c
--- /dev/null
+++ b/tests/unit/keyspace.tcl
@@ -0,0 +1,275 @@
+start_server {tags {"keyspace"}} {
+ test {DEL against a single item} {
+ r set x foo
+ assert {[r get x] eq "foo"}
+ r del x
+ r get x
+ } {}
+
+ test {Vararg DEL} {
+ r set foo1 a
+ r set foo2 b
+ r set foo3 c
+ list [r del foo1 foo2 foo3 foo4] [r mget foo1 foo2 foo3]
+ } {3 {{} {} {}}}
+
+ test {KEYS with pattern} {
+ foreach key {key_x key_y key_z foo_a foo_b foo_c} {
+ r set $key hello
+ }
+ lsort [r keys foo*]
+ } {foo_a foo_b foo_c}
+
+ test {KEYS to get all keys} {
+ lsort [r keys *]
+ } {foo_a foo_b foo_c key_x key_y key_z}
+
+ test {DBSIZE} {
+ r dbsize
+ } {6}
+
+ test {DEL all keys} {
+ foreach key [r keys *] {r del $key}
+ r dbsize
+ } {0}
+
+ test "DEL against expired key" {
+ r debug set-active-expire 0
+ r setex keyExpire 1 valExpire
+ after 1100
+ assert_equal 0 [r del keyExpire]
+ r debug set-active-expire 1
+ }
+
+ test {EXISTS} {
+ set res {}
+ r set newkey test
+ append res [r exists newkey]
+ r del newkey
+ append res [r exists newkey]
+ } {10}
+
+ test {Zero length value in key. SET/GET/EXISTS} {
+ r set emptykey {}
+ set res [r get emptykey]
+ append res [r exists emptykey]
+ r del emptykey
+ append res [r exists emptykey]
+ } {10}
+
+ test {Commands pipelining} {
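+ # Send three commands in a single write over the raw socket, then
+ # read the three replies back: this exercises command pipelining.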
+ set fd [r channel]
+ puts -nonewline $fd "SET k1 xyzk\r\nGET k1\r\nPING\r\n"
+ flush $fd
+ set res {}
+ append res [string match OK* [r read]]
+ append res [r read]
+ append res [string match PONG* [r read]]
+ format $res
+ } {1xyzk1}
+
+ test {Non existing command} {
+ catch {r foobaredcommand} err
+ string match ERR* $err
+ } {1}
+
+ test {RENAME basic usage} {
+ r set mykey hello
+ r rename mykey mykey1
+ r rename mykey1 mykey2
+ r get mykey2
+ } {hello}
+
+ test {RENAME source key should no longer exist} {
+ r exists mykey
+ } {0}
+
+ test {RENAME against already existing key} {
+ r set mykey a
+ r set mykey2 b
+ r rename mykey2 mykey
+ set res [r get mykey]
+ append res [r exists mykey2]
+ } {b0}
+
+ test {RENAMENX basic usage} {
+ r del mykey
+ r del mykey2
+ r set mykey foobar
+ r renamenx mykey mykey2
+ set res [r get mykey2]
+ append res [r exists mykey]
+ } {foobar0}
+
+ test {RENAMENX against already existing key} {
+ r set mykey foo
+ r set mykey2 bar
+ r renamenx mykey mykey2
+ } {0}
+
+ test {RENAMENX against already existing key (2)} {
+ set res [r get mykey]
+ append res [r get mykey2]
+ } {foobar}
+
+ test {RENAME against non existing source key} {
+ catch {r rename nokey foobar} err
+ format $err
+ } {ERR*}
+
+ test {RENAME where source and dest key are the same (existing)} {
+ r set mykey foo
+ r rename mykey mykey
+ } {OK}
+
+ test {RENAMENX where source and dest key are the same (existing)} {
+ r set mykey foo
+ r renamenx mykey mykey
+ } {0}
+
+ test {RENAME where source and dest key are the same (non existing)} {
+ r del mykey
+ catch {r rename mykey mykey} err
+ format $err
+ } {ERR*}
+
+ test {RENAME with volatile key, should move the TTL as well} {
+ r del mykey mykey2
+ r set mykey foo
+ r expire mykey 100
+ assert {[r ttl mykey] > 95 && [r ttl mykey] <= 100}
+ r rename mykey mykey2
+ assert {[r ttl mykey2] > 95 && [r ttl mykey2] <= 100}
+ }
+
+ test {RENAME with volatile key, should not inherit TTL of target key} {
+ r del mykey mykey2
+ r set mykey foo
+ r set mykey2 bar
+ r expire mykey2 100
+ assert {[r ttl mykey] == -1 && [r ttl mykey2] > 0}
+ r rename mykey mykey2
+ r ttl mykey2
+ } {-1}
+
+ test {DEL all keys again (DB 0)} {
+ foreach key [r keys *] {
+ r del $key
+ }
+ r dbsize
+ } {0}
+
+ test {DEL all keys again (DB 10)} {
+ r select 10
+ foreach key [r keys *] {
+ r del $key
+ }
+ set res [r dbsize]
+ r select 9
+ format $res
+ } {0}
+
+ test {MOVE basic usage} {
+ r set mykey foobar
+ r move mykey 10
+ set res {}
+ lappend res [r exists mykey]
+ lappend res [r dbsize]
+ r select 10
+ lappend res [r get mykey]
+ lappend res [r dbsize]
+ r select 9
+ format $res
+ } [list 0 0 foobar 1]
+
+ test {MOVE against key existing in the target DB} {
+ r set mykey hello
+ r move mykey 10
+ } {0}
+
+ test {MOVE against non-integer DB (#1428)} {
+ r set mykey hello
+ catch {r move mykey notanumber} e
+ set e
+ } {*ERR*index out of range}
+
+ test {MOVE can move key expire metadata as well} {
+ r select 10
+ r flushdb
+ r select 9
+ r set mykey foo ex 100
+ r move mykey 10
+ assert {[r ttl mykey] == -2}
+ r select 10
+ assert {[r ttl mykey] > 0 && [r ttl mykey] <= 100}
+ assert {[r get mykey] eq "foo"}
+ r select 9
+ }
+
+ test {MOVE does not create an expire if it does not exist} {
+ r select 10
+ r flushdb
+ r select 9
+ r set mykey foo
+ r move mykey 10
+ assert {[r ttl mykey] == -2}
+ r select 10
+ assert {[r ttl mykey] == -1}
+ assert {[r get mykey] eq "foo"}
+ r select 9
+ }
+
+ test {SET/GET keys in different DBs} {
+ r set a hello
+ r set b world
+ r select 10
+ r set a foo
+ r set b bared
+ r select 9
+ set res {}
+ lappend res [r get a]
+ lappend res [r get b]
+ r select 10
+ lappend res [r get a]
+ lappend res [r get b]
+ r select 9
+ format $res
+ } {hello world foo bared}
+
+ test {RANDOMKEY} {
+ r flushdb
+ r set foo x
+ r set bar y
+ set foo_seen 0
+ set bar_seen 0
+ for {set i 0} {$i < 100} {incr i} {
+ set rkey [r randomkey]
+ if {$rkey eq {foo}} {
+ set foo_seen 1
+ }
+ if {$rkey eq {bar}} {
+ set bar_seen 1
+ }
+ }
+ list $foo_seen $bar_seen
+ } {1 1}
+
+ test {RANDOMKEY against empty DB} {
+ r flushdb
+ r randomkey
+ } {}
+
+ test {RANDOMKEY regression 1} {
+ r flushdb
+ r set x 10
+ r del x
+ r randomkey
+ } {}
+
+ test {KEYS * two times with long key, Github issue #1208} {
+ r flushdb
+ r set dlskeriewrioeuwqoirueioqwrueoqwrueqw test
+ r keys *
+ r keys *
+ } {dlskeriewrioeuwqoirueioqwrueoqwrueqw}
+}
diff --git a/tests/unit/latency-monitor.tcl b/tests/unit/latency-monitor.tcl
index b736cad98..69da13f06 100644
--- a/tests/unit/latency-monitor.tcl
+++ b/tests/unit/latency-monitor.tcl
@@ -47,4 +47,18 @@ start_server {tags {"latency-monitor"}} {
assert {[r latency reset] > 0}
assert {[r latency latest] eq {}}
}
+
+ test {LATENCY of expire events are correctly collected} {
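+ # Build a 1M-member set via Lua and make it expire almost
+ # immediately: reclaiming it inside the expire cycle should be
+ # slow enough to register an expire-cycle latency event.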
+ r config set latency-monitor-threshold 20
+ r eval {
+ local i = 0
+ while (i < 1000000) do
+ redis.call('sadd','mybigkey',i)
+ i = i+1
+ end
+ } 0
+ r pexpire mybigkey 1
+ after 500
+ assert_match {*expire-cycle*} [r latency latest]
+ }
}
diff --git a/tests/unit/lazyfree.tcl b/tests/unit/lazyfree.tcl
new file mode 100644
index 000000000..4e994494b
--- /dev/null
+++ b/tests/unit/lazyfree.tcl
@@ -0,0 +1,39 @@
+start_server {tags {"lazyfree"}} {
+ test "UNLINK can reclaim memory in background" {
+ set orig_mem [s used_memory]
+ set args {}
+ for {set i 0} {$i < 100000} {incr i} {
+ lappend args $i
+ }
+ r sadd myset {*}$args
+ assert {[r scard myset] == 100000}
+ set peak_mem [s used_memory]
+ assert {[r unlink myset] == 1}
+ assert {$peak_mem > $orig_mem+1000000}
+ wait_for_condition 50 100 {
+ [s used_memory] < $peak_mem &&
+ [s used_memory] < $orig_mem*2
+ } else {
+ fail "Memory is not reclaimed by UNLINK"
+ }
+ }
+
+ test "FLUSHDB ASYNC can reclaim memory in background" {
+ set orig_mem [s used_memory]
+ set args {}
+ for {set i 0} {$i < 100000} {incr i} {
+ lappend args $i
+ }
+ r sadd myset {*}$args
+ assert {[r scard myset] == 100000}
+ set peak_mem [s used_memory]
+ r flushdb async
+ assert {$peak_mem > $orig_mem+1000000}
+ wait_for_condition 50 100 {
+ [s used_memory] < $peak_mem &&
+ [s used_memory] < $orig_mem*2
+ } else {
+ fail "Memory is not reclaimed by FLUSHDB ASYNC"
+ }
+ }
+}
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index e6bf7860c..7629fe05e 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -24,7 +24,7 @@ start_server {tags {"maxmemory"}} {
}
foreach policy {
- allkeys-random allkeys-lru volatile-lru volatile-random volatile-ttl
+ allkeys-random allkeys-lru allkeys-lfu volatile-lru volatile-lfu volatile-random volatile-ttl
} {
test "maxmemory - is the memory limit honoured? (policy $policy)" {
# make sure to start with a blank instance
@@ -98,7 +98,7 @@ start_server {tags {"maxmemory"}} {
}
foreach policy {
- volatile-lru volatile-random volatile-ttl
+ volatile-lru volatile-lfu volatile-random volatile-ttl
} {
test "maxmemory - policy $policy should only remove volatile keys." {
# make sure to start with a blank instance
@@ -142,3 +142,95 @@ start_server {tags {"maxmemory"}} {
}
}
}
+
+proc test_slave_buffers {cmd_count payload_len limit_memory pipeline} {
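+ # cmd_count and payload_len control how many SETRANGE commands are
+ # sent and how large each payload is; limit_memory toggles maxmemory
+ # on the master; pipeline selects a deferring (pipelined) client.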
+ start_server {tags {"maxmemory"}} {
+ start_server {} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+
+ # add 100 keys of 100k (10MB total)
+ for {set j 0} {$j < 100} {incr j} {
+ $master setrange "key:$j" 100000 asdf
+ }
+
+ $master config set maxmemory-policy allkeys-random
+ $master config set client-output-buffer-limit "slave 100000000 100000000 60"
+ $master config set repl-backlog-size [expr {10*1024}]
+
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+
+ # measure used memory after the slave connected and set maxmemory
+ set orig_used [s -1 used_memory]
+ set orig_client_buf [s -1 mem_clients_normal]
+ set orig_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
+ set orig_used_no_repl [expr {$orig_used - $orig_mem_not_counted_for_evict}]
+ set limit [expr {$orig_used - $orig_mem_not_counted_for_evict + 20*1024}]
+
+ if {$limit_memory==1} {
+ $master config set maxmemory $limit
+ }
+
+ # put the slave to sleep
+ set rd_slave [redis_deferring_client]
+ $rd_slave debug sleep 300
+
+ # send some 10MB worth of commands that don't increase the memory usage
+ if {$pipeline == 1} {
+ set rd_master [redis_deferring_client -1]
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ $rd_master setrange key:0 0 [string repeat A $payload_len]
+ }
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ $rd_master read ;# Discard the replies
+ }
+ } else {
+ for {set k 0} {$k < $cmd_count} {incr k} {
+ $master setrange key:0 0 [string repeat A $payload_len]
+ }
+ }
+
+ set new_used [s -1 used_memory]
+ set slave_buf [s -1 mem_clients_slaves]
+ set client_buf [s -1 mem_clients_normal]
+ set mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
+ set used_no_repl [expr {$new_used - $mem_not_counted_for_evict}]
+ set delta [expr {($used_no_repl - $client_buf) - ($orig_used_no_repl - $orig_client_buf)}]
+
+ assert {[$master dbsize] == 100}
+ assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers
+ assert {$delta < 50*1024 && $delta > -50*1024} ;# even 1 unaccounted byte per command would add up to ~1MB over 1M commands
+
+ $master client kill type slave
+ set killed_used [s -1 used_memory]
+ set killed_slave_buf [s -1 mem_clients_slaves]
+ set killed_mem_not_counted_for_evict [s -1 mem_not_counted_for_evict]
+ set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}]
+ set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}]
+ assert {$killed_slave_buf == 0}
+ assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# even 1 unaccounted byte per command would add up to ~1MB over 1M commands
+ }
+ }
+}
+
+test {slave buffers are counted correctly} {
+ # We want to use many small commands without waiting long, so we
+ # need a pipeline (redis_deferring_client); that may cause the query
+ # buffer to fill up and induce eviction, so eviction is disabled here.
+ test_slave_buffers 1000000 10 0 1
+}
+
+test {slave buffers don't induce eviction} {
+ # Test again with fewer (and bigger) commands, without a pipeline,
+ # but with eviction enabled this time.
+ test_slave_buffers 100000 100 1 0
+}
+
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 14e135ced..ec71a36b1 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -1,15 +1,20 @@
proc test_memory_efficiency {range} {
r flushall
+ set rd [redis_deferring_client]
set base_mem [s used_memory]
set written 0
for {set j 0} {$j < 10000} {incr j} {
set key key:$j
set val [string repeat A [expr {int(rand()*$range)}]]
- r set $key $val
+ $rd set $key $val
incr written [string length $key]
incr written [string length $val]
incr written 2 ;# A separator is the minimum to store key-value data.
}
+ for {set j 0} {$j < 10000} {incr j} {
+ $rd read ; # Discard replies
+ }
+
set current_mem [s used_memory]
set used [expr {$current_mem-$base_mem}]
set efficiency [expr {double($written)/$used}]
@@ -30,3 +35,178 @@ start_server {tags {"memefficiency"}} {
}
}
}
+
+start_server {tags {"defrag"}} {
+ if {[string match {*jemalloc*} [s mem_allocator]]} {
+ test "Active defrag" {
+ r config set activedefrag no
+ r config set active-defrag-threshold-lower 5
+ r config set active-defrag-cycle-min 65
+ r config set active-defrag-cycle-max 75
+ r config set active-defrag-ignore-bytes 2mb
+ r config set maxmemory 100mb
+ r config set maxmemory-policy allkeys-lru
+ r debug populate 700000 asdf 150
+ r debug populate 170000 asdf 300
+ r ping ;# trigger eviction following the previous population
+ after 120 ;# serverCron only updates the info once in 100ms
+ set frag [s allocator_frag_ratio]
+ if {$::verbose} {
+ puts "frag $frag"
+ }
+ assert {$frag >= 1.4}
+ catch {r config set activedefrag yes} e
+ if {![string match {DISABLED*} $e]} {
+ # Wait for the active defrag to start working (decision once a
+ # second).
+ wait_for_condition 50 100 {
+ [s active_defrag_running] ne 0
+ } else {
+ fail "defrag not started."
+ }
+
+ # Wait for the active defrag to stop working.
+ wait_for_condition 150 100 {
+ [s active_defrag_running] eq 0
+ } else {
+ after 120 ;# serverCron only updates the info once in 100ms
+ puts [r info memory]
+ puts [r memory malloc-stats]
+ fail "defrag didn't stop."
+ }
+
+ # Test that the fragmentation is lower.
+ after 120 ;# serverCron only updates the info once in 100ms
+ set frag [s allocator_frag_ratio]
+ if {$::verbose} {
+ puts "frag $frag"
+ }
+ assert {$frag < 1.1}
+ } else {
+ set _ ""
+ }
+ } {}
+
+ test "Active defrag big keys" {
+ r flushdb
+ r config resetstat
+ r config set activedefrag no
+ r config set active-defrag-max-scan-fields 1000
+ r config set active-defrag-threshold-lower 5
+ r config set active-defrag-cycle-min 65
+ r config set active-defrag-cycle-max 75
+ r config set active-defrag-ignore-bytes 2mb
+ r config set maxmemory 0
+ r config set list-max-ziplist-size 5 ;# list of 10k items will have 2000 quicklist nodes
+ r config set stream-node-max-entries 5
+ r hmset hash h1 v1 h2 v2 h3 v3
+ r lpush list a b c d
+ r zadd zset 0 a 1 b 2 c 3 d
+ r sadd set a b c d
+ r xadd stream * item 1 value a
+ r xadd stream * item 2 value b
+ r xgroup create stream mygroup 0
+ r xreadgroup GROUP mygroup Alice COUNT 1 STREAMS stream >
+
+ # create big keys with 10k items
+ set rd [redis_deferring_client]
+ for {set j 0} {$j < 10000} {incr j} {
+ $rd hset bighash $j [concat "asdfasdfasdf" $j]
+ $rd lpush biglist [concat "asdfasdfasdf" $j]
+ $rd zadd bigzset $j [concat "asdfasdfasdf" $j]
+ $rd sadd bigset [concat "asdfasdfasdf" $j]
+ $rd xadd bigstream * item 1 value a
+ }
+ for {set j 0} {$j < 50000} {incr j} {
+ $rd read ; # Discard replies
+ }
+
+ set expected_frag 1.7
+ if {$::accurate} {
+ # scale the hash to 1m fields in order to have measurable latency
+ for {set j 10000} {$j < 1000000} {incr j} {
+ $rd hset bighash $j [concat "asdfasdfasdf" $j]
+ }
+ for {set j 10000} {$j < 1000000} {incr j} {
+ $rd read ; # Discard replies
+ }
+ # creating that big hash increased used_memory, so the relative frag goes down
+ set expected_frag 1.3
+ }
+
+ # add a mass of string keys
+ for {set j 0} {$j < 500000} {incr j} {
+ $rd setrange $j 150 a
+ }
+ for {set j 0} {$j < 500000} {incr j} {
+ $rd read ; # Discard replies
+ }
+ assert {[r dbsize] == 500010}
+
+ # create some fragmentation
+ for {set j 0} {$j < 500000} {incr j 2} {
+ $rd del $j
+ }
+ for {set j 0} {$j < 500000} {incr j 2} {
+ $rd read ; # Discard replies
+ }
+ assert {[r dbsize] == 250010}
+
+ # start defrag
+ after 120 ;# serverCron only updates the info once in 100ms
+ set frag [s allocator_frag_ratio]
+ if {$::verbose} {
+ puts "frag $frag"
+ }
+ assert {$frag >= $expected_frag}
+ r config set latency-monitor-threshold 5
+ r latency reset
+
+ set digest [r debug digest]
+ catch {r config set activedefrag yes} e
+ if {![string match {DISABLED*} $e]} {
+ # wait for the active defrag to start working (decision once a second)
+ wait_for_condition 50 100 {
+ [s active_defrag_running] ne 0
+ } else {
+ fail "defrag not started."
+ }
+
+ # wait for the active defrag to stop working
+ wait_for_condition 500 100 {
+ [s active_defrag_running] eq 0
+ } else {
+ after 120 ;# serverCron only updates the info once in 100ms
+ puts [r info memory]
+ puts [r memory malloc-stats]
+ fail "defrag didn't stop."
+ }
+
+ # test that the fragmentation is lower
+ after 120 ;# serverCron only updates the info once in 100ms
+ set frag [s allocator_frag_ratio]
+ set max_latency 0
+ foreach event [r latency latest] {
+ lassign $event eventname time latency max
+ if {$eventname == "active-defrag-cycle"} {
+ set max_latency $max
+ }
+ }
+ if {$::verbose} {
+ puts "frag $frag"
+ puts "max latency $max_latency"
+ puts [r latency latest]
+ puts [r latency history active-defrag-cycle]
+ }
+ assert {$frag < 1.1}
+ # due to high fragmentation, 10hz, and active-defrag-cycle-max set to 75,
+ # we expect max latency to be not much higher than 75ms
+ assert {$max_latency <= 80}
+ }
+ # verify the data isn't corrupted or changed
+ set newdigest [r debug digest]
+ assert {$digest eq $newdigest}
+ r save ;# saving an rdb iterates over all the data / pointers
+ } {OK}
+ }
+}
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index a53f3f5c8..965902456 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -52,7 +52,7 @@ start_server {tags {"other"}} {
test {SELECT an out of range DB} {
catch {r select 1000000} err
set _ $err
- } {*invalid*}
+ } {*index is out of range*}
tags {consistency} {
if {![catch {package require sha1}]} {
@@ -83,6 +83,7 @@ start_server {tags {"other"}} {
} {1}
test {Same dataset digest if saving/reloading as AOF?} {
+ r config set aof-use-rdb-preamble no
r bgrewriteaof
waitForBgrewriteaof r
r debug loadaof
@@ -126,6 +127,7 @@ start_server {tags {"other"}} {
test {EXPIRES after AOF reload (without rewrite)} {
r flushdb
r config set appendonly yes
+ r config set aof-use-rdb-preamble no
r set x somevalue
r expire x 1000
r setex y 2000 somevalue
@@ -194,6 +196,7 @@ start_server {tags {"other"}} {
}
test {APPEND basics} {
+ r del foo
list [r append foo bar] [r get foo] \
[r append foo 100] [r get foo]
} {3 bar 6 bar100}
diff --git a/tests/unit/pendingquerybuf.tcl b/tests/unit/pendingquerybuf.tcl
new file mode 100644
index 000000000..caa940d8e
--- /dev/null
+++ b/tests/unit/pendingquerybuf.tcl
@@ -0,0 +1,35 @@
+proc info_memory {r property} {
+ if {[regexp "\r\n$property:(.*?)\r\n" [{*}$r info memory] _ value]} {
+ set _ $value
+ }
+}
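+# Usage example: [info_memory $master used_memory] extracts the value of
+# the used_memory field from that client's INFO MEMORY reply.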
+
+proc prepare_value {size} {
+ set _v "c"
+ for {set i 1} {$i < $size} {incr i} {
+ append _v 0
+ }
+ return $_v
+}
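+# For example [prepare_value 4] returns "c000": one 'c' character padded
+# with size-1 zeros.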
+
+start_server {tags {"wait"}} {
+start_server {} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+
+ test "pending querybuf: check size of pending_querybuf after set a big value" {
+ $slave slaveof $master_host $master_port
+ set _v [prepare_value [expr 32*1024*1024]]
+ $master set key $_v
+ after 2000
+ set m_usedmemory [info_memory $master used_memory]
+ set s_usedmemory [info_memory $slave used_memory]
+ if { $s_usedmemory > $m_usedmemory + 10*1024*1024 } {
+ fail "the used_memory of slave is too larger than master.Master:$m_usedmemory Slave:$s_usedmemory"
+ }
+ }
+}}
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 1d84f128d..c0f4349d2 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -236,4 +236,50 @@ start_server {tags {"scan"}} {
set first_score [lindex $res 1]
assert {$first_score != 0}
}
+
+ test "SCAN regression test for issue #4906" {
+ for {set k 0} {$k < 100} {incr k} {
+ r del set
+ r sadd set x; # Make sure it's not intset encoded
+ set toremove {}
+ unset -nocomplain found
+ array set found {}
+
+ # Populate the set
+ set numele [expr {101+[randomInt 1000]}]
+ for {set j 0} {$j < $numele} {incr j} {
+ r sadd set $j
+ if {$j >= 100} {
+ lappend toremove $j
+ }
+ }
+
+ # Start scanning
+ set cursor 0
+ set iteration 0
+ set del_iteration [randomInt 10]
+ while {!($cursor == 0 && $iteration != 0)} {
+ lassign [r sscan set $cursor] cursor items
+
+ # Mark found items. We expect to find from 0 to 99 at the end
+ # since those elements will never be removed during the scanning.
+ foreach i $items {
+ set found($i) 1
+ }
+ incr iteration
+ # At some point remove most of the items to trigger the
+ # rehashing to a smaller hash table.
+ if {$iteration == $del_iteration} {
+ r srem set {*}$toremove
+ }
+ }
+
+ # Verify that SSCAN reported everything from 0 to 99
+ for {set j 0} {$j < 100} {incr j} {
+ if {![info exists found($j)]} {
+ fail "SSCAN element missing $j"
+ }
+ }
+ }
+ }
}
diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl
index 921382e34..bcde721c3 100644
--- a/tests/unit/scripting.tcl
+++ b/tests/unit/scripting.tcl
@@ -62,18 +62,19 @@ start_server {tags {"scripting"}} {
} {NOSCRIPT*}
test {EVAL - Redis integer -> Lua type conversion} {
+ r set x 0
r eval {
- local foo = redis.pcall('incr','x')
+ local foo = redis.pcall('incr',KEYS[1])
return {type(foo),foo}
- } 0
+ } 1 x
} {number 1}
test {EVAL - Redis bulk -> Lua type conversion} {
r set mykey myval
r eval {
- local foo = redis.pcall('get','mykey')
+ local foo = redis.pcall('get',KEYS[1])
return {type(foo),foo}
- } 0
+ } 1 mykey
} {string myval}
test {EVAL - Redis multi bulk -> Lua type conversion} {
@@ -82,39 +83,39 @@ start_server {tags {"scripting"}} {
r rpush mylist b
r rpush mylist c
r eval {
- local foo = redis.pcall('lrange','mylist',0,-1)
+ local foo = redis.pcall('lrange',KEYS[1],0,-1)
return {type(foo),foo[1],foo[2],foo[3],# foo}
- } 0
+ } 1 mylist
} {table a b c 3}
test {EVAL - Redis status reply -> Lua type conversion} {
r eval {
- local foo = redis.pcall('set','mykey','myval')
+ local foo = redis.pcall('set',KEYS[1],'myval')
return {type(foo),foo['ok']}
- } 0
+ } 1 mykey
} {table OK}
test {EVAL - Redis error reply -> Lua type conversion} {
r set mykey myval
r eval {
- local foo = redis.pcall('incr','mykey')
+ local foo = redis.pcall('incr',KEYS[1])
return {type(foo),foo['err']}
- } 0
+ } 1 mykey
} {table {ERR value is not an integer or out of range}}
test {EVAL - Redis nil bulk reply -> Lua type conversion} {
r del mykey
r eval {
- local foo = redis.pcall('get','mykey')
+ local foo = redis.pcall('get',KEYS[1])
return {type(foo),foo == false}
- } 0
+ } 1 mykey
} {boolean 1}
test {EVAL - Is the Lua client using the currently selected DB?} {
r set mykey "this is DB 9"
r select 10
r set mykey "this is DB 10"
- r eval {return redis.pcall('get','mykey')} 0
+ r eval {return redis.pcall('get',KEYS[1])} 1 mykey
} {this is DB 10}
test {EVAL - SELECT inside Lua should not affect the caller} {
@@ -141,7 +142,7 @@ start_server {tags {"scripting"}} {
test {EVAL - Scripts can't run certain commands} {
set e {}
- catch {r eval {return redis.pcall('spop','x')} 0} e
+ catch {r eval {return redis.pcall('blpop','x',0)} 0} e
set e
} {*not allowed*}
@@ -272,6 +273,10 @@ start_server {tags {"scripting"}} {
} 0
} {}
+ test {EVAL - Able to parse trailing comments} {
+ r eval {return 'hello' --trailing comment} 0
+ } {hello}
+
test {SCRIPTING FLUSH - is able to clear the scripts cache?} {
r set mykey myval
set v [r evalsha fd758d1589d044dd850a6f05d52f2eefd27f033f 1 mykey]
@@ -325,7 +330,7 @@ start_server {tags {"scripting"}} {
test {Globals protection reading an undeclared global variable} {
catch {r eval {return a} 0} e
set e
- } {*ERR*attempted to access unexisting global*}
+ } {*ERR*attempted to access * global*}
test {Globals protection setting an undeclared global*} {
catch {r eval {a=10} 0} e
@@ -392,6 +397,7 @@ start_server {tags {"scripting"}} {
test {EVAL processes writes from AOF in read-only slaves} {
r flushall
r config set appendonly yes
+ r config set aof-use-rdb-preamble no
r eval {redis.call("set",KEYS[1],"100")} 1 foo
r eval {redis.call("incr",KEYS[1])} 1 foo
r eval {redis.call("incr",KEYS[1])} 1 foo
@@ -463,6 +469,15 @@ start_server {tags {"scripting"}} {
end
} 0
}
+
+ test {Functions in the Redis namespace are able to report errors} {
+ catch {
+ r eval {
+ redis.sha1hex()
+ } 0
+ } e
+ set e
+ } {*wrong number*}
}
# Start a new server since the last test in this stanza will kill the
@@ -502,7 +517,7 @@ start_server {tags {"scripting"}} {
# Note: keep this test at the end of this server stanza because it
# kills the server.
test {SHUTDOWN NOSAVE can kill a timedout script anyway} {
- # The server sould be still unresponding to normal commands.
+ # The server should still be unresponsive to normal commands.
catch {r ping} e
assert_match {BUSY*} $e
catch {r shutdown nosave}
@@ -512,20 +527,111 @@ start_server {tags {"scripting"}} {
}
}
-start_server {tags {"scripting repl"}} {
- start_server {} {
- test {Before the slave connects we issue two EVAL commands} {
- # One with an error, but still executing a command.
- # SHA is: 67164fc43fa971f76fd1aaeeaf60c1c178d25876
- catch {
- r eval {redis.call('incr',KEYS[1]); redis.call('nonexisting')} 1 x
+foreach cmdrepl {0 1} {
+ start_server {tags {"scripting repl"}} {
+ start_server {} {
+ if {$cmdrepl == 1} {
+ set rt "(commmands replication)"
+ } else {
+ set rt "(scripts replication)"
+ r debug lua-always-replicate-commands 1
}
- # One command is correct:
- # SHA is: 6f5ade10a69975e903c6d07b10ea44c6382381a5
- r eval {return redis.call('incr',KEYS[1])} 1 x
- } {2}
- test {Connect a slave to the main instance} {
+ test "Before the slave connects we issue two EVAL commands $rt" {
+ # One with an error, but still executing a command.
+ # SHA is: 67164fc43fa971f76fd1aaeeaf60c1c178d25876
+ catch {
+ r eval {redis.call('incr',KEYS[1]); redis.call('nonexisting')} 1 x
+ }
+ # One command is correct:
+ # SHA is: 6f5ade10a69975e903c6d07b10ea44c6382381a5
+ r eval {return redis.call('incr',KEYS[1])} 1 x
+ } {2}
+
+ test "Connect a slave to the master instance $rt" {
+ r -1 slaveof [srv 0 host] [srv 0 port]
+ wait_for_condition 50 100 {
+ [s -1 role] eq {slave} &&
+ [string match {*master_link_status:up*} [r -1 info replication]]
+ } else {
+ fail "Can't turn the instance into a slave"
+ }
+ }
+
+ test "Now use EVALSHA against the master, with both SHAs $rt" {
+ # The server should replicate successful and unsuccessful
+ # commands as EVAL instead of EVALSHA.
+ catch {
+ r evalsha 67164fc43fa971f76fd1aaeeaf60c1c178d25876 1 x
+ }
+ r evalsha 6f5ade10a69975e903c6d07b10ea44c6382381a5 1 x
+ } {4}
+
+ test "If EVALSHA was replicated as EVAL, 'x' should be '4' $rt" {
+ wait_for_condition 50 100 {
+ [r -1 get x] eq {4}
+ } else {
+ fail "Expected 4 in x, but value is '[r -1 get x]'"
+ }
+ }
+
+ test "Replication of script multiple pushes to list with BLPOP $rt" {
+ set rd [redis_deferring_client]
+ $rd brpop a 0
+ r eval {
+ redis.call("lpush",KEYS[1],"1");
+ redis.call("lpush",KEYS[1],"2");
+ } 1 a
+ set res [$rd read]
+ $rd close
+ wait_for_condition 50 100 {
+ [r -1 lrange a 0 -1] eq [r lrange a 0 -1]
+ } else {
+ fail "Expected list 'a' in slave and master to be the same, but they are respectively '[r -1 lrange a 0 -1]' and '[r lrange a 0 -1]'"
+ }
+ set res
+ } {a 1}
+
+ test "EVALSHA replication when first call is readonly $rt" {
+ r del x
+ r eval {if tonumber(ARGV[1]) > 0 then redis.call('incr', KEYS[1]) end} 1 x 0
+ r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 0
+ r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 1
+ wait_for_condition 50 100 {
+ [r -1 get x] eq {1}
+ } else {
+ fail "Expected 1 in x, but value is '[r -1 get x]'"
+ }
+ }
+
+ test "Lua scripts using SELECT are replicated correctly $rt" {
+ r eval {
+ redis.call("set","foo1","bar1")
+ redis.call("select","10")
+ redis.call("incr","x")
+ redis.call("select","11")
+ redis.call("incr","z")
+ } 0
+ r eval {
+ redis.call("set","foo1","bar1")
+ redis.call("select","10")
+ redis.call("incr","x")
+ redis.call("select","11")
+ redis.call("incr","z")
+ } 0
+ wait_for_condition 50 100 {
+ [r -1 debug digest] eq [r debug digest]
+ } else {
+ fail "Master-Slave desync after Lua script using SELECT."
+ }
+ }
+ }
+ }
+}
+
+start_server {tags {"scripting repl"}} {
+ start_server {overrides {appendonly yes aof-use-rdb-preamble no}} {
+ test "Connect a slave to the master instance" {
r -1 slaveof [srv 0 host] [srv 0 port]
wait_for_condition 50 100 {
[s -1 role] eq {slave} &&
@@ -535,72 +641,96 @@ start_server {tags {"scripting repl"}} {
}
}
- test {Now use EVALSHA against the master, with both SHAs} {
- # The server should replicate successful and unsuccessful
- # commands as EVAL instead of EVALSHA.
+ test "Redis.replicate_commands() must be issued before any write" {
+ r eval {
+ redis.call('set','foo','bar');
+ return redis.replicate_commands();
+ } 0
+ } {}
+
+ test "Redis.replicate_commands() must be issued before any write (2)" {
+ r eval {
+ return redis.replicate_commands();
+ } 0
+ } {1}
+
+ test "Redis.set_repl() must be issued after replicate_commands()" {
catch {
- r evalsha 67164fc43fa971f76fd1aaeeaf60c1c178d25876 1 x
- }
- r evalsha 6f5ade10a69975e903c6d07b10ea44c6382381a5 1 x
- } {4}
+ r eval {
+ redis.set_repl(redis.REPL_ALL);
+ } 0
+ } e
+ set e
+ } {*only after turning on*}
- test {If EVALSHA was replicated as EVAL, 'x' should be '4'} {
- wait_for_condition 50 100 {
- [r -1 get x] eq {4}
- } else {
- fail "Expected 4 in x, but value is '[r -1 get x]'"
- }
- }
+ test "Redis.set_repl() don't accept invalid values" {
+ catch {
+ r eval {
+ redis.replicate_commands();
+ redis.set_repl(12345);
+ } 0
+ } e
+ set e
+ } {*Invalid*flags*}
- test {Replication of script multiple pushes to list with BLPOP} {
- set rd [redis_deferring_client]
- $rd brpop a 0
+ test "Test selective replication of certain Redis commands from Lua" {
+ r del a b c d
r eval {
- redis.call("lpush",KEYS[1],"1");
- redis.call("lpush",KEYS[1],"2");
- } 1 a
- set res [$rd read]
- $rd close
- wait_for_condition 50 100 {
- [r -1 lrange a 0 -1] eq [r lrange a 0 -1]
- } else {
- fail "Expected list 'a' in slave and master to be the same, but they are respectively '[r -1 lrange a 0 -1]' and '[r lrange a 0 -1]'"
- }
- set res
- } {a 1}
-
- test {EVALSHA replication when first call is readonly} {
- r del x
- r eval {if tonumber(ARGV[1]) > 0 then redis.call('incr', KEYS[1]) end} 1 x 0
- r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 0
- r evalsha 6e0e2745aa546d0b50b801a20983b70710aef3ce 1 x 1
+ redis.replicate_commands();
+ redis.call('set','a','1');
+ redis.set_repl(redis.REPL_NONE);
+ redis.call('set','b','2');
+ redis.set_repl(redis.REPL_AOF);
+ redis.call('set','c','3');
+ redis.set_repl(redis.REPL_ALL);
+ redis.call('set','d','4');
+ } 0
+
wait_for_condition 50 100 {
- [r -1 get x] eq {1}
+ [r -1 mget a b c d] eq {1 {} {} 4}
} else {
- fail "Expected 1 in x, but value is '[r -1 get x]'"
+ fail "Only a and c should be replicated to slave"
}
+
+ # Master should have everything right now
+ assert {[r mget a b c d] eq {1 2 3 4}}
+
+ # After an AOF reload only a, c and d should exist
+ r debug loadaof
+
+ assert {[r mget a b c d] eq {1 {} 3 4}}
}
- test {Lua scripts using SELECT are replicated correctly} {
- r eval {
- redis.call("set","foo1","bar1")
- redis.call("select","10")
- redis.call("incr","x")
- redis.call("select","11")
- redis.call("incr","z")
- } 0
+ test "PRNG is seeded randomly for command replication" {
+ set a [
+ r eval {
+ redis.replicate_commands();
+ return math.random()*100000;
+ } 0
+ ]
+ set b [
+ r eval {
+ redis.replicate_commands();
+ return math.random()*100000;
+ } 0
+ ]
+ assert {$a ne $b}
+ }
+
+ test "Using side effects is not a problem with command replication" {
r eval {
- redis.call("set","foo1","bar1")
- redis.call("select","10")
- redis.call("incr","x")
- redis.call("select","11")
- redis.call("incr","z")
+ redis.replicate_commands();
+ redis.call('set','time',redis.call('time')[1])
} 0
+
+ assert {[r get time] ne {}}
+
wait_for_condition 50 100 {
- [r -1 debug digest] eq [r debug digest]
+ [r get time] eq [r -1 get time]
} else {
- fail "Master-Slave desync after Lua script using SELECT."
+ fail "Time key does not match between master and slave"
}
}
}
}
+
diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl
index b25b91e2c..dbd7a1547 100644
--- a/tests/unit/slowlog.tcl
+++ b/tests/unit/slowlog.tcl
@@ -31,12 +31,14 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
} {0}
test {SLOWLOG - logged entry sanity check} {
+ r client setname foobar
r debug sleep 0.2
set e [lindex [r slowlog get] 0]
- assert_equal [llength $e] 4
+ assert_equal [llength $e] 6
assert_equal [lindex $e 0] 105
assert_equal [expr {[lindex $e 2] > 100000}] 1
assert_equal [lindex $e 3] {debug sleep 0.2}
+ assert_equal {foobar} [lindex $e 5]
}
test {SLOWLOG - commands with too many arguments are trimmed} {
@@ -67,4 +69,23 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} {
set e [lindex [r slowlog get] 0]
assert_equal [lindex $e 3] {debug sleep 0.2}
}
+
+ test {SLOWLOG - can clean older entries} {
+ r client setname lastentry_client
+ r config set slowlog-max-len 1
+ r debug sleep 0.2
+ assert {[llength [r slowlog get]] == 1}
+ set e [lindex [r slowlog get] 0]
+ assert_equal {lastentry_client} [lindex $e 5]
+ }
+
+ test {SLOWLOG - can be disabled} {
+ r config set slowlog-log-slower-than 1
+ r slowlog reset
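+ # SLOWLOG RESET itself takes more than one microsecond, so it is
+ # expected to be the single logged entry here.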
+ assert_equal [r slowlog len] 1
+ r config set slowlog-log-slower-than -1
+ r slowlog reset
+ r debug sleep 0.2
+ assert_equal [r slowlog len] 0
+ }
}
diff --git a/tests/unit/type/hash.tcl b/tests/unit/type/hash.tcl
index fa52afd16..d2c679d32 100644
--- a/tests/unit/type/hash.tcl
+++ b/tests/unit/type/hash.tcl
@@ -2,8 +2,8 @@ start_server {tags {"hash"}} {
test {HSET/HLEN - Small hash creation} {
array set smallhash {}
for {set i 0} {$i < 8} {incr i} {
- set key [randstring 0 8 alpha]
- set val [randstring 0 8 alpha]
+ set key __avoid_collisions__[randstring 0 8 alpha]
+ set val __avoid_collisions__[randstring 0 8 alpha]
if {[info exists smallhash($key)]} {
incr i -1
continue
@@ -21,8 +21,8 @@ start_server {tags {"hash"}} {
test {HSET/HLEN - Big hash creation} {
array set bighash {}
for {set i 0} {$i < 1024} {incr i} {
- set key [randstring 0 8 alpha]
- set val [randstring 0 8 alpha]
+ set key __avoid_collisions__[randstring 0 8 alpha]
+ set val __avoid_collisions__[randstring 0 8 alpha]
if {[info exists bighash($key)]} {
incr i -1
continue
@@ -33,7 +33,7 @@ start_server {tags {"hash"}} {
list [r hlen bighash]
} {1024}
- test {Is the big hash encoded with a ziplist?} {
+ test {Is the big hash encoded with a hash table?} {
assert_encoding hashtable bighash
}
@@ -390,6 +390,54 @@ start_server {tags {"hash"}} {
lappend rv [string match "ERR*not*float*" $bigerr]
} {1 1}
+ test {HSTRLEN against the small hash} {
+ set err {}
+ foreach k [array names smallhash *] {
+ if {[string length $smallhash($k)] ne [r hstrlen smallhash $k]} {
+ set err "[string length $smallhash($k)] != [r hstrlen smallhash $k]"
+ break
+ }
+ }
+ set _ $err
+ } {}
+
+ test {HSTRLEN against the big hash} {
+ set err {}
+ foreach k [array names bighash *] {
+ if {[string length $bighash($k)] ne [r hstrlen bighash $k]} {
+ set err "[string length $bighash($k)] != [r hstrlen bighash $k]"
+ puts "HSTRLEN and logical length mismatch:"
+ puts "key: $k"
+ puts "Logical content: $bighash($k)"
+ puts "Server content: [r hget bighash $k]"
+ }
+ }
+ set _ $err
+ } {}
+
+ test {HSTRLEN against non existing field} {
+ set rv {}
+ lappend rv [r hstrlen smallhash __123123123__]
+ lappend rv [r hstrlen bighash __123123123__]
+ set _ $rv
+ } {0 0}
+
+ test {HSTRLEN corner cases} {
+ set vals {
+ -9223372036854775808 9223372036854775807 9223372036854775808
+ {} 0 -1 x
+ }
+ foreach v $vals {
+ r hmset smallhash field $v
+ r hmset bighash field $v
+ set len1 [string length $v]
+ set len2 [r hstrlen smallhash field]
+ set len3 [r hstrlen bighash field]
+ assert {$len1 == $len2}
+ assert {$len2 == $len3}
+ }
+ }
+
test {Hash ziplist regression test for large keys} {
r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk a
r hset hash kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk b
@@ -467,4 +515,22 @@ start_server {tags {"hash"}} {
assert {[r object encoding myhash] eq {hashtable}}
}
}
+
+ # The following test can only be executed if we don't use Valgrind, and if
+ # we are using x86_64 architecture, because:
+ #
+ # 1) Valgrind has floating point limitations, with no support for
+ # 80-bit math.
+ # 2) Other archs may have the same limits.
+ #
+ # 1.23 cannot be represented exactly by a 64 bit double, so we skip
+ # the test on those platforms: we are only testing pretty printing
+ # here, and it is not a bug if the program outputs something like
+ # 1.299999...
+ if {!$::valgrind && [string match *x86_64* [exec uname -a]]} {
+ test {Test HINCRBYFLOAT for correct float representation (issue #2846)} {
+ r del myhash
+ assert {[r hincrbyfloat myhash float 1.23] eq {1.23}}
+ assert {[r hincrbyfloat myhash float 0.77] eq {2}}
+ assert {[r hincrbyfloat myhash float -0.1] eq {1.9}}
+ }
+ }
}
diff --git a/tests/unit/type/incr.tcl b/tests/unit/type/incr.tcl
new file mode 100644
index 000000000..a58710d39
--- /dev/null
+++ b/tests/unit/type/incr.tcl
@@ -0,0 +1,154 @@
+start_server {tags {"incr"}} {
+ test {INCR against non existing key} {
+ set res {}
+ append res [r incr novar]
+ append res [r get novar]
+ } {11}
+
+ test {INCR against key created by incr itself} {
+ r incr novar
+ } {2}
+
+ test {INCR against key originally set with SET} {
+ r set novar 100
+ r incr novar
+ } {101}
+
+ test {INCR over 32bit value} {
+ r set novar 17179869184
+ r incr novar
+ } {17179869185}
+
+ test {INCRBY over 32bit value with over 32bit increment} {
+ r set novar 17179869184
+ r incrby novar 17179869184
+ } {34359738368}
+
+ test {INCR fails against key with spaces (left)} {
+ r set novar " 11"
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against key with spaces (right)} {
+ r set novar "11 "
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against key with spaces (both)} {
+ r set novar " 11 "
+ catch {r incr novar} err
+ format $err
+ } {ERR*}
+
+ test {INCR fails against a key holding a list} {
+ r rpush mylist 1
+ catch {r incr mylist} err
+ r rpop mylist
+ format $err
+ } {WRONGTYPE*}
+
+ test {DECRBY over 32bit value with over 32bit increment, negative res} {
+ r set novar 17179869184
+ r decrby novar 17179869185
+ } {-1}
+
+ test {INCR uses shared objects in the 0-9999 range} {
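+ # Redis preallocates shared objects for the small integers 0..9999,
+ # so values in that range have refcount > 1, while 10000 and above
+ # get a private object with refcount 1.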
+ r set foo -1
+ r incr foo
+ assert {[r object refcount foo] > 1}
+ r set foo 9998
+ r incr foo
+ assert {[r object refcount foo] > 1}
+ r incr foo
+ assert {[r object refcount foo] == 1}
+ }
+
+ test {INCR can modify objects in-place} {
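+ # DEBUG OBJECT reports the object address in a field like "at:0x...":
+ # if the address does not change across INCR, the integer was
+ # modified in place instead of being reallocated.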
+ r set foo 20000
+ r incr foo
+ assert {[r object refcount foo] == 1}
+ set old [lindex [split [r debug object foo]] 1]
+ r incr foo
+ set new [lindex [split [r debug object foo]] 1]
+ assert {[string range $old 0 2] eq "at:"}
+ assert {[string range $new 0 2] eq "at:"}
+ assert {$old eq $new}
+ }
+
+ test {INCRBYFLOAT against non existing key} {
+ r del novar
+ list [roundFloat [r incrbyfloat novar 1]] \
+ [roundFloat [r get novar]] \
+ [roundFloat [r incrbyfloat novar 0.25]] \
+ [roundFloat [r get novar]]
+ } {1 1 1.25 1.25}
+
+ test {INCRBYFLOAT against key originally set with SET} {
+ r set novar 1.5
+ roundFloat [r incrbyfloat novar 1.5]
+ } {3}
+
+ test {INCRBYFLOAT over 32bit value} {
+ r set novar 17179869184
+ r incrbyfloat novar 1.5
+ } {17179869185.5}
+
+ test {INCRBYFLOAT over 32bit value with over 32bit increment} {
+ r set novar 17179869184
+ r incrbyfloat novar 17179869184
+ } {34359738368}
+
+ test {INCRBYFLOAT fails against key with spaces (left)} {
+ set err {}
+ r set novar " 11"
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against key with spaces (right)} {
+ set err {}
+ r set novar "11 "
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against key with spaces (both)} {
+ set err {}
+ r set novar " 11 "
+ catch {r incrbyfloat novar 1.0} err
+ format $err
+ } {ERR*valid*}
+
+ test {INCRBYFLOAT fails against a key holding a list} {
+ r del mylist
+ set err {}
+ r rpush mylist 1
+ catch {r incrbyfloat mylist 1.0} err
+ r del mylist
+ format $err
+ } {WRONGTYPE*}
+
+ test {INCRBYFLOAT does not allow NaN or Infinity} {
+ r set foo 0
+ set err {}
+ catch {r incrbyfloat foo +inf} err
+ set err
+ # Note: there is no way to force a NaN via the API, since we
+ # cannot increment / decrement by infinity nor perform
+ # divisions.
+ } {ERR*would produce*}
+
+ test {INCRBYFLOAT decrement} {
+ r set foo 1
+ roundFloat [r incrbyfloat foo -1.1]
+ } {-0.1}
+
+ test {string to double with null terminator} {
+ r set foo 1
+ r setrange foo 2 2
+ catch {r incrbyfloat foo 1} err
+ format $err
+ } {ERR*valid*}
+}
diff --git a/tests/unit/type/list-3.tcl b/tests/unit/type/list-3.tcl
index ece6ea2d5..b5bd48cb0 100644
--- a/tests/unit/type/list-3.tcl
+++ b/tests/unit/type/list-3.tcl
@@ -13,6 +13,50 @@ start_server {
assert_equal [r lindex l 1] [lindex $mylist 1]
}
+ test {Regression for quicklist #3343 bug} {
+ r del mylist
+ r lpush mylist 401
+ r lpush mylist 392
+ r rpush mylist [string repeat x 5105]"799"
+ r lset mylist -1 [string repeat x 1014]"702"
+ r lpop mylist
+ r lset mylist -1 [string repeat x 4149]"852"
+ r linsert mylist before 401 [string repeat x 9927]"12"
+ r lrange mylist 0 -1
+ r ping ; # It's enough if the server is still alive
+ } {PONG}
+
+ test {Stress tester for #3343-alike bugs} {
+ r del key
+ for {set j 0} {$j < 10000} {incr j} {
+ set op [randomInt 6]
+ set small_signed_count [expr 5-[randomInt 10]]
+ if {[randomInt 2] == 0} {
+ set ele [randomInt 1000]
+ } else {
+ set ele [string repeat x [randomInt 10000]][randomInt 1000]
+ }
+ switch $op {
+ 0 {r lpush key $ele}
+ 1 {r rpush key $ele}
+ 2 {r lpop key}
+ 3 {r rpop key}
+ 4 {
+ catch {r lset key $small_signed_count $ele}
+ }
+ 5 {
+ set otherele [randomInt 1000]
+ if {[randomInt 2] == 0} {
+ set where before
+ } else {
+ set where after
+ }
+ r linsert key $where $otherele $ele
+ }
+ }
+ }
+ }
+
tags {slow} {
test {ziplist implementation: value encoding and backlink} {
if {$::accurate} {set iterations 100} else {set iterations 10}
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index e4d568cf1..1557082a2 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -507,7 +507,9 @@ start_server {
create_list xlist "$large c"
assert_equal 3 [r rpushx xlist d]
assert_equal 4 [r lpushx xlist a]
- assert_equal "a $large c d" [r lrange xlist 0 -1]
+ assert_equal 6 [r rpushx xlist 42 x]
+ assert_equal 9 [r lpushx xlist y3 y2 y1]
+ assert_equal "y1 y2 y3 a $large c d 42 x" [r lrange xlist 0 -1]
}
test "LINSERT - $type" {
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 74a8fb318..7b467f1c4 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -346,6 +346,33 @@ start_server {
r spop nonexisting_key 100
} {}
+ test "SPOP new implementation: code path #1" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 30]
+ assert {[lsort $content] eq [lsort $res]}
+ }
+
+ test "SPOP new implementation: code path #2" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 2]
+ assert {[llength $res] == 2}
+ assert {[r scard myset] == 18}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
+ test "SPOP new implementation: code path #3" {
+ set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ create_set myset $content
+ set res [r spop myset 18]
+ assert {[llength $res] == 18}
+ assert {[r scard myset] == 2}
+ set union [concat [r smembers myset] $res]
+ assert {[lsort $union] eq [lsort $content]}
+ }
+
test "SRANDMEMBER with <count> against non existing key" {
r srandmember nonexisting_key 100
} {}
@@ -492,6 +519,7 @@ start_server {
test "SMOVE non existing key" {
setup_move
assert_equal 0 [r smove myset1 myset2 foo]
+ assert_equal 0 [r smove myset1 myset1 foo]
assert_equal {1 a b} [lsort [r smembers myset1]]
assert_equal {2 3 4} [lsort [r smembers myset2]]
}
diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl
new file mode 100644
index 000000000..7c991a5da
--- /dev/null
+++ b/tests/unit/type/stream-cgroups.tcl
@@ -0,0 +1,99 @@
+start_server {
+ tags {"stream"}
+} {
+ test {XGROUP CREATE: creation and duplicate group name detection} {
+ r DEL mystream
+ r XADD mystream * foo bar
+ r XGROUP CREATE mystream mygroup $
+ catch {r XGROUP CREATE mystream mygroup $} err
+ set err
+ } {BUSYGROUP*}
+
+ test {XREADGROUP will return only new elements} {
+ r XADD mystream * a 1
+ r XADD mystream * b 2
+ # XREADGROUP should return only the new elements "a 1" "b 2",
+ # and not the pre-existing element "foo bar" added to the
+ # stream in the previous test.
+ set reply [
+ r XREADGROUP GROUP mygroup client-1 STREAMS mystream ">"
+ ]
+ assert {[llength [lindex $reply 0 1]] == 2}
+ lindex $reply 0 1 0 1
+ } {a 1}
+
+ test {XREADGROUP can read the history of the elements we own} {
+ # Add a few more elements
+ r XADD mystream * c 3
+ r XADD mystream * d 4
+ # Read a few elements using a different consumer name
+ set reply [
+ r XREADGROUP GROUP mygroup client-2 STREAMS mystream ">"
+ ]
+ assert {[llength [lindex $reply 0 1]] == 2}
+ assert {[lindex $reply 0 1 0 1] eq {c 3}}
+
+ set r1 [r XREADGROUP GROUP mygroup client-1 COUNT 10 STREAMS mystream 0]
+ set r2 [r XREADGROUP GROUP mygroup client-2 COUNT 10 STREAMS mystream 0]
+ assert {[lindex $r1 0 1 0 1] eq {a 1}}
+ assert {[lindex $r2 0 1 0 1] eq {c 3}}
+ }
+
+ test {XPENDING is able to return pending items} {
+ set pending [r XPENDING mystream mygroup - + 10]
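+ # Each pending entry is a list: <id> <consumer> <idle-ms> <delivery-count>.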
+ assert {[llength $pending] == 4}
+ for {set j 0} {$j < 4} {incr j} {
+ set item [lindex $pending $j]
+ if {$j < 2} {
+ set owner client-1
+ } else {
+ set owner client-2
+ }
+ assert {[lindex $item 1] eq $owner}
+ }
+ }
+
+ test {XPENDING can return single consumer items} {
+ set pending [r XPENDING mystream mygroup - + 10 client-1]
+ assert {[llength $pending] == 2}
+ }
+
+ test {XACK is able to remove items from the client/group PEL} {
+ set pending [r XPENDING mystream mygroup - + 10 client-1]
+ set id1 [lindex $pending 0 0]
+ set id2 [lindex $pending 1 0]
+ assert {[r XACK mystream mygroup $id1] eq 1}
+ set pending [r XPENDING mystream mygroup - + 10 client-1]
+ assert {[llength $pending] == 1}
+ set id [lindex $pending 0 0]
+ assert {$id eq $id2}
+ set global_pel [r XPENDING mystream mygroup - + 10]
+ assert {[llength $global_pel] == 3}
+ }
+
+ test {XACK can't remove the same item multiple times} {
+ assert {[r XACK mystream mygroup $id1] eq 0}
+ }
+
+ test {XACK is able to accept multiple arguments} {
+ # One of the IDs was already acknowledged, so the command
+ # should ack just $id2 and return 1.
+ assert {[r XACK mystream mygroup $id1 $id2] eq 1}
+ }
+
+ test {PEL NACK reassignment after XGROUP SETID event} {
+ r del events
+ r xadd events * f1 v1
+ r xadd events * f1 v1
+ r xadd events * f1 v1
+ r xadd events * f1 v1
+ r xgroup create events g1 $
+ r xadd events * f1 v1
+ set c [llength [lindex [r xreadgroup group g1 c1 streams events >] 0 1]]
+ assert {$c == 1}
+ r xgroup setid events g1 -
+ set c [llength [lindex [r xreadgroup group g1 c2 streams events >] 0 1]]
+ assert {$c == 5}
+ }
+}
diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl
new file mode 100644
index 000000000..5cf6805d7
--- /dev/null
+++ b/tests/unit/type/stream.tcl
@@ -0,0 +1,319 @@
+# Compare two stream IDs; the return value is like strcmp()'s.
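+# For example [streamCompareID 5-10 5-9] is 1, [streamCompareID 5-10 6-0]
+# is -1 and [streamCompareID 5-10 5-10] is 0.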
+proc streamCompareID {a b} {
+ if {$a eq $b} {return 0}
+ lassign [split $a -] a_ms a_seq
+ lassign [split $b -] b_ms b_seq
+ if {$a_ms > $b_ms} {return 1}
+ if {$a_ms < $b_ms} {return -1}
+ # Same ms case, compare seq.
+ if {$a_seq > $b_seq} {return 1}
+ if {$a_seq < $b_seq} {return -1}
+}
+
+# Return the ID immediately greater than the specified one.
+# Note that this function does not bother handling 'seq' overflow,
+# which is practically impossible since it is a 64 bit value.
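+# For example [streamNextID 5-11] returns 5-12.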
+proc streamNextID {id} {
+ lassign [split $id -] ms seq
+ incr seq
+ join [list $ms $seq] -
+}
+
+# Generate a random stream entry ID with the ms part between min and max
+# and a low sequence number (0 - 999 range), in order to stress test
+# XRANGE against a pure Tcl reference implementation of the same
+# concept, backed by a linear array.
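+# For example [streamRandomID 100-0 200-0] may return 153-421: the ms part
+# is uniform in the [100,200] range, the seq part in the [0,999] range.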
+proc streamRandomID {min_id max_id} {
+ lassign [split $min_id -] min_ms min_seq
+ lassign [split $max_id -] max_ms max_seq
+ set delta [expr {$max_ms-$min_ms+1}]
+ set ms [expr {$min_ms+[randomInt $delta]}]
+ set seq [randomInt 1000]
+ return $ms-$seq
+}
+
+# Tcl-side implementation of XRANGE used to fuzz test the Redis
+# XRANGE implementation.
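+# Both interval extremes are inclusive, matching the real XRANGE semantics.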
+proc streamSimulateXRANGE {items start end} {
+ set res {}
+ foreach i $items {
+ set this_id [lindex $i 0]
+ if {[streamCompareID $this_id $start] >= 0} {
+ if {[streamCompareID $this_id $end] <= 0} {
+ lappend res $i
+ }
+ }
+ }
+ return $res
+}
+
+set content {} ;# Will be populated with Tcl side copy of the stream content.
+
+start_server {
+ tags {"stream"}
+} {
+ test {XADD can add entries into a stream that XRANGE can fetch} {
+ r XADD mystream * item 1 value a
+ r XADD mystream * item 2 value b
+ assert_equal 2 [r XLEN mystream]
+ set items [r XRANGE mystream - +]
+ assert_equal [lindex $items 0 1] {item 1 value a}
+ assert_equal [lindex $items 1 1] {item 2 value b}
+ }
+
+ test {XADD IDs are incremental} {
+ set id1 [r XADD mystream * item 1 value a]
+ set id2 [r XADD mystream * item 2 value b]
+ set id3 [r XADD mystream * item 3 value c]
+ assert {[streamCompareID $id1 $id2] == -1}
+ assert {[streamCompareID $id2 $id3] == -1}
+ }
+
+ test {XADD IDs are incremental when ms is the same as well} {
+ r multi
+ r XADD mystream * item 1 value a
+ r XADD mystream * item 2 value b
+ r XADD mystream * item 3 value c
+ lassign [r exec] id1 id2 id3
+ assert {[streamCompareID $id1 $id2] == -1}
+ assert {[streamCompareID $id2 $id3] == -1}
+ }
+
+ test {XADD with MAXLEN option} {
+ r DEL mystream
+ for {set j 0} {$j < 1000} {incr j} {
+ if {rand() < 0.9} {
+ r XADD mystream MAXLEN 5 * xitem $j
+ } else {
+ r XADD mystream MAXLEN 5 * yitem $j
+ }
+ }
+ set res [r xrange mystream - +]
+ set expected 995
+ foreach r $res {
+ assert {[lindex $r 1 1] == $expected}
+ incr expected
+ }
+ }
+
+ test {XADD mass insertion and XLEN} {
+ r DEL mystream
+ r multi
+ for {set j 0} {$j < 10000} {incr j} {
+ # From time to time insert an entry with a different set
+ # of fields in order to stress the stream compression code.
+ if {rand() < 0.9} {
+ r XADD mystream * item $j
+ } else {
+ r XADD mystream * item $j otherfield foo
+ }
+ }
+ r exec
+
+ set items [r XRANGE mystream - +]
+ for {set j 0} {$j < 10000} {incr j} {
+ assert {[lrange [lindex $items $j 1] 0 1] eq [list item $j]}
+ }
+ assert {[r xlen mystream] == $j}
+ }
+
+ test {XRANGE COUNT works as expected} {
+ assert {[llength [r xrange mystream - + COUNT 10]] == 10}
+ }
+
+ test {XREVRANGE COUNT works as expected} {
+ assert {[llength [r xrevrange mystream + - COUNT 10]] == 10}
+ }
+
+ test {XRANGE can be used to iterate the whole stream} {
+ set last_id "-"
+ set j 0
+ while 1 {
+ set elements [r xrange mystream $last_id + COUNT 100]
+ if {[llength $elements] == 0} break
+ foreach e $elements {
+ assert {[lrange [lindex $e 1] 0 1] eq [list item $j]}
+ incr j
+ }
+ set last_id [streamNextID [lindex $elements end 0]]
+ }
+ assert {$j == 10000}
+ }
+
+ test {XREVRANGE returns the reverse of XRANGE} {
+ assert {[r xrange mystream - +] == [lreverse [r xrevrange mystream + -]]}
+ }
+
+ test {XREAD with non empty stream} {
+ set res [r XREAD COUNT 1 STREAMS mystream 0-0]
+ assert {[lrange [lindex $res 0 1 0 1] 0 1] eq {item 0}}
+ }
+
+ test {Non blocking XREAD with empty streams} {
+ set res [r XREAD STREAMS s1 s2 0-0 0-0]
+ assert {$res eq {}}
+ }
+
+ test {XREAD with non empty second stream} {
+ set res [r XREAD COUNT 1 STREAMS nostream mystream 0-0 0-0]
+ assert {[lindex $res 0 0] eq {mystream}}
+ assert {[lrange [lindex $res 0 1 0 1] 0 1] eq {item 0}}
+ }
+
+ test {Blocking XREAD waiting new data} {
+ r XADD s2 * old abcd1234
+ set rd [redis_deferring_client]
+ $rd XREAD BLOCK 20000 STREAMS s1 s2 s3 $ $ $
+ r XADD s2 * new abcd1234
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s2}}
+ assert {[lindex $res 0 1 0 1] eq {new abcd1234}}
+ }
+
+ test {Blocking XREAD waiting old data} {
+ set rd [redis_deferring_client]
+ $rd XREAD BLOCK 20000 STREAMS s1 s2 s3 $ 0-0 $
+ r XADD s2 * foo abcd1234
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s2}}
+ assert {[lindex $res 0 1 0 1] eq {old abcd1234}}
+ }
+
+ test "XREAD: XADD + DEL should not awake client" {
+ set rd [redis_deferring_client]
+ r del s1
+ $rd XREAD BLOCK 20000 STREAMS s1 $
+ r multi
+ r XADD s1 * old abcd1234
+ r DEL s1
+ r exec
+ r XADD s1 * new abcd1234
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s1}}
+ assert {[lindex $res 0 1 0 1] eq {new abcd1234}}
+ }
+
+ test "XREAD: XADD + DEL + LPUSH should not awake client" {
+ set rd [redis_deferring_client]
+ r del s1
+ $rd XREAD BLOCK 20000 STREAMS s1 $
+ r multi
+ r XADD s1 * old abcd1234
+ r DEL s1
+ r LPUSH s1 foo bar
+ r exec
+ r DEL s1
+ r XADD s1 * new abcd1234
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s1}}
+ assert {[lindex $res 0 1 0 1] eq {new abcd1234}}
+ }
+
+ test {XREAD with same stream name multiple times should work} {
+ r XADD s2 * old abcd1234
+ set rd [redis_deferring_client]
+ $rd XREAD BLOCK 20000 STREAMS s2 s2 s2 $ $ $
+ r XADD s2 * new abcd1234
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s2}}
+ assert {[lindex $res 0 1 0 1] eq {new abcd1234}}
+ }
+
+ test {XREAD + multiple XADD inside transaction} {
+ r XADD s2 * old abcd1234
+ set rd [redis_deferring_client]
+ $rd XREAD BLOCK 20000 STREAMS s2 s2 s2 $ $ $
+ r MULTI
+ r XADD s2 * field one
+ r XADD s2 * field two
+ r XADD s2 * field three
+ r EXEC
+ set res [$rd read]
+ assert {[lindex $res 0 0] eq {s2}}
+ assert {[lindex $res 0 1 0 1] eq {field one}}
+ assert {[lindex $res 0 1 1 1] eq {field two}}
+ }
+
+ test {XDEL basic test} {
+ r del somestream
+ r xadd somestream * foo value0
+ set id [r xadd somestream * foo value1]
+ r xadd somestream * foo value2
+ r xdel somestream $id
+ assert {[r xlen somestream] == 2}
+ set result [r xrange somestream - +]
+ assert {[lindex $result 0 1 1] eq {value0}}
+ assert {[lindex $result 1 1 1] eq {value2}}
+ }
+
+ # Here the idea is to check the consistency of the stream data structure
+ # as we remove all the elements down to zero elements.
+ test {XDEL fuzz test} {
+ r del somestream
+ set ids {}
+ set x 0; # Length of the stream
+ while 1 {
+ lappend ids [r xadd somestream * item $x]
+ incr x
+ # Add enough elements to have a few radix tree nodes inside the stream.
+ if {[dict get [r xinfo stream somestream] radix-tree-keys] > 20} break
+ }
+
+ # Now remove all the elements till we reach an empty stream
+ # and after every deletion, check that the stream is sane enough
+ # to report the right number of elements with XRANGE: this will also
+ # force accessing the whole data structure to check sanity.
+ assert {[r xlen somestream] == $x}
+
+ # We want to remove elements in random order to really test the
+ # implementation in a better way.
+ set ids [lshuffle $ids]
+ foreach id $ids {
+ assert {[r xdel somestream $id] == 1}
+ incr x -1
+ assert {[r xlen somestream] == $x}
+ # The test would be too slow calling XRANGE for every iteration.
+            # Do it every 100 removals.
+ if {$x % 100 == 0} {
+ set res [r xrange somestream - +]
+ assert {[llength $res] == $x}
+ }
+ }
+ }
+
+ test {XRANGE fuzzing} {
+ set low_id [lindex $items 0 0]
+ set high_id [lindex $items end 0]
+ for {set j 0} {$j < 100} {incr j} {
+ set start [streamRandomID $low_id $high_id]
+ set end [streamRandomID $low_id $high_id]
+ set range [r xrange mystream $start $end]
+ set tcl_range [streamSimulateXRANGE $items $start $end]
+ if {$range ne $tcl_range} {
+ puts "*** WARNING *** - XRANGE fuzzing mismatch: $start - $end"
+ puts "---"
+ puts "XRANGE: '$range'"
+ puts "---"
+ puts "TCL: '$tcl_range'"
+ puts "---"
+ fail "XRANGE fuzzing failed, check logs for details"
+ }
+ }
+ }
+
+ test {XREVRANGE regression test for issue #5006} {
+ # Add non compressed entries
+ r xadd teststream 1234567891230 key1 value1
+ r xadd teststream 1234567891240 key2 value2
+ r xadd teststream 1234567891250 key3 value3
+
+ # Add SAMEFIELD compressed entries
+ r xadd teststream2 1234567891230 key1 value1
+ r xadd teststream2 1234567891240 key1 value2
+ r xadd teststream2 1234567891250 key1 value3
+
+ assert_equal [r xrevrange teststream 1234567891245 -] {{1234567891240-0 {key2 value2}} {1234567891230-0 {key1 value1}}}
+
+ assert_equal [r xrevrange teststream2 1234567891245 -] {{1234567891240-0 {key1 value2}} {1234567891230-0 {key1 value1}}}
+ }
+}
diff --git a/tests/unit/basic.tcl b/tests/unit/type/string.tcl
index b0b3b9bac..7122fd987 100644
--- a/tests/unit/basic.tcl
+++ b/tests/unit/type/string.tcl
@@ -1,9 +1,4 @@
-start_server {tags {"basic"}} {
- test {DEL all keys to start with a clean DB} {
- foreach key [r keys *] {r del $key}
- r dbsize
- } {0}
-
+start_server {tags {"string"}} {
test {SET and GET an item} {
r set x foobar
r get x
@@ -14,38 +9,6 @@ start_server {tags {"basic"}} {
r get x
} {}
- test {DEL against a single item} {
- r del x
- r get x
- } {}
-
- test {Vararg DEL} {
- r set foo1 a
- r set foo2 b
- r set foo3 c
- list [r del foo1 foo2 foo3 foo4] [r mget foo1 foo2 foo3]
- } {3 {{} {} {}}}
-
- test {KEYS with pattern} {
- foreach key {key_x key_y key_z foo_a foo_b foo_c} {
- r set $key hello
- }
- lsort [r keys foo*]
- } {foo_a foo_b foo_c}
-
- test {KEYS to get all keys} {
- lsort [r keys *]
- } {foo_a foo_b foo_c key_x key_y key_z}
-
- test {DBSIZE} {
- r dbsize
- } {6}
-
- test {DEL all keys} {
- foreach key [r keys *] {r del $key}
- r dbsize
- } {0}
-
test {Very big payload in GET/SET} {
set buf [string repeat "abcd" 1000000]
r set foo $buf
@@ -75,6 +38,7 @@ start_server {tags {"basic"}} {
} {}
test {SET 10000 numeric keys and access all them in reverse order} {
+ r flushdb
set err {}
for {set x 0} {$x < 10000} {incr x} {
r set $x $x
@@ -90,157 +54,11 @@ start_server {tags {"basic"}} {
set _ $err
} {}
- test {DBSIZE should be 10101 now} {
+ test {DBSIZE should be 10000 now} {
r dbsize
- } {10101}
- }
-
- test {INCR against non existing key} {
- set res {}
- append res [r incr novar]
- append res [r get novar]
- } {11}
-
- test {INCR against key created by incr itself} {
- r incr novar
- } {2}
-
- test {INCR against key originally set with SET} {
- r set novar 100
- r incr novar
- } {101}
-
- test {INCR over 32bit value} {
- r set novar 17179869184
- r incr novar
- } {17179869185}
-
- test {INCRBY over 32bit value with over 32bit increment} {
- r set novar 17179869184
- r incrby novar 17179869184
- } {34359738368}
-
- test {INCR fails against key with spaces (left)} {
- r set novar " 11"
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against key with spaces (right)} {
- r set novar "11 "
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against key with spaces (both)} {
- r set novar " 11 "
- catch {r incr novar} err
- format $err
- } {ERR*}
-
- test {INCR fails against a key holding a list} {
- r rpush mylist 1
- catch {r incr mylist} err
- r rpop mylist
- format $err
- } {WRONGTYPE*}
-
- test {DECRBY over 32bit value with over 32bit increment, negative res} {
- r set novar 17179869184
- r decrby novar 17179869185
- } {-1}
-
- test {INCR uses shared objects in the 0-9999 range} {
- r set foo -1
- r incr foo
- assert {[r object refcount foo] > 1}
- r set foo 9998
- r incr foo
- assert {[r object refcount foo] > 1}
- r incr foo
- assert {[r object refcount foo] == 1}
+ } {10000}
}
- test {INCR can modify objects in-place} {
- r set foo 20000
- r incr foo
- assert {[r object refcount foo] == 1}
- set old [lindex [split [r debug object foo]] 1]
- r incr foo
- set new [lindex [split [r debug object foo]] 1]
- assert {[string range $old 0 2] eq "at:"}
- assert {[string range $new 0 2] eq "at:"}
- assert {$old eq $new}
- }
-
- test {INCRBYFLOAT against non existing key} {
- r del novar
- list [roundFloat [r incrbyfloat novar 1]] \
- [roundFloat [r get novar]] \
- [roundFloat [r incrbyfloat novar 0.25]] \
- [roundFloat [r get novar]]
- } {1 1 1.25 1.25}
-
- test {INCRBYFLOAT against key originally set with SET} {
- r set novar 1.5
- roundFloat [r incrbyfloat novar 1.5]
- } {3}
-
- test {INCRBYFLOAT over 32bit value} {
- r set novar 17179869184
- r incrbyfloat novar 1.5
- } {17179869185.5}
-
- test {INCRBYFLOAT over 32bit value with over 32bit increment} {
- r set novar 17179869184
- r incrbyfloat novar 17179869184
- } {34359738368}
-
- test {INCRBYFLOAT fails against key with spaces (left)} {
- set err {}
- r set novar " 11"
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against key with spaces (right)} {
- set err {}
- r set novar "11 "
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against key with spaces (both)} {
- set err {}
- r set novar " 11 "
- catch {r incrbyfloat novar 1.0} err
- format $err
- } {ERR*valid*}
-
- test {INCRBYFLOAT fails against a key holding a list} {
- r del mylist
- set err {}
- r rpush mylist 1
- catch {r incrbyfloat mylist 1.0} err
- r del mylist
- format $err
- } {WRONGTYPE*}
-
- test {INCRBYFLOAT does not allow NaN or Infinity} {
- r set foo 0
- set err {}
- catch {r incrbyfloat foo +inf} err
- set err
- # p.s. no way I can force NaN to test it from the API because
- # there is no way to increment / decrement by infinity nor to
- # perform divisions.
- } {ERR*would produce*}
-
- test {INCRBYFLOAT decrement} {
- r set foo 1
- roundFloat [r incrbyfloat foo -1.1]
- } {-0.1}
-
test "SETNX target key missing" {
r del novar
assert_equal 1 [r setnx novar foobared]
@@ -284,172 +102,6 @@ start_server {tags {"basic"}} {
assert_equal 20 [r get x]
}
- test "DEL against expired key" {
- r debug set-active-expire 0
- r setex keyExpire 1 valExpire
- after 1100
- assert_equal 0 [r del keyExpire]
- r debug set-active-expire 1
- }
-
- test {EXISTS} {
- set res {}
- r set newkey test
- append res [r exists newkey]
- r del newkey
- append res [r exists newkey]
- } {10}
-
- test {Zero length value in key. SET/GET/EXISTS} {
- r set emptykey {}
- set res [r get emptykey]
- append res [r exists emptykey]
- r del emptykey
- append res [r exists emptykey]
- } {10}
-
- test {Commands pipelining} {
- set fd [r channel]
- puts -nonewline $fd "SET k1 xyzk\r\nGET k1\r\nPING\r\n"
- flush $fd
- set res {}
- append res [string match OK* [r read]]
- append res [r read]
- append res [string match PONG* [r read]]
- format $res
- } {1xyzk1}
-
- test {Non existing command} {
- catch {r foobaredcommand} err
- string match ERR* $err
- } {1}
-
- test {RENAME basic usage} {
- r set mykey hello
- r rename mykey mykey1
- r rename mykey1 mykey2
- r get mykey2
- } {hello}
-
- test {RENAME source key should no longer exist} {
- r exists mykey
- } {0}
-
- test {RENAME against already existing key} {
- r set mykey a
- r set mykey2 b
- r rename mykey2 mykey
- set res [r get mykey]
- append res [r exists mykey2]
- } {b0}
-
- test {RENAMENX basic usage} {
- r del mykey
- r del mykey2
- r set mykey foobar
- r renamenx mykey mykey2
- set res [r get mykey2]
- append res [r exists mykey]
- } {foobar0}
-
- test {RENAMENX against already existing key} {
- r set mykey foo
- r set mykey2 bar
- r renamenx mykey mykey2
- } {0}
-
- test {RENAMENX against already existing key (2)} {
- set res [r get mykey]
- append res [r get mykey2]
- } {foobar}
-
- test {RENAME against non existing source key} {
- catch {r rename nokey foobar} err
- format $err
- } {ERR*}
-
- test {RENAME where source and dest key is the same} {
- catch {r rename mykey mykey} err
- format $err
- } {ERR*}
-
- test {RENAME with volatile key, should move the TTL as well} {
- r del mykey mykey2
- r set mykey foo
- r expire mykey 100
- assert {[r ttl mykey] > 95 && [r ttl mykey] <= 100}
- r rename mykey mykey2
- assert {[r ttl mykey2] > 95 && [r ttl mykey2] <= 100}
- }
-
- test {RENAME with volatile key, should not inherit TTL of target key} {
- r del mykey mykey2
- r set mykey foo
- r set mykey2 bar
- r expire mykey2 100
- assert {[r ttl mykey] == -1 && [r ttl mykey2] > 0}
- r rename mykey mykey2
- r ttl mykey2
- } {-1}
-
- test {DEL all keys again (DB 0)} {
- foreach key [r keys *] {
- r del $key
- }
- r dbsize
- } {0}
-
- test {DEL all keys again (DB 1)} {
- r select 10
- foreach key [r keys *] {
- r del $key
- }
- set res [r dbsize]
- r select 9
- format $res
- } {0}
-
- test {MOVE basic usage} {
- r set mykey foobar
- r move mykey 10
- set res {}
- lappend res [r exists mykey]
- lappend res [r dbsize]
- r select 10
- lappend res [r get mykey]
- lappend res [r dbsize]
- r select 9
- format $res
- } [list 0 0 foobar 1]
-
- test {MOVE against key existing in the target DB} {
- r set mykey hello
- r move mykey 10
- } {0}
-
- test {MOVE against non-integer DB (#1428)} {
- r set mykey hello
- catch {r move mykey notanumber} e
- set e
- } {*ERR*index out of range}
-
- test {SET/GET keys in different DBs} {
- r set a hello
- r set b world
- r select 10
- r set a foo
- r set b bared
- r select 9
- set res {}
- lappend res [r get a]
- lappend res [r get b]
- r select 10
- lappend res [r get a]
- lappend res [r get b]
- r select 9
- format $res
- } {hello world foo bared}
-
test {MGET} {
r flushdb
r set foo BAR
@@ -467,37 +119,8 @@ start_server {tags {"basic"}} {
r mget foo baazz bar myset
} {BAR {} FOO {}}
- test {RANDOMKEY} {
- r flushdb
- r set foo x
- r set bar y
- set foo_seen 0
- set bar_seen 0
- for {set i 0} {$i < 100} {incr i} {
- set rkey [r randomkey]
- if {$rkey eq {foo}} {
- set foo_seen 1
- }
- if {$rkey eq {bar}} {
- set bar_seen 1
- }
- }
- list $foo_seen $bar_seen
- } {1 1}
-
- test {RANDOMKEY against empty DB} {
- r flushdb
- r randomkey
- } {}
-
- test {RANDOMKEY regression 1} {
- r flushdb
- r set x 10
- r del x
- r randomkey
- } {}
-
test {GETSET (set new value)} {
+ r del foo
list [r getset foo xyz] [r get foo]
} {{} xyz}
@@ -792,13 +415,6 @@ start_server {tags {"basic"}} {
assert {$ttl <= 10 && $ttl > 5}
}
- test {KEYS * two times with long key, Github issue #1208} {
- r flushdb
- r set dlskeriewrioeuwqoirueioqwrueoqwrueqw test
- r keys *
- r keys *
- } {dlskeriewrioeuwqoirueioqwrueoqwrueqw}
-
test {GETRANGE with huge ranges, Github issue #1844} {
r set foo bar
r getrange foo 0 4294967297
diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl
index 238eebb9d..cf54ae839 100644
--- a/tests/unit/type/zset.tcl
+++ b/tests/unit/type/zset.tcl
@@ -43,6 +43,84 @@ start_server {tags {"zset"}} {
assert_error "*not*float*" {r zadd myzset nan abc}
}
+ test "ZADD with options syntax error with incomplete pair" {
+ r del ztmp
+ catch {r zadd ztmp xx 10 x 20} err
+ set err
+ } {ERR*}
+
+ test "ZADD XX option without key - $encoding" {
+ r del ztmp
+ assert {[r zadd ztmp xx 10 x] == 0}
+ assert {[r type ztmp] eq {none}}
+ }
+
+ test "ZADD XX existing key - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 x
+ assert {[r zadd ztmp xx 20 y] == 0}
+ assert {[r zcard ztmp] == 1}
+ }
+
+ test "ZADD XX returns the number of elements actually added" {
+ r del ztmp
+ r zadd ztmp 10 x
+ set retval [r zadd ztmp 10 x 20 y 30 z]
+ assert {$retval == 2}
+ }
+
+ test "ZADD XX updates existing elements score" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp xx 5 foo 11 x 21 y 40 zap
+ assert {[r zcard ztmp] == 3}
+ assert {[r zscore ztmp x] == 11}
+ assert {[r zscore ztmp y] == 21}
+ }
+
+ test "ZADD XX and NX are not compatible" {
+ r del ztmp
+ catch {r zadd ztmp xx nx 10 x} err
+ set err
+ } {ERR*}
+
+ test "ZADD NX with non existing key" {
+ r del ztmp
+ r zadd ztmp nx 10 x 20 y 30 z
+ assert {[r zcard ztmp] == 3}
+ }
+
+ test "ZADD NX only add new elements without updating old ones" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp nx 11 x 21 y 100 a 200 b] == 2}
+ assert {[r zscore ztmp x] == 10}
+ assert {[r zscore ztmp y] == 20}
+ assert {[r zscore ztmp a] == 100}
+ assert {[r zscore ztmp b] == 200}
+ }
+
+ test "ZADD INCR works like ZINCRBY" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ r zadd ztmp INCR 15 x
+ assert {[r zscore ztmp x] == 25}
+ }
+
+    test "ZADD INCR works with a single score-element pair" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ catch {r zadd ztmp INCR 15 x 10 y} err
+ set err
+ } {ERR*}
+
+ test "ZADD CH option changes return value to all changed elements" {
+ r del ztmp
+ r zadd ztmp 10 x 20 y 30 z
+ assert {[r zadd ztmp 11 x 21 y 30 z] == 0}
+ assert {[r zadd ztmp ch 12 x 22 y 30 z] == 2}
+ }
+
test "ZINCRBY calls leading to NaN result in error" {
r zincrby myzset +inf abc
assert_error "*NaN*" {r zincrby myzset -inf abc}
@@ -77,6 +155,8 @@ start_server {tags {"zset"}} {
}
test "ZCARD basics - $encoding" {
+ r del ztmp
+ r zadd ztmp 10 a 20 b 30 c
assert_equal 3 [r zcard ztmp]
assert_equal 0 [r zcard zdoesntexist]
}
@@ -210,6 +290,12 @@ start_server {tags {"zset"}} {
assert_equal 6 [r zscore zset bar]
}
+ test "ZINCRBY return value" {
+ r del ztmp
+ set retval [r zincrby ztmp 1.0 x]
+ assert {$retval == 1.0}
+ }
+
proc create_default_zset {} {
create_zset zset {-inf a 1 b 2 c 3 d 4 e 5 f +inf g}
}
@@ -562,6 +648,75 @@ start_server {tags {"zset"}} {
}
}
}
+
+ test "Basic ZPOP with a single key - $encoding" {
+ r del zset
+ assert_equal {} [r zpopmin zset]
+ create_zset zset {-1 a 1 b 2 c 3 d 4 e}
+ assert_equal {a -1} [r zpopmin zset]
+ assert_equal {b 1} [r zpopmin zset]
+ assert_equal {e 4} [r zpopmax zset]
+ assert_equal {d 3} [r zpopmax zset]
+ assert_equal {c 2} [r zpopmin zset]
+ assert_equal 0 [r exists zset]
+ r set foo bar
+ assert_error "*WRONGTYPE*" {r zpopmin foo}
+ }
+
+ test "ZPOP with count - $encoding" {
+ r del z1 z2 z3 foo
+ r set foo bar
+ assert_equal {} [r zpopmin z1 2]
+ assert_error "*WRONGTYPE*" {r zpopmin foo 2}
+ create_zset z1 {0 a 1 b 2 c 3 d}
+ assert_equal {a 0 b 1} [r zpopmin z1 2]
+ assert_equal {d 3 c 2} [r zpopmax z1 2]
+ }
+
+ test "BZPOP with a single existing sorted set - $encoding" {
+ set rd [redis_deferring_client]
+ create_zset zset {0 a 1 b 2 c}
+
+ $rd bzpopmin zset 5
+ assert_equal {zset a 0} [$rd read]
+ $rd bzpopmin zset 5
+ assert_equal {zset b 1} [$rd read]
+ $rd bzpopmax zset 5
+ assert_equal {zset c 2} [$rd read]
+ assert_equal 0 [r exists zset]
+ }
+
+ test "BZPOP with multiple existing sorted sets - $encoding" {
+ set rd [redis_deferring_client]
+ create_zset z1 {0 a 1 b 2 c}
+ create_zset z2 {3 d 4 e 5 f}
+
+ $rd bzpopmin z1 z2 5
+ assert_equal {z1 a 0} [$rd read]
+ $rd bzpopmax z1 z2 5
+ assert_equal {z1 c 2} [$rd read]
+ assert_equal 1 [r zcard z1]
+ assert_equal 3 [r zcard z2]
+
+ $rd bzpopmax z2 z1 5
+ assert_equal {z2 f 5} [$rd read]
+ $rd bzpopmin z2 z1 5
+ assert_equal {z2 d 3} [$rd read]
+ assert_equal 1 [r zcard z1]
+ assert_equal 1 [r zcard z2]
+ }
+
+ test "BZPOP second sorted set has members - $encoding" {
+ set rd [redis_deferring_client]
+ r del z1
+ create_zset z2 {3 d 4 e 5 f}
+ $rd bzpopmax z1 z2 5
+ assert_equal {z2 f 5} [$rd read]
+ $rd bzpopmin z2 z1 5
+ assert_equal {z2 d 3} [$rd read]
+ assert_equal 0 [r zcard z1]
+ assert_equal 1 [r zcard z2]
+ }
}
basics ziplist
@@ -610,6 +765,10 @@ start_server {tags {"zset"}} {
}
}
+ test "ZSET commands don't accept the empty strings as valid score" {
+    test "ZSET commands don't accept the empty string as a valid score" {
+ }
+
proc stressers {encoding} {
if {$encoding == "ziplist"} {
# Little extra to allow proper fuzzing in the sorting stresser
@@ -935,10 +1094,121 @@ start_server {tags {"zset"}} {
}
assert_equal {} $err
}
+
+ test "BZPOPMIN, ZADD + DEL should not awake blocked client" {
+ set rd [redis_deferring_client]
+ r del zset
+
+ $rd bzpopmin zset 0
+ r multi
+ r zadd zset 0 foo
+ r del zset
+ r exec
+ r del zset
+ r zadd zset 1 bar
+ $rd read
+ } {zset bar 1}
+
+ test "BZPOPMIN, ZADD + DEL + SET should not awake blocked client" {
+ set rd [redis_deferring_client]
+ r del list
+
+ r del zset
+
+ $rd bzpopmin zset 0
+ r multi
+ r zadd zset 0 foo
+ r del zset
+ r set zset foo
+ r exec
+ r del zset
+ r zadd zset 1 bar
+ $rd read
+ } {zset bar 1}
+
+ test "BZPOPMIN with same key multiple times should work" {
+ set rd [redis_deferring_client]
+ r del z1 z2
+
+ # Data arriving after the BZPOPMIN.
+ $rd bzpopmin z1 z2 z2 z1 0
+ r zadd z1 0 a
+ assert_equal [$rd read] {z1 a 0}
+ $rd bzpopmin z1 z2 z2 z1 0
+ r zadd z2 1 b
+ assert_equal [$rd read] {z2 b 1}
+
+ # Data already there.
+ r zadd z1 0 a
+ r zadd z2 1 b
+ $rd bzpopmin z1 z2 z2 z1 0
+ assert_equal [$rd read] {z1 a 0}
+ $rd bzpopmin z1 z2 z2 z1 0
+ assert_equal [$rd read] {z2 b 1}
+ }
+
+ test "MULTI/EXEC is isolated from the point of view of BZPOPMIN" {
+ set rd [redis_deferring_client]
+ r del zset
+ $rd bzpopmin zset 0
+ r multi
+ r zadd zset 0 a
+ r zadd zset 1 b
+ r zadd zset 2 c
+ r exec
+ $rd read
+ } {zset a 0}
+
+ test "BZPOPMIN with variadic ZADD" {
+ set rd [redis_deferring_client]
+ r del zset
+ if {$::valgrind} {after 100}
+ $rd bzpopmin zset 0
+ if {$::valgrind} {after 100}
+ assert_equal 2 [r zadd zset -1 foo 1 bar]
+ if {$::valgrind} {after 100}
+ assert_equal {zset foo -1} [$rd read]
+ assert_equal {bar} [r zrange zset 0 -1]
+ }
+
+ test "BZPOPMIN with zero timeout should block indefinitely" {
+ set rd [redis_deferring_client]
+ r del zset
+ $rd bzpopmin zset 0
+ after 1000
+ r zadd zset 0 foo
+ assert_equal {zset foo 0} [$rd read]
+ }
}
tags {"slow"} {
stressers ziplist
stressers skiplist
}
+
+ test {ZSET skiplist order consistency when elements are moved} {
+ set original_max [lindex [r config get zset-max-ziplist-entries] 1]
+ r config set zset-max-ziplist-entries 0
+ for {set times 0} {$times < 10} {incr times} {
+ r del zset
+ for {set j 0} {$j < 1000} {incr j} {
+ r zadd zset [randomInt 50] ele-[randomInt 10]
+ }
+
+ # Make sure that element ordering is correct
+ set prev_element {}
+ set prev_score -1
+ foreach {element score} [r zrange zset 0 -1 WITHSCORES] {
+ # Assert that elements are in increasing ordering
+ assert {
+ $prev_score < $score ||
+ ($prev_score == $score &&
+ [string compare $prev_element $element] == -1)
+ }
+ set prev_element $element
+ set prev_score $score
+ }
+ }
+ r config set zset-max-ziplist-entries $original_max
+ }
}
diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl
new file mode 100644
index 000000000..e2f5d2942
--- /dev/null
+++ b/tests/unit/wait.tcl
@@ -0,0 +1,42 @@
+start_server {tags {"wait"}} {
+start_server {} {
+ set slave [srv 0 client]
+ set slave_host [srv 0 host]
+ set slave_port [srv 0 port]
+ set master [srv -1 client]
+ set master_host [srv -1 host]
+ set master_port [srv -1 port]
+
+ test {Setup slave} {
+ $slave slaveof $master_host $master_port
+ wait_for_condition 50 100 {
+ [s 0 master_link_status] eq {up}
+ } else {
+ fail "Replication not started."
+ }
+ }
+
+ test {WAIT should acknowledge 1 additional copy of the data} {
+ $master set foo 0
+ $master incr foo
+ $master incr foo
+ $master incr foo
+ assert {[$master wait 1 5000] == 1}
+ assert {[$slave get foo] == 3}
+ }
+
+ test {WAIT should not acknowledge 2 additional copies of the data} {
+ $master incr foo
+ assert {[$master wait 2 1000] <= 1}
+ }
+
+ test {WAIT should not acknowledge 1 additional copy if slave is blocked} {
+ exec src/redis-cli -h $slave_host -p $slave_port debug sleep 5 > /dev/null 2> /dev/null &
+ after 1000 ;# Give redis-cli the time to execute the command.
+ $master set foo 0
+ $master incr foo
+ $master incr foo
+ $master incr foo
+ assert {[$master wait 1 3000] == 0}
+ }
+}}
diff --git a/utils/cluster_fail_time.tcl b/utils/cluster_fail_time.tcl
new file mode 100644
index 000000000..87399495f
--- /dev/null
+++ b/utils/cluster_fail_time.tcl
@@ -0,0 +1,50 @@
+# This simple script is used in order to estimate the average PFAIL->FAIL
+# state switch time after a failure.
+
+set ::sleep_time 10 ; # How much to sleep to trigger PFAIL.
+set ::fail_port 30016 ; # Node to put in sleep.
+set ::other_port 30001 ; # Node to use to monitor the flag switch.
+
+proc avg vector {
+ set sum 0.0
+ foreach x $vector {
+ set sum [expr {$sum+$x}]
+ }
+ expr {$sum/[llength $vector]}
+}
+
+set samples {}
+while 1 {
+ exec redis-cli -p $::fail_port debug sleep $::sleep_time > /dev/null &
+
+ # Wait for fail? to appear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {[string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL?"
+ set start [clock milliseconds]
+
+ # Wait for fail? to disappear.
+ while 1 {
+ set output [exec redis-cli -p $::other_port cluster nodes]
+ if {![string match {*fail\?*} $output]} break
+ after 100
+ }
+
+ puts "FAIL"
+ set now [clock milliseconds]
+ set elapsed [expr {$now-$start}]
+ puts $elapsed
+ lappend samples $elapsed
+
+ puts "AVG([llength $samples]): [avg $samples]"
+
+ # Wait for the instance to be available again.
+ exec redis-cli -p $::fail_port ping
+
+ # Wait for the fail flag to be cleared.
+ after 2000
+}
diff --git a/utils/corrupt_rdb.c b/utils/corrupt_rdb.c
new file mode 100644
index 000000000..7ba9caeee
--- /dev/null
+++ b/utils/corrupt_rdb.c
@@ -0,0 +1,44 @@
+/* Trivial program to corrupt an RDB file in order to check the behavior
+ * and effectiveness of the RDB check program.
+ *
+ * Copyright (C) 2016 Salvatore Sanfilippo.
+ * This software is released in the 3-clause BSD license. */
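+
+/* Example invocation (illustrative values, assuming the program was
+ * compiled to ./corrupt_rdb): corrupt dump.rdb in 10 random places:
+ *
+ *   ./corrupt_rdb dump.rdb 10
+ */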
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+
+int main(int argc, char **argv) {
+ struct stat stat;
+ int fd, cycles;
+
+ if (argc != 3) {
+        fprintf(stderr,"Usage: %s <filename> <cycles>\n",argv[0]);
+ exit(1);
+ }
+
+ srand(time(NULL));
+ cycles = atoi(argv[2]);
+    fd = open(argv[1],O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+ fstat(fd,&stat);
+
+ while(cycles--) {
+ unsigned char buf[32];
+ unsigned long offset = rand()%stat.st_size;
+ int writelen = 1+rand()%31;
+ int j;
+
+ for (j = 0; j < writelen; j++) buf[j] = (char)rand();
+ lseek(fd,offset,SEEK_SET);
+ printf("Writing %d bytes at offset %lu\n", writelen, offset);
+ write(fd,buf,writelen);
+ }
+ return 0;
+}
diff --git a/utils/create-cluster/.gitignore b/utils/create-cluster/.gitignore
new file mode 100644
index 000000000..2988ee919
--- /dev/null
+++ b/utils/create-cluster/.gitignore
@@ -0,0 +1,5 @@
+config.sh
+*.rdb
+*.aof
+*.conf
+*.log
diff --git a/utils/create-cluster/README b/utils/create-cluster/README
new file mode 100644
index 000000000..e682f6dc9
--- /dev/null
+++ b/utils/create-cluster/README
@@ -0,0 +1,27 @@
+Create-cluster is a small script used to easily start a big number of Redis
+instances configured to run in cluster mode. Its main goal is to allow manual
+testing in a condition which is not easy to replicate with the Redis cluster
+unit tests, for example when a lot of instances are needed in order to trigger
+a given bug.
+
+The tool can also be used just to easily create a number of instances in a
+Redis Cluster in order to experiment a bit with the system.
+
+USAGE
+---
+
+To create a cluster, follow these steps:
+
+1. Edit create-cluster and change the start / end port, depending on the
+number of instances you want to create.
+2. Use "./create-cluster start" in order to run the instances.
+3. Use "./create-cluster create" in order to execute redis-cli --cluster create, so that
+an actual Redis cluster will be created.
+4. Now you are ready to play with the cluster. AOF files and logs for each instance are created in the current directory.
+
+In order to stop a cluster:
+
+1. Use "./create-cluster stop" to stop all the instances. After you stop the instances you can use "./create-cluster start" to restart them if you change your mind.
+2. Use "./create-cluster clean" to remove all the AOF / log files to restart with a clean environment.
+
+Use the command "./create-cluster help" to get the full list of features.
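+
+For example, a typical session (a sketch using the commands described
+above; the exact ports and number of instances depend on your settings)
+may look like:
+
+    ./create-cluster start
+    ./create-cluster create
+    ./create-cluster stop
+    ./create-cluster clean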
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster
new file mode 100755
index 000000000..468f924a4
--- /dev/null
+++ b/utils/create-cluster/create-cluster
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Settings
+PORT=30000
+TIMEOUT=2000
+NODES=6
+REPLICAS=1
+
+# You may want to put the above config parameters into config.sh in order to
+# override the defaults without modifying this script.
+
+if [ -a config.sh ]
+then
+ source "config.sh"
+fi
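+
+# For example, a minimal config.sh sketch overriding the defaults above
+# (the variable names are the ones defined in this script; the values are
+# only illustrative):
+#
+#   PORT=40000
+#   NODES=10
+#   REPLICAS=2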
+
+# Computed vars
+ENDPORT=$((PORT+NODES))
+
+if [ "$1" == "start" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Starting $PORT"
+ ../../src/redis-server --port $PORT --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes
+ done
+ exit 0
+fi
+
+if [ "$1" == "create" ]
+then
+ HOSTS=""
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ HOSTS="$HOSTS 127.0.0.1:$PORT"
+ done
+ ../../src/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS
+ exit 0
+fi
+
+if [ "$1" == "stop" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ echo "Stopping $PORT"
+ ../../src/redis-cli -p $PORT shutdown nosave
+ done
+ exit 0
+fi
+
+if [ "$1" == "watch" ]
+then
+ PORT=$((PORT+1))
+ while [ 1 ]; do
+ clear
+ date
+ ../../src/redis-cli -p $PORT cluster nodes | head -30
+ sleep 1
+ done
+ exit 0
+fi
+
+if [ "$1" == "tail" ]
+then
+ INSTANCE=$2
+ PORT=$((PORT+INSTANCE))
+ tail -f ${PORT}.log
+ exit 0
+fi
+
+if [ "$1" == "call" ]
+then
+ while [ $((PORT < ENDPORT)) != "0" ]; do
+ PORT=$((PORT+1))
+ ../../src/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9
+ done
+ exit 0
+fi
+
+if [ "$1" == "clean" ]
+then
+ rm -rf *.log
+ rm -rf appendonly*.aof
+ rm -rf dump*.rdb
+ rm -rf nodes*.conf
+ exit 0
+fi
+
+if [ "$1" == "clean-logs" ]
+then
+ rm -rf *.log
+ exit 0
+fi
+
+echo "Usage: $0 [start|create|stop|watch|tail|clean]"
+echo "start -- Launch Redis Cluster instances."
+echo "create -- Create a cluster using redis-cli --cluster create."
+echo "stop -- Stop Redis Cluster instances."
+echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node."
+echo "tail <id> -- Run tail -f of instance at base port + ID."
+echo "clean -- Remove all instances data, logs, configs."
+echo "clean-logs -- Remove just instances logs."
diff --git a/utils/generate-command-help.rb b/utils/generate-command-help.rb
index 068953198..29acef69d 100755
--- a/utils/generate-command-help.rb
+++ b/utils/generate-command-help.rb
@@ -12,7 +12,10 @@ GROUPS = [
"connection",
"server",
"scripting",
- "hyperloglog"
+ "hyperloglog",
+ "cluster",
+ "geo",
+ "stream"
].freeze
GROUPS_BY_NAME = Hash[*
diff --git a/utils/graphs/commits-over-time/README.md b/utils/graphs/commits-over-time/README.md
new file mode 100644
index 000000000..b28019ecc
--- /dev/null
+++ b/utils/graphs/commits-over-time/README.md
@@ -0,0 +1,16 @@
+This Tcl script is what I used in order to generate the graph you
+can find at http://antirez.com/news/98. It's really quick & dirty, more
+a throw-away program than anything else, but it could probably be reused or
+modified in the future in order to visualize other similar data or an
+updated version of the same data.
+
+The usage is trivial:
+
+ ./genhtml.tcl > output.html
+
+The generated HTML is quite broken but good enough to grab a screenshot
+from the browser. Feel free to improve it if you have time / interest.
+
+Note that because of the code filtering the tags, and the hardcoded branch
+name, the script, as it is, cannot analyze a different repository.
+However the changes needed are trivial.
diff --git a/utils/graphs/commits-over-time/genhtml.tcl b/utils/graphs/commits-over-time/genhtml.tcl
new file mode 100755
index 000000000..c4b4e0989
--- /dev/null
+++ b/utils/graphs/commits-over-time/genhtml.tcl
@@ -0,0 +1,96 @@
+#!/usr/bin/env tclsh
+
+# Load commits history as "sha1 unixtime".
+set commits [exec git log unstable {--pretty="%H %at"}]
+set raw_tags [exec git tag]
+
+# Load all the tags that are about stable releases.
+foreach tag $raw_tags {
+ if {[string match v*-stable $tag]} {
+ set tag [string range $tag 1 end-7]
+ puts $tag
+ }
+    if {[regexp {^[0-9]+\.[0-9]+\.[0-9]+$} $tag]} {
+ lappend tags $tag
+ }
+}
+
+# For each tag, create a list of "name unixtime"
+foreach tag $tags {
+ set taginfo [exec git log $tag -n 1 "--pretty=\"$tag %at\""]
+ set taginfo [string trim $taginfo {"}]
+ lappend labels $taginfo
+}
+
+# For each commit, check the amount of code changed and create an array
+# mapping the commit to the number of lines affected.
+foreach c $commits {
+ set stat [exec git show --oneline --numstat [lindex $c 0]]
+ set linenum 0
+ set affected 0
+ foreach line [split $stat "\n"] {
+ incr linenum
+ if {$linenum == 1 || [string match *deps/* $line]} continue
+ if {[catch {llength $line} numfields]} continue
+ if {$numfields == 0} continue
+ catch {
+ incr affected [lindex $line 0]
+ incr affected [lindex $line 1]
+ }
+ }
+ set commit_to_affected([lindex $c 0]) $affected
+}
+
+set base_time [lindex [lindex $commits end] 1]
+puts [clock format $base_time]
+
+# Generate a graph made of HTML DIVs.
+puts {<html>
+<style>
+.box {
+ position:absolute;
+ width:10px;
+ height:5px;
+ border:1px black solid;
+ background-color:#44aa33;
+ opacity: 0.04;
+}
+.label {
+ position:absolute;
+ background-color:#dddddd;
+ font-family:helvetica;
+ font-size:12px;
+ padding:2px;
+ color:#666;
+ border:1px #aaa solid;
+ border-radius: 5px;
+}
+#outer {
+ position:relative;
+ width:1500;
+ height:500;
+ border:1px #aaa solid;
+}
+</style>
+<div id="outer">
+}
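+
+# Each commit becomes a small box: the x coordinate advances by one pixel
+# every two days (3600*24*2 seconds from the oldest commit), and the box
+# height grows with the logarithm of the number of affected lines.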
+foreach c $commits {
+ set sha [lindex $c 0]
+ set t [expr {([lindex $c 1]-$base_time)/(3600*24*2)}]
+ set affected [expr $commit_to_affected($sha)]
+ set left $t
+ set height [expr {log($affected)*20}]
+ puts "<div class=\"box\" style=\"left:$left; bottom:0; height:$height\"></div>"
+}
+
+set bottom -30
+foreach l $labels {
+ set name [lindex $l 0]
+ set t [expr {([lindex $l 1]-$base_time)/(3600*24*2)}]
+ set left $t
+ if {$left < 0} continue
+ incr bottom -20
+ if {$bottom == -210} {set bottom -30}
+ puts "<div class=\"label\" style=\"left:$left; bottom:$bottom\">$name</div>"
+}
+puts {</div></html>}
diff --git a/utils/hashtable/README b/utils/hashtable/README
new file mode 100644
index 000000000..87a76c9a5
--- /dev/null
+++ b/utils/hashtable/README
@@ -0,0 +1,13 @@
+Hash table implementation related utilities.
+
+rehashing.c
+---
+
+Visually shows the buckets in the two hash tables during rehashing. Also
+stress tests the dictGetSomeKeys() implementation, which may actually
+disappear from Redis soon; however some of the visualization code should be
+reusable when investigating new bugs.
+
+Compile with:
+
+ cc -I ../../src/ rehashing.c ../../src/zmalloc.c ../../src/dict.c -o rehashing_test
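+
+Then run the resulting binary, which takes no arguments:
+
+    ./rehashing_test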
diff --git a/utils/hashtable/rehashing.c b/utils/hashtable/rehashing.c
new file mode 100644
index 000000000..b57a9043a
--- /dev/null
+++ b/utils/hashtable/rehashing.c
@@ -0,0 +1,142 @@
+#include "redis.h"
+#include "dict.h"
+
+void _redisAssert(char *x, char *y, int l) {
+ printf("ASSERT: %s %s %d\n",x,y,l);
+ exit(1);
+}
+
+unsigned int dictKeyHash(const void *keyp) {
+ unsigned long key = (unsigned long)keyp;
+ key = dictGenHashFunction(&key,sizeof(key));
+ key += ~(key << 15);
+ key ^= (key >> 10);
+ key += (key << 3);
+ key ^= (key >> 6);
+ key += ~(key << 11);
+ key ^= (key >> 16);
+ return key;
+}
+
+int dictKeyCompare(void *privdata, const void *key1, const void *key2) {
+ unsigned long k1 = (unsigned long)key1;
+ unsigned long k2 = (unsigned long)key2;
+ return k1 == k2;
+}
+
+dictType dictTypeTest = {
+ dictKeyHash, /* hash function */
+ NULL, /* key dup */
+ NULL, /* val dup */
+ dictKeyCompare, /* key compare */
+ NULL, /* key destructor */
+ NULL /* val destructor */
+};
+
+void showBuckets(dictht ht) {
+ if (ht.table == NULL) {
+ printf("NULL\n");
+ } else {
+ int j;
+ for (j = 0; j < ht.size; j++) {
+ printf("%c", ht.table[j] ? '1' : '0');
+ }
+ printf("\n");
+ }
+}
+
+void show(dict *d) {
+ int j;
+ if (d->rehashidx != -1) {
+ printf("rhidx: ");
+ for (j = 0; j < d->rehashidx; j++)
+ printf(".");
+ printf("|\n");
+ }
+ printf("ht[0]: ");
+ showBuckets(d->ht[0]);
+ printf("ht[1]: ");
+ showBuckets(d->ht[1]);
+ printf("\n");
+}
+
+int sortPointers(const void *a, const void *b) {
+ unsigned long la, lb;
+
+ la = (long) (*((dictEntry**)a));
+ lb = (long) (*((dictEntry**)b));
+ return la-lb;
+}
+
+void stressGetKeys(dict *d, int times, int *perfect_run, int *approx_run) {
+ int j;
+
+ dictEntry **des = zmalloc(sizeof(dictEntry*)*dictSize(d));
+ for (j = 0; j < times; j++) {
+ int requested = rand() % (dictSize(d)+1);
+ int returned = dictGetSomeKeys(d, des, requested);
+ int dup = 0;
+
+ qsort(des,returned,sizeof(dictEntry*),sortPointers);
+ if (returned > 1) {
+ int i;
+ for (i = 0; i < returned-1; i++) {
+ if (des[i] == des[i+1]) dup++;
+ }
+ }
+
+ if (requested == returned && dup == 0) {
+ (*perfect_run)++;
+ } else {
+ (*approx_run)++;
+ printf("Requested, returned, duplicated: %d %d %d\n",
+ requested, returned, dup);
+ }
+ }
+ zfree(des);
+}
+
+#define MAX1 120
+#define MAX2 1000
+int main(void) {
+ dict *d = dictCreate(&dictTypeTest,NULL);
+ unsigned long i;
+ srand(time(NULL));
+
+ for (i = 0; i < MAX1; i++) {
+ dictAdd(d,(void*)i,NULL);
+ show(d);
+ }
+ printf("Size: %d\n", (int)dictSize(d));
+
+ for (i = 0; i < MAX1; i++) {
+ dictDelete(d,(void*)i);
+ dictResize(d);
+ show(d);
+ }
+ dictRelease(d);
+
+ d = dictCreate(&dictTypeTest,NULL);
+
+ printf("Stress testing dictGetSomeKeys\n");
+ int perfect_run = 0, approx_run = 0;
+
+ for (i = 0; i < MAX2; i++) {
+ dictAdd(d,(void*)i,NULL);
+ stressGetKeys(d,100,&perfect_run,&approx_run);
+ }
+
+ for (i = 0; i < MAX2; i++) {
+ dictDelete(d,(void*)i);
+ dictResize(d);
+ stressGetKeys(d,100,&perfect_run,&approx_run);
+ }
+
+    printf("dictGetSomeKeys, %d perfect runs, %d approximated runs\n",
+ perfect_run, approx_run);
+
+ dictRelease(d);
+
+ printf("TEST PASSED!\n");
+ return 0;
+}
diff --git a/utils/hyperloglog/hll-err.rb b/utils/hyperloglog/hll-err.rb
index 75bb8e424..2c71ac5ef 100644
--- a/utils/hyperloglog/hll-err.rb
+++ b/utils/hyperloglog/hll-err.rb
@@ -18,7 +18,7 @@ while true do
elements << ele
i += 1
}
- r.pfadd('hll',*elements)
+ r.pfadd('hll',elements)
}
approx = r.pfcount('hll')
abs_err = (approx-i).abs
diff --git a/utils/hyperloglog/hll-gnuplot-graph.rb b/utils/hyperloglog/hll-gnuplot-graph.rb
index 745baddcf..6c7596d17 100644
--- a/utils/hyperloglog/hll-gnuplot-graph.rb
+++ b/utils/hyperloglog/hll-gnuplot-graph.rb
@@ -30,7 +30,7 @@ def run_experiment(r,seed,max,step)
elements << ele
i += 1
}
- r.pfadd('hll',*elements)
+ r.pfadd('hll',elements)
approx = r.pfcount('hll')
err = approx-i
rel_err = 100.to_f*err/i
diff --git a/utils/install_server.sh b/utils/install_server.sh
index 98e047e3d..7eb341417 100755
--- a/utils/install_server.sh
+++ b/utils/install_server.sh
@@ -25,9 +25,25 @@
#
################################################################################
#
-# Interactive service installer for redis server
-# this generates a redis config file and an /etc/init.d script, and installs them
-# this scripts should be run as root
+# Service installer for redis server, runs interactively by default.
+#
+# To run this script non-interactively (for automation/provisioning purposes),
+# feed the variables into the script. Any missing variables will be prompted!
+# Tip: Environment variables also support command substitution (see REDIS_EXECUTABLE)
+#
+# Example:
+#
+# sudo REDIS_PORT=1234 \
+# REDIS_CONFIG_FILE=/etc/redis/1234.conf \
+# REDIS_LOG_FILE=/var/log/redis_1234.log \
+# REDIS_DATA_DIR=/var/lib/redis/1234 \
+# REDIS_EXECUTABLE=`command -v redis-server` ./utils/install_server.sh
+#
+# This generates a redis config file and an /etc/init.d script, and installs them.
+#
+# /!\ This script should be run as root
+#
+################################################################################
die () {
echo "ERROR: $1. Aborting!"
@@ -42,6 +58,7 @@ SCRIPTPATH=$(dirname $SCRIPT)
#Initial defaults
_REDIS_PORT=6379
+_MANUAL_EXECUTION=false
echo "Welcome to the redis service installer"
echo "This script will help you easily set up a running redis server"
@@ -53,47 +70,61 @@ if [ "$(id -u)" -ne 0 ] ; then
exit 1
fi
-#Read the redis port
-read -p "Please select the redis port for this instance: [$_REDIS_PORT] " REDIS_PORT
if ! echo $REDIS_PORT | egrep -q '^[0-9]+$' ; then
- echo "Selecting default: $_REDIS_PORT"
- REDIS_PORT=$_REDIS_PORT
+ _MANUAL_EXECUTION=true
+ #Read the redis port
+ read -p "Please select the redis port for this instance: [$_REDIS_PORT] " REDIS_PORT
+ if ! echo $REDIS_PORT | egrep -q '^[0-9]+$' ; then
+ echo "Selecting default: $_REDIS_PORT"
+ REDIS_PORT=$_REDIS_PORT
+ fi
fi
-#read the redis config file
-_REDIS_CONFIG_FILE="/etc/redis/$REDIS_PORT.conf"
-read -p "Please select the redis config file name [$_REDIS_CONFIG_FILE] " REDIS_CONFIG_FILE
if [ -z "$REDIS_CONFIG_FILE" ] ; then
- REDIS_CONFIG_FILE=$_REDIS_CONFIG_FILE
- echo "Selected default - $REDIS_CONFIG_FILE"
+ _MANUAL_EXECUTION=true
+ #read the redis config file
+ _REDIS_CONFIG_FILE="/etc/redis/$REDIS_PORT.conf"
+ read -p "Please select the redis config file name [$_REDIS_CONFIG_FILE] " REDIS_CONFIG_FILE
+ if [ -z "$REDIS_CONFIG_FILE" ] ; then
+ REDIS_CONFIG_FILE=$_REDIS_CONFIG_FILE
+ echo "Selected default - $REDIS_CONFIG_FILE"
+ fi
fi
-#read the redis log file path
-_REDIS_LOG_FILE="/var/log/redis_$REDIS_PORT.log"
-read -p "Please select the redis log file name [$_REDIS_LOG_FILE] " REDIS_LOG_FILE
if [ -z "$REDIS_LOG_FILE" ] ; then
- REDIS_LOG_FILE=$_REDIS_LOG_FILE
- echo "Selected default - $REDIS_LOG_FILE"
+ _MANUAL_EXECUTION=true
+ #read the redis log file path
+ _REDIS_LOG_FILE="/var/log/redis_$REDIS_PORT.log"
+ read -p "Please select the redis log file name [$_REDIS_LOG_FILE] " REDIS_LOG_FILE
+ if [ -z "$REDIS_LOG_FILE" ] ; then
+ REDIS_LOG_FILE=$_REDIS_LOG_FILE
+ echo "Selected default - $REDIS_LOG_FILE"
+ fi
fi
-
-#get the redis data directory
-_REDIS_DATA_DIR="/var/lib/redis/$REDIS_PORT"
-read -p "Please select the data directory for this instance [$_REDIS_DATA_DIR] " REDIS_DATA_DIR
if [ -z "$REDIS_DATA_DIR" ] ; then
- REDIS_DATA_DIR=$_REDIS_DATA_DIR
- echo "Selected default - $REDIS_DATA_DIR"
+ _MANUAL_EXECUTION=true
+ #get the redis data directory
+ _REDIS_DATA_DIR="/var/lib/redis/$REDIS_PORT"
+ read -p "Please select the data directory for this instance [$_REDIS_DATA_DIR] " REDIS_DATA_DIR
+ if [ -z "$REDIS_DATA_DIR" ] ; then
+ REDIS_DATA_DIR=$_REDIS_DATA_DIR
+ echo "Selected default - $REDIS_DATA_DIR"
+ fi
fi
-#get the redis executable path
-_REDIS_EXECUTABLE=`command -v redis-server`
-read -p "Please select the redis executable path [$_REDIS_EXECUTABLE] " REDIS_EXECUTABLE
if [ ! -x "$REDIS_EXECUTABLE" ] ; then
- REDIS_EXECUTABLE=$_REDIS_EXECUTABLE
-
+ _MANUAL_EXECUTION=true
+ #get the redis executable path
+ _REDIS_EXECUTABLE=`command -v redis-server`
+ read -p "Please select the redis executable path [$_REDIS_EXECUTABLE] " REDIS_EXECUTABLE
if [ ! -x "$REDIS_EXECUTABLE" ] ; then
- echo "Mmmmm... it seems like you don't have a redis executable. Did you run make install yet?"
- exit 1
+ REDIS_EXECUTABLE=$_REDIS_EXECUTABLE
+
+ if [ ! -x "$REDIS_EXECUTABLE" ] ; then
+ echo "Mmmmm... it seems like you don't have a redis executable. Did you run make install yet?"
+ exit 1
+ fi
fi
fi
@@ -112,7 +143,9 @@ echo "Data dir : $REDIS_DATA_DIR"
echo "Executable : $REDIS_EXECUTABLE"
echo "Cli Executable : $CLI_EXEC"
-read -p "Is this ok? Then press ENTER to go on or Ctrl-C to abort." _UNUSED_
+if [ "$_MANUAL_EXECUTION" == true ] ; then
+ read -p "Is this ok? Then press ENTER to go on or Ctrl-C to abort." _UNUSED_
+fi
mkdir -p `dirname "$REDIS_CONFIG_FILE"` || die "Could not create redis config directory"
mkdir -p `dirname "$REDIS_LOG_FILE"` || die "Could not create redis log dir"
@@ -135,13 +168,13 @@ fi
echo "## Generated by install_server.sh ##" > $TMP_FILE
read -r SED_EXPR <<-EOF
-s#^port [0-9]{4}\$#port ${REDIS_PORT}#; \
-s#^logfile .+\$#logfile ${REDIS_LOG_FILE}#; \
-s#^dir .+\$#dir ${REDIS_DATA_DIR}#; \
-s#^pidfile .+\$#pidfile ${PIDFILE}#; \
-s#^daemonize no\$#daemonize yes#;
+s#^port .\+#port ${REDIS_PORT}#; \
+s#^logfile .\+#logfile ${REDIS_LOG_FILE}#; \
+s#^dir .\+#dir ${REDIS_DATA_DIR}#; \
+s#^pidfile .\+#pidfile ${PIDFILE}#; \
+s#^daemonize no#daemonize yes#;
EOF
-sed -r "$SED_EXPR" $DEFAULT_CONFIG >> $TMP_FILE
+sed "$SED_EXPR" $DEFAULT_CONFIG >> $TMP_FILE
#cat $TPL_FILE | while read line; do eval "echo \"$line\"" >> $TMP_FILE; done
cp $TMP_FILE $REDIS_CONFIG_FILE || die "Could not write redis config file $REDIS_CONFIG_FILE"
diff --git a/utils/lru/README b/utils/lru/README
index 288189e3e..f043b2979 100644
--- a/utils/lru/README
+++ b/utils/lru/README
@@ -3,11 +3,17 @@ Redis approximated LRU algorithm against the theoretical output of true
LRU algorithm.
In order to use the program you need to recompile Redis setting the define
-REDIS_LRU_CLOCK_RESOLUTION to 1, by editing redis.h.
+REDIS_LRU_CLOCK_RESOLUTION to 1, by editing the file server.h.
This allows to execute the program in a fast way since the 1 ms resolution
is enough for all the objects to have a different enough time stamp during
the test.
The program is executed like this:
- ruby test-lru.rb > /tmp/lru.html
+ ruby test-lru.rb /tmp/lru.html
+
+You can optionally specify a number of times to run, so that the program
+will output averages of the different runs, by using the --runs option.
+For instance in order to run the test 10 times use:
+
+    ruby test-lru.rb /tmp/lru.html --runs 10
diff --git a/utils/lru/lfu-simulation.c b/utils/lru/lfu-simulation.c
new file mode 100644
index 000000000..6aa5911ac
--- /dev/null
+++ b/utils/lru/lfu-simulation.c
@@ -0,0 +1,158 @@
+#include <stdio.h>
+#include <time.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+int decr_every = 1;
+int keyspace_size = 1000000;
+time_t switch_after = 30; /* Switch access pattern after N seconds. */
+
+struct entry {
+ /* Field that the LFU Redis implementation will have (we have
+ * 24 bits of total space in the object->lru field). */
+ uint8_t counter; /* Logarithmic counter. */
+ uint16_t decrtime; /* (Reduced precision) time of last decrement. */
+
+ /* Fields only useful for visualization. */
+ uint64_t hits; /* Number of real accesses. */
+ time_t ctime; /* Key creation time. */
+};
+
+#define to_16bit_minutes(x) ((x/60) & 65535)
+#define COUNTER_INIT_VAL 5
+
+/* Compute the difference in minutes between two 16 bit minute times
+ * obtained with to_16bit_minutes(). Since they can wrap around, if we
+ * detect the overflow we account for it as if the counter wrapped a
+ * single time. */
+uint16_t minutes_diff(uint16_t now, uint16_t prev) {
+ if (now >= prev) return now-prev;
+    return 65536-prev+now;
+}
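+
+/* For example (an illustrative computation): with prev=65530 and now=3
+ * the clock wrapped once, so the real difference is 65536-65530+3 = 9
+ * minutes. */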
+
+/* Increment a counter logarithmically: the greater its value, the less
+ * likely it is that the counter is actually incremented.
+ * The maximum value of the counter saturates at 255. */
+uint8_t log_incr(uint8_t counter) {
+ if (counter == 255) return counter;
+ double r = (double)rand()/RAND_MAX;
+ double baseval = counter-COUNTER_INIT_VAL;
+ if (baseval < 0) baseval = 0;
+ double limit = 1.0/(baseval*10+1);
+ if (r < limit) counter++;
+ return counter;
+}
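+
+/* For example (an illustrative computation): with counter=15 and
+ * COUNTER_INIT_VAL=5, baseval is 10, so limit is 1.0/(10*10+1) = 1/101:
+ * the counter is incremented on average once every 101 calls. */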
+
+/* Simulate an access to an entry. */
+void access_entry(struct entry *e) {
+ e->counter = log_incr(e->counter);
+ e->hits++;
+}
+
+/* Return the entry LFU value and as a side effect decrement the
+ * entry value if the decrement time was reached. */
+uint8_t scan_entry(struct entry *e) {
+ if (minutes_diff(to_16bit_minutes(time(NULL)),e->decrtime)
+ >= decr_every)
+ {
+ if (e->counter) {
+ if (e->counter > COUNTER_INIT_VAL*2) {
+ e->counter /= 2;
+ } else {
+ e->counter--;
+ }
+ }
+ e->decrtime = to_16bit_minutes(time(NULL));
+ }
+ return e->counter;
+}
+
+/* Print the entry info. */
+void show_entry(long pos, struct entry *e) {
+ char *tag = "normal ";
+
+ if (pos >= 10 && pos <= 14) tag = "new no access";
+ if (pos >= 15 && pos <= 19) tag = "new accessed ";
+    if (pos >= keyspace_size-5) tag = "old no access";
+
+ printf("%ld] <%s> frequency:%d decrtime:%d [%lu hits | age:%ld sec]\n",
+ pos, tag, e->counter, e->decrtime, (unsigned long)e->hits,
+ time(NULL) - e->ctime);
+}
+
+int main(void) {
+ time_t start = time(NULL);
+ time_t new_entry_time = start;
+ time_t display_time = start;
+ struct entry *entries = malloc(sizeof(*entries)*keyspace_size);
+ long j;
+
+ /* Initialize. */
+ for (j = 0; j < keyspace_size; j++) {
+ entries[j].counter = COUNTER_INIT_VAL;
+ entries[j].decrtime = to_16bit_minutes(start);
+ entries[j].hits = 0;
+ entries[j].ctime = time(NULL);
+ }
+
+ while(1) {
+ time_t now = time(NULL);
+ long idx;
+
+ /* Scan N random entries (simulates the eviction under maxmemory). */
+ for (j = 0; j < 3; j++) {
+ scan_entry(entries+(rand()%keyspace_size));
+ }
+
+ /* Access a random entry: use a power-law access pattern up to
+ * 'switch_after' seconds. Then revert to flat access pattern. */
+ if (now-start < switch_after) {
+ /* Power law. */
+ idx = 1;
+ while((rand() % 21) != 0 && idx < keyspace_size) idx *= 2;
+ if (idx > keyspace_size) idx = keyspace_size;
+ idx = rand() % idx;
+ } else {
+ /* Flat. */
+ idx = rand() % keyspace_size;
+ }
+
+ /* Never access entries between position 10 and 14, so that
+ * we simulate what happens to new entries that are never
+ * accessed VS new entries which are accessed in positions
+ * 15-19.
+ *
+ * Also never access last 5 entry, so that we have keys which
+ * are never recreated (old), and never accessed. */
+ if ((idx < 10 || idx > 14) && (idx < keyspace_size-5))
+ access_entry(entries+idx);
+
+ /* Simulate the addition of new entries at positions between
+ * 10 and 19, a random one every 10 seconds. */
+ if (new_entry_time <= now) {
+ idx = 10+(rand()%10);
+ entries[idx].counter = COUNTER_INIT_VAL;
+ entries[idx].decrtime = to_16bit_minutes(time(NULL));
+ entries[idx].hits = 0;
+ entries[idx].ctime = time(NULL);
+ new_entry_time = now+10;
+ }
+
+ /* Show the first 20 entries and the last 20 entries. */
+ if (display_time != now) {
+ printf("=============================\n");
+ printf("Current minutes time: %d\n", (int)to_16bit_minutes(now));
+ printf("Access method: %s\n",
+ (now-start < switch_after) ? "power-law" : "flat");
+
+ for (j = 0; j < 20; j++)
+ show_entry(j,entries+j);
+
+ for (j = keyspace_size-20; j < keyspace_size; j++)
+ show_entry(j,entries+j);
+ display_time = now;
+ }
+ }
+ return 0;
+}
+
diff --git a/utils/lru/test-lru.rb b/utils/lru/test-lru.rb
index ee0527ef4..d511e206f 100644
--- a/utils/lru/test-lru.rb
+++ b/utils/lru/test-lru.rb
@@ -1,112 +1,223 @@
require 'rubygems'
require 'redis'
-r = Redis.new
-r.config("SET","maxmemory","2000000")
-r.config("SET","maxmemory-policy","allkeys-lru")
-r.config("SET","maxmemory-samples",5)
-r.config("RESETSTAT")
-r.flushall
-
-puts <<EOF
-<html>
-<body>
-<style>
-.box {
- width:5px;
- height:5px;
- float:left;
- margin: 1px;
-}
-
-.old {
- border: 1px black solid;
-}
-
-.new {
- border: 1px green solid;
-}
-
-.ex {
- background-color: #666;
-}
-</style>
-<pre>
+$runs = []; # Remember the error rate of each run for average purposes.
+$o = {}; # Options set by parsing the command line arguments.
+
+def testit(filename)
+ r = Redis.new
+ r.config("SET","maxmemory","2000000")
+ if $o[:ttl]
+ r.config("SET","maxmemory-policy","volatile-ttl")
+ else
+ r.config("SET","maxmemory-policy","allkeys-lru")
+ end
+ r.config("SET","maxmemory-samples",5)
+ r.config("RESETSTAT")
+ r.flushall
+
+ html = ""
+ html << <<EOF
+ <html>
+ <body>
+ <style>
+ .box {
+ width:5px;
+ height:5px;
+ float:left;
+ margin: 1px;
+ }
+
+ .old {
+ border: 1px black solid;
+ }
+
+ .new {
+ border: 1px green solid;
+ }
+
+ .otherdb {
+ border: 1px red solid;
+ }
+
+ .ex {
+ background-color: #666;
+ }
+ </style>
+ <pre>
EOF
-# Fill
-oldsize = r.dbsize
-id = 0
-while true
- id += 1
- r.set(id,"foo")
- newsize = r.dbsize
- break if newsize == oldsize
- oldsize = newsize
-end
+ # Fill the DB up to the first eviction.
+ oldsize = r.dbsize
+ id = 0
+ while true
+ id += 1
+ begin
+ r.set(id,"foo")
+ rescue
+ break
+ end
+ newsize = r.dbsize
+ break if newsize == oldsize # A key was evicted? Stop.
+ oldsize = newsize
+ end
-inserted = r.dbsize
-first_set_max_id = id
-puts "#{r.dbsize} keys inserted"
+ inserted = r.dbsize
+ first_set_max_id = id
+ html << "#{r.dbsize} keys inserted.\n"
-# Access keys sequentially
+    # Access keys sequentially, so that in theory the first part will be
+    # evicted and the latter part will not, according to perfect LRU.
-puts "Access keys sequentially"
-(1..first_set_max_id).each{|id|
- r.get(id)
-# sleep 0.001
-}
+ if $o[:ttl]
+ STDERR.puts "Set increasing expire value"
+ (1..first_set_max_id).each{|id|
+ r.expire(id,1000+id)
+ STDERR.print(".") if (id % 150) == 0
+ }
+ else
+ STDERR.puts "Access keys sequentially"
+ (1..first_set_max_id).each{|id|
+ r.get(id)
+ sleep 0.001
+ STDERR.print(".") if (id % 150) == 0
+ }
+ end
+ STDERR.puts
+
+    # Insert 50% more keys. We expect that the new keys will rarely be evicted
+ # since their last access time is recent compared to the others.
+ #
+ # Note that we insert the first 100 keys of the new set into DB1 instead
+    # of DB0, so that we can test how cross-DB eviction works.
+ half = inserted/2
+ html << "Insert enough keys to evict half the keys we inserted.\n"
+ add = 0
+
+ otherdb_start_idx = id+1
+ otherdb_end_idx = id+100
+ while true
+ add += 1
+ id += 1
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ r.set(id,"foo")
+ r.select(0)
+ else
+ r.set(id,"foo")
+ end
+ break if r.info['evicted_keys'].to_i >= half
+ end
+
+ html << "#{add} additional keys added.\n"
+ html << "#{r.dbsize} keys in DB.\n"
+
+    # Check if the evicted keys respect the LRU order.
+    # We consider errors from key 1 to key N progressively more serious,
+    # as they violate the access pattern more.
+
+ errors = 0
+ e = 1
+ error_per_key = 100000.0/first_set_max_id
+ half_set_size = first_set_max_id/2
+ maxerr = 0
+ (1..(first_set_max_id/2)).each{|id|
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ exists = r.exists(id)
+ r.select(0)
+ else
+ exists = r.exists(id)
+ end
+ if id < first_set_max_id/2
+ thiserr = error_per_key * ((half_set_size-id).to_f/half_set_size)
+ maxerr += thiserr
+ errors += thiserr if exists
+ elsif id >= first_set_max_id/2
+ thiserr = error_per_key * ((id-half_set_size).to_f/half_set_size)
+ maxerr += thiserr
+ errors += thiserr if !exists
+ end
+ }
+ errors = errors*100/maxerr
+
+    STDERR.puts "Test finished with #{errors}% error! Writing the HTML to #{filename}."
+
+ html << "#{errors}% error!\n"
+ html << "</pre>"
+ $runs << errors
+
+ # Generate the graphical representation
+ (1..id).each{|id|
+ # Mark first set and added items in a different way.
+ c = "box"
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ c << " otherdb"
+ elsif id <= first_set_max_id
+ c << " old"
+ else
+ c << " new"
+ end
+
+ # Add class if exists
+ if id >= otherdb_start_idx && id <= otherdb_end_idx
+ r.select(1)
+ exists = r.exists(id)
+ r.select(0)
+ else
+ exists = r.exists(id)
+ end
+
+ c << " ex" if exists
+ html << "<div title=\"#{id}\" class=\"#{c}\"></div>"
+ }
+
+ # Close HTML page
+
+ html << <<EOF
+ </body>
+ </html>
+EOF
-# Insert more 50% keys. We expect that the new keys
-half = inserted/2
-puts "Insert enough keys to evict half the keys we inserted"
-add = 0
-while true
- add += 1
- id += 1
- r.set(id,"foo")
- break if r.info['evicted_keys'].to_i >= half
+ f = File.open(filename,"w")
+ f.write(html)
+ f.close
end
-puts "#{add} additional keys added."
-puts "#{r.dbsize} keys in DB"
-
-# Check if evicted keys respect LRU
-# We consider errors from 1 to N progressively more serious as they violate
-# more the access pattern.
-
-errors = 0
-e = 1
-edecr = 1.0/(first_set_max_id/2)
-(1..(first_set_max_id/2)).each{|id|
- e -= edecr if e > 0
- e = 0 if e < 0
- if r.exists(id)
- errors += e
- end
-}
+def print_avg
+ avg = ($runs.reduce {|a,b| a+b}) / $runs.length
+ puts "#{$runs.length} runs, AVG is #{avg}"
+end
-puts "#{errors} errors!"
-puts "</pre>"
+if ARGV.length < 1
+ STDERR.puts "Usage: ruby test-lru.rb <html-output-filename> [--runs <count>] [--ttl]"
+ STDERR.puts "Options:"
+ STDERR.puts " --runs <count> Execute the test <count> times."
+ STDERR.puts " --ttl Set keys with increasing TTL values"
+ STDERR.puts " (starting from 1000 seconds) in order to"
+ STDERR.puts " test the volatile-lru policy."
+ exit 1
+end
-# Generate the graphical representation
-(1..id).each{|id|
- # Mark first set and added items in a different way.
- c = "box"
- if id <= first_set_max_id
- c << " old"
+filename = ARGV[0]
+$o[:numruns] = 1
+
+# Options parsing
+i = 1
+while i < ARGV.length
+ if ARGV[i] == '--runs'
+ $o[:numruns] = ARGV[i+1].to_i
+ i+= 1
+ elsif ARGV[i] == '--ttl'
+ $o[:ttl] = true
else
- c << " new"
+ STDERR.puts "Unknown option #{ARGV[i]}"
+ exit 1
end
+ i+= 1
+end
- # Add class if exists
- c << " ex" if r.exists(id)
- puts "<div class=\"#{c}\"></div>"
+$o[:numruns].times {
+ testit(filename)
+ print_avg if $o[:numruns] != 1
}
-
-# Close HTML page
-
-puts <<EOF
-</body>
-</html>
-EOF
diff --git a/utils/redis_init_script b/utils/redis_init_script
index 4dfe98047..006db87e5 100755
--- a/utils/redis_init_script
+++ b/utils/redis_init_script
@@ -3,6 +3,14 @@
# Simple Redis init.d script conceived to work on Linux systems
# as it does use of the /proc filesystem.
+### BEGIN INIT INFO
+# Provides: redis_6379
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: Redis data structure server
+# Description: Redis data structure server. See https://redis.io
+### END INIT INFO
+
REDISPORT=6379
EXEC=/usr/local/bin/redis-server
CLIEXEC=/usr/local/bin/redis-cli
diff --git a/utils/releasetools/00_test_release.sh b/utils/releasetools/03_test_release.sh
index 8fb6e9501..3dfdcd6a3 100755
--- a/utils/releasetools/00_test_release.sh
+++ b/utils/releasetools/03_test_release.sh
@@ -13,6 +13,8 @@ ssh antirez@metal "export TERM=xterm;
cd /tmp;
rm -rf test_release_tmp_dir;
cd test_release_tmp_dir;
+ rm -f $TARNAME;
+ rm -rf redis-${TAG};
wget $DOWNLOADURL;
tar xvzf $TARNAME;
cd redis-${TAG};
diff --git a/utils/releasetools/03_release_hash.sh b/utils/releasetools/04_release_hash.sh
index df082149a..9d5c6ad4b 100755
--- a/utils/releasetools/03_release_hash.sh
+++ b/utils/releasetools/04_release_hash.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-SHA=$(curl -s http://download.redis.io/releases/redis-${1}.tar.gz | shasum | cut -f 1 -d' ')
-ENTRY="hash redis-${1}.tar.gz sha1 $SHA http://download.redis.io/releases/redis-${1}.tar.gz"
+SHA=$(curl -s http://download.redis.io/releases/redis-${1}.tar.gz | shasum -a 256 | cut -f 1 -d' ')
+ENTRY="hash redis-${1}.tar.gz sha256 $SHA http://download.redis.io/releases/redis-${1}.tar.gz"
echo $ENTRY >> ~/hack/redis-hashes/README
vi ~/hack/redis-hashes/README
echo "Press any key to commit, Ctrl-C to abort)."
diff --git a/utils/releasetools/changelog.tcl b/utils/releasetools/changelog.tcl
new file mode 100755
index 000000000..9b3a2cddc
--- /dev/null
+++ b/utils/releasetools/changelog.tcl
@@ -0,0 +1,30 @@
+#!/usr/bin/env tclsh
+
+if {[llength $::argv] != 2} {
+ puts "Usage: $::argv0 <branch> <version>"
+ exit 1
+}
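+
+# Example invocation (illustrative branch and version values):
+#
+#   ./changelog.tcl unstable 5.0.3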
+
+set branch [lindex $::argv 0]
+set ver [lindex $::argv 1]
+
+set template {
+================================================================================
+Redis %ver% Released %date%
+================================================================================
+
+Upgrade urgency <URGENCY>: <DESCRIPTION>
+}
+
+set template [string trim $template]
+append template "\n\n"
+set date [clock format [clock seconds]]
+set template [string map [list %ver% $ver %date% $date] $template]
+
+append template [exec git log $branch~100..$branch "--format=format:%an in commit %h:%n %s" --shortstat]
+
+#Older, more verbose version.
+#
+#append template [exec git log $branch~30..$branch "--format=format:+-------------------------------------------------------------------------------%n| %s%n| By %an, %ai%n+--------------------------------------------------------------------------------%nhttps://github.com/antirez/redis/commit/%H%n%n%b" --stat]
+
+puts $template